From 179125085bd4ca70e8e028913193a93653bd12f7 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 14 Feb 2017 10:26:03 -0800 Subject: ARM: OMAP3: Fix smartreflex platform data regression Commit d9d9cec02835 ("ARM: OMAP2+: Remove legacy data from hwmod for omap3") dropped platform data that should no longer be used as we're booting with device tree. It turns out that smartreflex is still using platform data and produces the following errors during probe: smartreflex smartreflex.0: invalid resource smartreflex smartreflex.0: omap_sr_probe: ioremap fail smartreflex: probe of smartreflex.0 failed with error -22 smartreflex smartreflex.1: invalid resource smartreflex smartreflex.1: omap_sr_probe: ioremap fail smartreflex: probe of smartreflex.1 failed with error -22 Let's fix the regression by adding back the smartreflex hwmod data. The long term is to update the smartreflex driver to use device tree based probing. Fixes: d9d9cec02835 ("ARM: OMAP2+: Remove legacy data from hwmod for omap3") Reported-by: Adam Ford Tested-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 56f917ec8621..507ff0795a8e 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -2112,11 +2112,20 @@ static struct omap_hwmod_ocp_if omap3_l4_core__i2c3 = { }; /* L4 CORE -> SR1 interface */ +static struct omap_hwmod_addr_space omap3_sr1_addr_space[] = { + { + .pa_start = OMAP34XX_SR1_BASE, + .pa_end = OMAP34XX_SR1_BASE + SZ_1K - 1, + .flags = ADDR_TYPE_RT, + }, + { }, +}; static struct omap_hwmod_ocp_if omap34xx_l4_core__sr1 = { .master = &omap3xxx_l4_core_hwmod, .slave = &omap34xx_sr1_hwmod, .clk = "sr_l4_ick", + .addr = omap3_sr1_addr_space, .user = OCP_USER_MPU, }; @@ -2124,15 +2133,25 @@ static struct omap_hwmod_ocp_if omap36xx_l4_core__sr1 = { .master = &omap3xxx_l4_core_hwmod, .slave = &omap36xx_sr1_hwmod, .clk = "sr_l4_ick", + .addr = omap3_sr1_addr_space, .user = OCP_USER_MPU, }; /* L4 CORE -> SR1 interface */ +static struct omap_hwmod_addr_space omap3_sr2_addr_space[] = { + { + .pa_start = OMAP34XX_SR2_BASE, + .pa_end = OMAP34XX_SR2_BASE + SZ_1K - 1, + .flags = ADDR_TYPE_RT, + }, + { }, +}; static struct omap_hwmod_ocp_if omap34xx_l4_core__sr2 = { .master = &omap3xxx_l4_core_hwmod, .slave = &omap34xx_sr2_hwmod, .clk = "sr_l4_ick", + .addr = omap3_sr2_addr_space, .user = OCP_USER_MPU, }; @@ -2140,6 +2159,7 @@ static struct omap_hwmod_ocp_if omap36xx_l4_core__sr2 = { .master = &omap3xxx_l4_core_hwmod, .slave = &omap36xx_sr2_hwmod, .clk = "sr_l4_ick", + .addr = omap3_sr2_addr_space, .user = OCP_USER_MPU, }; -- cgit v1.2.3 From 448c077eeb02240c430db2a2c3bf5285a4c65d66 Mon Sep 17 00:00:00 2001 From: Matthijs van Duin Date: Thu, 16 Feb 2017 01:05:04 +0100 Subject: ARM: OMAP5 / DRA7: Fix HYP mode boot for thumb2 build 'adr' yields a data-pointer, not a function-pointer. Fixes: 999f934de195 ("ARM: omap5/dra7xx: Enable booting secondary CPU in HYP mode") Signed-off-by: Matthijs van Duin Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap-headsmp.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap-headsmp.S b/arch/arm/mach-omap2/omap-headsmp.S index fe36ce2734d4..4c6f14cf92a8 100644 --- a/arch/arm/mach-omap2/omap-headsmp.S +++ b/arch/arm/mach-omap2/omap-headsmp.S @@ -17,6 +17,7 @@ #include #include +#include #include "omap44xx.h" @@ -66,7 +67,7 @@ wait_2: ldr r2, =AUX_CORE_BOOT0_PA @ read from AuxCoreBoot0 cmp r0, r4 bne wait_2 ldr r12, =API_HYP_ENTRY - adr r0, hyp_boot + badr r0, hyp_boot smc #0 hyp_boot: b omap_secondary_startup -- cgit v1.2.3 From 72cedf599fcebfd6cd2550274d7855838068d28c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 21 Feb 2017 23:00:46 +0100 Subject: ASoC: mediatek: add I2C dependency for CS42XX8 We should not select drivers that depend on I2C when that is disabled, as it results in a build error: warning: (SND_SOC_MT2701_CS42448) selects SND_SOC_CS42XX8_I2C which has unmet direct dependencies (SOUND && !M68K && !UML && SND && SND_SOC && I2C) sound/soc/codecs/cs42xx8-i2c.c:60:1: warning: data definition has no type or storage class module_i2c_driver(cs42xx8_i2c_driver); sound/soc/codecs/cs42xx8-i2c.c:60:1: error: type defaults to 'int' in declaration of 'module_i2c_driver' [-Werror=implicit-int] Fixes: 1f458d53f76c ("ASoC: mediatek: Add mt2701-cs42448 driver and config option.") Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown --- sound/soc/mediatek/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index 05cf809cf9e1..d7013bde6f45 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -13,7 +13,7 @@ config SND_SOC_MT2701 config SND_SOC_MT2701_CS42448 tristate "ASoc Audio driver for MT2701 with CS42448 codec" - depends on SND_SOC_MT2701 + depends on SND_SOC_MT2701 && I2C select SND_SOC_CS42XX8_I2C select SND_SOC_BT_SCO help -- cgit v1.2.3 From f5432f01240ef69a391940d623b6a51768aefd65 Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Wed, 15 Feb 2017 20:42:52 +0530 Subject: ARM: dts: am57xx-idk: tpic2810 is on I2C bus, not SPI commit 50e95b6b854c ("ARM: dts: am57xx-idk: Add Industrial output support") added the TPIC2810 device-tree node under SPI bus instead of I2C1. Fix it. Tested on AM572x IDK by driving on-board LEDs connected to TPIC2810 Fixes: 50e95b6b854c ("ARM: dts: am57xx-idk: Add Industrial output support") Signed-off-by: Sekhar Nori Acked-by: Andrew F. Davis Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am57xx-idk-common.dtsi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi index 814a720d5c3d..d0a55b845690 100644 --- a/arch/arm/boot/dts/am57xx-idk-common.dtsi +++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi @@ -311,6 +311,13 @@ /* ID & VBUS GPIOs provided in board dts */ }; }; + + tpic2810: tpic2810@60 { + compatible = "ti,tpic2810"; + reg = <0x60>; + gpio-controller; + #gpio-cells = <2>; + }; }; &mcspi3 { @@ -326,13 +333,6 @@ spi-max-frequency = <1000000>; spi-cpol; }; - - tpic2810: tpic2810@60 { - compatible = "ti,tpic2810"; - reg = <0x60>; - gpio-controller; - #gpio-cells = <2>; - }; }; &uart3 { -- cgit v1.2.3 From 0341735226dcfa9d3727ff001e030f879ab91ca2 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 17 Feb 2017 06:51:17 -0800 Subject: ARM: omap2plus_defconfig: Enable INPUT_MOUSEDEV as loadable modules Otherwise mice won't be happy. Signed-off-by: Tony Lindgren --- arch/arm/configs/omap2plus_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 195c98b85568..77ffccfd0c3f 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -188,6 +188,7 @@ CONFIG_WL12XX=m CONFIG_WL18XX=m CONFIG_WLCORE_SPI=m CONFIG_WLCORE_SDIO=m +CONFIG_INPUT_MOUSEDEV=m CONFIG_INPUT_JOYDEV=m CONFIG_INPUT_EVDEV=m CONFIG_KEYBOARD_ATKBD=m -- cgit v1.2.3 From 48385896e9c53e1061f103e1271a6be9a7ea00c4 Mon Sep 17 00:00:00 2001 From: Teresa Remmet Date: Fri, 10 Feb 2017 13:29:07 +0100 Subject: ARM: dts: am335x-pcm953: Fix legacy wakeup source binding Replaced the legacy binding "gpio-key,wakeup" with "wakeup-source" as noted in the kernel documentation. Signed-off-by: Teresa Remmet Reported-by: Sudeep Holla Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-pcm-953.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi index 02981eae96b9..1ec8e0d80191 100644 --- a/arch/arm/boot/dts/am335x-pcm-953.dtsi +++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi @@ -63,14 +63,14 @@ label = "home"; linux,code = ; gpios = <&gpio3 7 GPIO_ACTIVE_HIGH>; - gpio-key,wakeup; + wakeup-source; }; button@1 { label = "menu"; linux,code = ; gpios = <&gpio3 8 GPIO_ACTIVE_HIGH>; - gpio-key,wakeup; + wakeup-source; }; }; -- cgit v1.2.3 From 7807e086a2d1f69cc1a57958cac04fea79fc2112 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Sat, 11 Feb 2017 14:02:49 +0100 Subject: ARM: OMAP2+: gpmc-onenand: propagate error on initialization failure gpmc_probe_onenand_child returns success even on gpmc_onenand_init failure. Fix that. Signed-off-by: Ladislav Michl Acked-by: Roger Quadros Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/gpmc-onenand.c | 10 ++++++---- drivers/memory/omap-gpmc.c | 4 +--- include/linux/omap-gpmc.h | 5 +++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 8633c703546a..2944af820558 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -367,7 +367,7 @@ static int gpmc_onenand_setup(void __iomem *onenand_base, int *freq_ptr) return ret; } -void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) +int gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) { int err; struct device *dev = &gpmc_onenand_device.dev; @@ -393,15 +393,17 @@ void gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) if (err < 0) { dev_err(dev, "Cannot request GPMC CS %d, error %d\n", gpmc_onenand_data->cs, err); - return; + return err; } gpmc_onenand_resource.end = gpmc_onenand_resource.start + ONENAND_IO_SIZE - 1; - if (platform_device_register(&gpmc_onenand_device) < 0) { + err = platform_device_register(&gpmc_onenand_device); + if (err) { dev_err(dev, "Unable to register OneNAND device\n"); gpmc_cs_free(gpmc_onenand_data->cs); - return; } + + return err; } diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index 5457c361ad58..bf0fe0137dfe 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -1947,9 +1947,7 @@ static int gpmc_probe_onenand_child(struct platform_device *pdev, if (!of_property_read_u32(child, "dma-channel", &val)) gpmc_onenand_data->dma_channel = val; - gpmc_onenand_init(gpmc_onenand_data); - - return 0; + return gpmc_onenand_init(gpmc_onenand_data); } #else static int gpmc_probe_onenand_child(struct platform_device *pdev, diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index 35d0fd7a4948..e821a3132a3e 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -88,10 +88,11 @@ static inline int gpmc_nand_init(struct omap_nand_platform_data *d, #endif #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) -extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); +extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else #define board_onenand_data NULL -static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) +static inline int gpmc_onenand_init(struct omap_onenand_platform_data *d) { + return 0; } #endif -- cgit v1.2.3 From ac28e47ccc3ff8dabce1aec6b224760c3e524044 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Tue, 21 Feb 2017 10:44:45 +0100 Subject: ARM: OMAP2+: Remove legacy gpmc-nand.c This code is no longer used and can be removed as we are using device tree. Removing this code also removes a dependency between drivers/mtd and arch/arm/mach-omap2 making furhter driver changes easier. Signed-off-by: Ladislav Michl [tony@atomide.com: removed from header too, updated comments] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Makefile | 3 - arch/arm/mach-omap2/gpmc-nand.c | 154 ---------------------------------------- include/linux/omap-gpmc.h | 11 --- 3 files changed, 168 deletions(-) delete mode 100644 arch/arm/mach-omap2/gpmc-nand.c diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 093458b62c8d..c89757abb0ae 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -241,6 +241,3 @@ obj-$(CONFIG_MACH_OMAP2_TUSB6010) += usb-tusb6010.o onenand-$(CONFIG_MTD_ONENAND_OMAP2) := gpmc-onenand.o obj-y += $(onenand-m) $(onenand-y) - -nand-$(CONFIG_MTD_NAND_OMAP2) := gpmc-nand.o -obj-y += $(nand-m) $(nand-y) diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c deleted file mode 100644 index f6ac027f3c3b..000000000000 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * gpmc-nand.c - * - * Copyright (C) 2009 Texas Instruments - * Vimal Singh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "soc.h" - -/* minimum size for IO mapping */ -#define NAND_IO_SIZE 4 - -static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) -{ - /* platforms which support all ECC schemes */ - if (soc_is_am33xx() || soc_is_am43xx() || cpu_is_omap44xx() || - soc_is_omap54xx() || soc_is_dra7xx()) - return 1; - - if (ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW || - ecc_opt == OMAP_ECC_BCH8_CODE_HW_DETECTION_SW) { - if (cpu_is_omap24xx()) - return 0; - else if (cpu_is_omap3630() && (GET_OMAP_REVISION() == 0)) - return 0; - else - return 1; - } - - /* OMAP3xxx do not have ELM engine, so cannot support ECC schemes - * which require H/W based ECC error detection */ - if ((cpu_is_omap34xx() || cpu_is_omap3630()) && - ((ecc_opt == OMAP_ECC_BCH4_CODE_HW) || - (ecc_opt == OMAP_ECC_BCH8_CODE_HW))) - return 0; - - /* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */ - if (ecc_opt == OMAP_ECC_HAM1_CODE_HW || - ecc_opt == OMAP_ECC_HAM1_CODE_SW) - return 1; - else - return 0; -} - -/* This function will go away once the device-tree convertion is complete */ -static void gpmc_set_legacy(struct omap_nand_platform_data *gpmc_nand_data, - struct gpmc_settings *s) -{ - /* Enable RD PIN Monitoring Reg */ - if (gpmc_nand_data->dev_ready) { - s->wait_on_read = true; - s->wait_on_write = true; - } - - if (gpmc_nand_data->devsize == NAND_BUSWIDTH_16) - s->device_width = GPMC_DEVWIDTH_16BIT; - else - s->device_width = GPMC_DEVWIDTH_8BIT; -} - -int gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data, - struct gpmc_timings *gpmc_t) -{ - int err = 0; - struct gpmc_settings s; - struct platform_device *pdev; - struct resource gpmc_nand_res[] = { - { .flags = IORESOURCE_MEM, }, - { .flags = IORESOURCE_IRQ, }, - { .flags = IORESOURCE_IRQ, }, - }; - - BUG_ON(gpmc_nand_data->cs >= GPMC_CS_NUM); - - err = gpmc_cs_request(gpmc_nand_data->cs, NAND_IO_SIZE, - (unsigned long *)&gpmc_nand_res[0].start); - if (err < 0) { - pr_err("omap2-gpmc: Cannot request GPMC CS %d, error %d\n", - gpmc_nand_data->cs, err); - return err; - } - gpmc_nand_res[0].end = gpmc_nand_res[0].start + NAND_IO_SIZE - 1; - gpmc_nand_res[1].start = gpmc_get_client_irq(GPMC_IRQ_FIFOEVENTENABLE); - gpmc_nand_res[2].start = gpmc_get_client_irq(GPMC_IRQ_COUNT_EVENT); - - memset(&s, 0, sizeof(struct gpmc_settings)); - gpmc_set_legacy(gpmc_nand_data, &s); - - s.device_nand = true; - - if (gpmc_t) { - err = gpmc_cs_set_timings(gpmc_nand_data->cs, gpmc_t, &s); - if (err < 0) { - pr_err("omap2-gpmc: Unable to set gpmc timings: %d\n", - err); - return err; - } - } - - err = gpmc_cs_program_settings(gpmc_nand_data->cs, &s); - if (err < 0) - goto out_free_cs; - - err = gpmc_configure(GPMC_CONFIG_WP, 0); - if (err < 0) - goto out_free_cs; - - if (!gpmc_hwecc_bch_capable(gpmc_nand_data->ecc_opt)) { - pr_err("omap2-nand: Unsupported NAND ECC scheme selected\n"); - err = -EINVAL; - goto out_free_cs; - } - - - pdev = platform_device_alloc("omap2-nand", gpmc_nand_data->cs); - if (pdev) { - err = platform_device_add_resources(pdev, gpmc_nand_res, - ARRAY_SIZE(gpmc_nand_res)); - if (!err) - pdev->dev.platform_data = gpmc_nand_data; - } else { - err = -ENOMEM; - } - if (err) - goto out_free_pdev; - - err = platform_device_add(pdev); - if (err) { - dev_err(&pdev->dev, "Unable to register NAND device\n"); - goto out_free_pdev; - } - - return 0; - -out_free_pdev: - platform_device_put(pdev); -out_free_cs: - gpmc_cs_free(gpmc_nand_data->cs); - - return err; -} diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index e821a3132a3e..fd0de00c0d77 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -76,17 +76,6 @@ struct gpmc_timings; struct omap_nand_platform_data; struct omap_onenand_platform_data; -#if IS_ENABLED(CONFIG_MTD_NAND_OMAP2) -extern int gpmc_nand_init(struct omap_nand_platform_data *d, - struct gpmc_timings *gpmc_t); -#else -static inline int gpmc_nand_init(struct omap_nand_platform_data *d, - struct gpmc_timings *gpmc_t) -{ - return 0; -} -#endif - #if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) extern int gpmc_onenand_init(struct omap_onenand_platform_data *d); #else -- cgit v1.2.3 From 10e5778f54765c96fe0c8f104b7a030e5b35bc72 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 4 Mar 2017 07:02:10 -0800 Subject: ARM: OMAP2+: Fix device node reference counts After commit 0549bde0fcb1 ("of: fix of_node leak caused in of_find_node_opts_by_path"), the following error may be reported when running omap images. OF: ERROR: Bad of_node_put() on /ocp@68000000 CPU: 0 PID: 0 Comm: swapper Not tainted 4.10.0-rc7-next-20170210 #1 Hardware name: Generic OMAP3-GP (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x98/0xac) [] (dump_stack) from [] (kobject_release+0x48/0x7c) [] (kobject_release) from [] (of_find_node_by_name+0x74/0x94) [] (of_find_node_by_name) from [] (omap3xxx_hwmod_is_hs_ip_block_usable+0x24/0x2c) [] (omap3xxx_hwmod_is_hs_ip_block_usable) from [] (omap3xxx_hwmod_init+0x180/0x274) [] (omap3xxx_hwmod_init) from [] (omap3_init_early+0xa0/0x11c) [] (omap3_init_early) from [] (omap3430_init_early+0x8/0x30) [] (omap3430_init_early) from [] (setup_arch+0xc04/0xc34) [] (setup_arch) from [] (start_kernel+0x68/0x38c) [] (start_kernel) from [<8020807c>] (0x8020807c) of_find_node_by_name() drops the reference to the passed device node. The commit referenced above exposes this problem. To fix the problem, use of_get_child_by_name() instead of of_find_node_by_name(); of_get_child_by_name() does not drop the reference count of passed device nodes. While semantically different, we only look for immediate children of the passed device node, so of_get_child_by_name() is a more appropriate function to use anyway. Release the reference to the device node obtained with of_get_child_by_name() after it is no longer needed to avoid another device node leak. While at it, clean up the code and change the return type of omap3xxx_hwmod_is_hs_ip_block_usable() to bool to match its use and the return type of of_device_is_available(). Cc: Qi Hou Cc: Peter Rosin Cc: Rob Herring Signed-off-by: Guenter Roeck Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 507ff0795a8e..0fa08c1e4701 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3131,16 +3131,20 @@ static struct omap_hwmod_ocp_if *omap3xxx_dss_hwmod_ocp_ifs[] __initdata = { * Return: 0 if device named @dev_name is not likely to be accessible, * or 1 if it is likely to be accessible. */ -static int __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, - const char *dev_name) +static bool __init omap3xxx_hwmod_is_hs_ip_block_usable(struct device_node *bus, + const char *dev_name) { + struct device_node *node; + bool available; + if (!bus) - return (omap_type() == OMAP2_DEVICE_TYPE_GP) ? 1 : 0; + return omap_type() == OMAP2_DEVICE_TYPE_GP; - if (of_device_is_available(of_find_node_by_name(bus, dev_name))) - return 1; + node = of_get_child_by_name(bus, dev_name); + available = of_device_is_available(node); + of_node_put(node); - return 0; + return available; } int __init omap3xxx_hwmod_init(void) -- cgit v1.2.3 From b92675d998a9fa37fe9e0e35053a95b4a23c158b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 4 Mar 2017 07:02:11 -0800 Subject: ARM: OMAP2+: Release device node after it is no longer needed. The device node returned by of_find_node_by_name() needs to be released after it is no longer needed to avoid a device node leak. Signed-off-by: Guenter Roeck Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 0fa08c1e4701..1435fee39a89 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3213,15 +3213,20 @@ int __init omap3xxx_hwmod_init(void) if (h_sham && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "sham")) { r = omap_hwmod_register_links(h_sham); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } } if (h_aes && omap3xxx_hwmod_is_hs_ip_block_usable(bus, "aes")) { r = omap_hwmod_register_links(h_aes); - if (r < 0) + if (r < 0) { + of_node_put(bus); return r; + } } + of_node_put(bus); /* * Register hwmod links specific to certain ES levels of a -- cgit v1.2.3 From 1c2bcc766be44467809f1798cd4ceacafe20a852 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 22 Feb 2017 17:25:42 +0100 Subject: batman-adv: Keep fragments equally sized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The batman-adv fragmentation packets have the design problem that they cannot be refragmented and cannot handle padding by the underlying link. The latter often leads to problems when networks are incorrectly configured and don't use a common MTU. The sender could for example fragment a 1271 byte frame (plus external ethernet header (14) and batadv unicast header (10)) to fit in a 1280 bytes large MTU of the underlying link (max. 1294 byte frames). This would create a 1294 bytes large frame (fragment 2) and a 55 bytes large frame (fragment 1). The extra 54 bytes are the fragment header (20) added to each fragment and the external ethernet header (14) for the second fragment. Let us assume that the next hop is then not able to transport 1294 bytes to its next hop. The 1294 byte large frame will be dropped but the 55 bytes large fragment will still be forwarded to its destination. Or let us assume that the underlying hardware requires that each frame has a minimum size (e.g. 60 bytes). Then it will pad the 55 bytes frame to 60 bytes. The receiver of the 60 bytes frame will no longer be able to correctly assemble the two frames together because it is not aware that 5 bytes of the 60 bytes frame are padding and don't belong to the reassembled frame. This can partly be avoided by splitting frames more equally. In this example, the 675 and 674 bytes large fragment frames could both potentially reach its destination without being too large or too small. Reported-by: Martin Weinelt Fixes: ee75ed88879a ("batman-adv: Fragment and send skbs larger than mtu") Signed-off-by: Sven Eckelmann Acked-by: Linus Lüssing Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 11149e5be4e0..106bda56ec98 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -404,7 +404,7 @@ out: * batadv_frag_create - create a fragment from skb * @skb: skb to create fragment from * @frag_head: header to use in new fragment - * @mtu: size of new fragment + * @fragment_size: size of new fragment * * Split the passed skb into two fragments: A new one with size matching the * passed mtu and the old one with the rest. The new skb contains data from the @@ -414,11 +414,11 @@ out: */ static struct sk_buff *batadv_frag_create(struct sk_buff *skb, struct batadv_frag_packet *frag_head, - unsigned int mtu) + unsigned int fragment_size) { struct sk_buff *skb_fragment; unsigned int header_size = sizeof(*frag_head); - unsigned int fragment_size = mtu - header_size; + unsigned int mtu = fragment_size + header_size; skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN); if (!skb_fragment) @@ -456,7 +456,7 @@ int batadv_frag_send_packet(struct sk_buff *skb, struct sk_buff *skb_fragment; unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; unsigned int header_size = sizeof(frag_header); - unsigned int max_fragment_size, max_packet_size; + unsigned int max_fragment_size, num_fragments; int ret; /* To avoid merge and refragmentation at next-hops we never send @@ -464,10 +464,15 @@ int batadv_frag_send_packet(struct sk_buff *skb, */ mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE); max_fragment_size = mtu - header_size; - max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS; + + if (skb->len == 0 || max_fragment_size == 0) + return -EINVAL; + + num_fragments = (skb->len - 1) / max_fragment_size + 1; + max_fragment_size = (skb->len - 1) / num_fragments + 1; /* Don't even try to fragment, if we need more than 16 fragments */ - if (skb->len > max_packet_size) { + if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS) { ret = -EAGAIN; goto free_skb; } @@ -507,7 +512,8 @@ int batadv_frag_send_packet(struct sk_buff *skb, goto put_primary_if; } - skb_fragment = batadv_frag_create(skb, &frag_header, mtu); + skb_fragment = batadv_frag_create(skb, &frag_header, + max_fragment_size); if (!skb_fragment) { ret = -ENOMEM; goto put_primary_if; -- cgit v1.2.3 From 1a9070ec91b37234fe915849b767c61584c64a44 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 4 Mar 2017 15:48:50 +0100 Subject: batman-adv: Initialize gw sel_class via batadv_algo The gateway selection class variable is shared between different algorithm versions. But the interpretation of the content is algorithm specific. The initialization is therefore also algorithm specific. But this was implemented incorrectly and the initialization for BATMAN_V always overwrote the value previously written for BATMAN_IV. This could only be avoided when BATMAN_V was disabled during compile time. Using a special batadv_algo hook for this initialization avoids this problem. Fixes: 50164d8f500f ("batman-adv: B.A.T.M.A.N. V - implement GW selection logic") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 11 +++++++++++ net/batman-adv/bat_v.c | 14 +++++++++++--- net/batman-adv/gateway_common.c | 5 +++++ net/batman-adv/soft-interface.c | 1 - net/batman-adv/types.h | 2 ++ 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index f00f666e2ccd..7bfd0d7ef49d 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -2477,6 +2477,16 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) batadv_iv_ogm_schedule(hard_iface); } +/** + * batadv_iv_init_sel_class - initialize GW selection class + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv) +{ + /* set default TQ difference threshold to 20 */ + atomic_set(&bat_priv->gw.sel_class, 20); +} + static struct batadv_gw_node * batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { @@ -2823,6 +2833,7 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .del_if = batadv_iv_ogm_orig_del_if, }, .gw = { + .init_sel_class = batadv_iv_init_sel_class, .get_best_gw_node = batadv_iv_gw_get_best_gw_node, .is_eligible = batadv_iv_gw_is_eligible, #ifdef CONFIG_BATMAN_ADV_DEBUGFS diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 2ac612d7bab4..2e2471ca84e3 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -668,6 +668,16 @@ err_ifinfo1: return ret; } +/** + * batadv_v_init_sel_class - initialize GW selection class + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_v_init_sel_class(struct batadv_priv *bat_priv) +{ + /* set default throughput difference threshold to 5Mbps */ + atomic_set(&bat_priv->gw.sel_class, 50); +} + static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv, char *buff, size_t count) { @@ -1052,6 +1062,7 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .dump = batadv_v_orig_dump, }, .gw = { + .init_sel_class = batadv_v_init_sel_class, .store_sel_class = batadv_v_store_sel_class, .show_sel_class = batadv_v_show_sel_class, .get_best_gw_node = batadv_v_gw_get_best_gw_node, @@ -1092,9 +1103,6 @@ int batadv_v_mesh_init(struct batadv_priv *bat_priv) if (ret < 0) return ret; - /* set default throughput difference threshold to 5Mbps */ - atomic_set(&bat_priv->gw.sel_class, 50); - return 0; } diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 21184810d89f..3e3f91ab694f 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -253,6 +253,11 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, */ void batadv_gw_init(struct batadv_priv *bat_priv) { + if (bat_priv->algo_ops->gw.init_sel_class) + bat_priv->algo_ops->gw.init_sel_class(bat_priv); + else + atomic_set(&bat_priv->gw.sel_class, 1); + batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1, NULL, BATADV_TVLV_GW, 1, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7b3494ae6ad9..2e0b3463ab4a 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -820,7 +820,6 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); #endif atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); - atomic_set(&bat_priv->gw.sel_class, 20); atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index e913aee28c98..5137d859694c 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1489,6 +1489,7 @@ struct batadv_algo_orig_ops { /** * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) + * @init_sel_class: initialize GW selection class (optional) * @store_sel_class: parse and stores a new GW selection class (optional) * @show_sel_class: prints the current GW selection class (optional) * @get_best_gw_node: select the best GW from the list of available nodes @@ -1499,6 +1500,7 @@ struct batadv_algo_orig_ops { * @dump: dump gateways to a netlink socket (optional) */ struct batadv_algo_gw_ops { + void (*init_sel_class)(struct batadv_priv *bat_priv); ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, size_t count); ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); -- cgit v1.2.3 From 3bec247474469f769af41e8c80d3a100dd97dd76 Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Wed, 22 Feb 2017 17:17:38 +0800 Subject: iio: hid-sensor-trigger: Change get poll value function order to avoid sensor properties losing after resume from S3 In function _hid_sensor_power_state(), when hid_sensor_read_poll_value() is called, sensor's all properties will be updated by the value from sensor hardware/firmware. In some implementation, sensor hardware/firmware will do a power cycle during S3. In this case, after resume, once hid_sensor_read_poll_value() is called, sensor's all properties which are kept by driver during S3 will be changed to default value. But instead, if a set feature function is called first, sensor hardware/firmware will be recovered to the last status. So change the sensor_hub_set_feature() calling order to behind of set feature function to avoid sensor properties lose. Signed-off-by: Song Hongyan Acked-by: Srinivas Pandruvada Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index a3cce3a38300..ecf592d69043 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -51,8 +51,6 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) st->report_state.report_id, st->report_state.index, HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM); - - poll_value = hid_sensor_read_poll_value(st); } else { int val; @@ -89,7 +87,9 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) sensor_hub_get_feature(st->hsdev, st->power_state.report_id, st->power_state.index, sizeof(state_val), &state_val); - if (state && poll_value) + if (state) + poll_value = hid_sensor_read_poll_value(st); + if (poll_value > 0) msleep_interruptible(poll_value * 2); return 0; -- cgit v1.2.3 From 3ff861f59f6c1f5bf2bc03d2cd36ac3f992cbc06 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 1 Mar 2017 15:37:57 -0800 Subject: iio: magnetometer: ak8974: remove incorrect __exit markups Even if bus is not hot-pluggable, devices can be unbound from the driver via sysfs, so we should not be using __exit annotations on remove() methods. The only exception is drivers registered with platform_driver_probe() which specifically disables sysfs bind/unbind attributes. Signed-off-by: Dmitry Torokhov Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/ak8974.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index 6dd8cbd7ce95..e13370dc9b1c 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -763,7 +763,7 @@ power_off: return ret; } -static int __exit ak8974_remove(struct i2c_client *i2c) +static int ak8974_remove(struct i2c_client *i2c) { struct iio_dev *indio_dev = i2c_get_clientdata(i2c); struct ak8974 *ak8974 = iio_priv(indio_dev); @@ -845,7 +845,7 @@ static struct i2c_driver ak8974_driver = { .of_match_table = of_match_ptr(ak8974_of_match), }, .probe = ak8974_probe, - .remove = __exit_p(ak8974_remove), + .remove = ak8974_remove, .id_table = ak8974_id, }; module_i2c_driver(ak8974_driver); -- cgit v1.2.3 From 9e10889a3177340dcda7d29c6d8fbd97247b007b Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Fri, 17 Feb 2017 16:12:50 +0100 Subject: Revert "ARM: at91/dt: sama5d2: Use new compatible for ohci node" This reverts commit cab43282682e ("ARM: at91/dt: sama5d2: Use new compatible for ohci node") It depends from commit 7150bc9b4d43 ("usb: ohci-at91: Forcibly suspend ports while USB suspend") which was reverted and implemented differently. With the new implementation, the compatible string must remain the same. The compatible string introduced by this commit has been used in the default SAMA5D2 dtsi starting from Linux 4.8. As it has never been working correctly in an official release, removing it should not be breaking the stability rules. Fixes: cab43282682e ("ARM: at91/dt: sama5d2: Use new compatible for ohci node") Signed-off-by: Romain Izard cc: Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 22332be72140..528b4e9c6d3d 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -266,7 +266,7 @@ }; usb1: ohci@00400000 { - compatible = "atmel,sama5d2-ohci", "usb-ohci"; + compatible = "atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00400000 0x100000>; interrupts = <41 IRQ_TYPE_LEVEL_HIGH 2>; clocks = <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; -- cgit v1.2.3 From e51b999e11b84a766d78347afe9287e2c5b91c97 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 8 Feb 2017 15:37:11 -0500 Subject: ARM: dts: BCM5301X: Fix UARTs on bcm953012k The UARTs are outputting garbage on the console. This is due to a speed issue. We can simply use the clock speed (which is now defined in the DTSI file) and everything works fine. Signed-off-by: Jon Mason Fixes: cdc36b22 ("ARM: dts: enable clock support for BCM5301X") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm953012k.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm953012k.dts b/arch/arm/boot/dts/bcm953012k.dts index bfd923096a8c..da5aa8f5ffa7 100644 --- a/arch/arm/boot/dts/bcm953012k.dts +++ b/arch/arm/boot/dts/bcm953012k.dts @@ -53,10 +53,9 @@ }; &uart0 { - clock-frequency = <62499840>; + status = "okay"; }; &uart1 { - clock-frequency = <62499840>; status = "okay"; }; -- cgit v1.2.3 From 88d1fa70c21d7b431386cfe70cdc514d98b0c9c4 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 8 Feb 2017 15:37:12 -0500 Subject: ARM: dts: BCM5301X: Fix memory start address Memory starts at 0x80000000, not 0. 0 "works" due to mirrior of the first 128M of RAM to that address. Anything greater than 128M will quickly find nothing there. Correcting the starting address has everything working again. Signed-off-by: Jon Mason Fixes: 7eb05f6d ("ARM: dts: bcm5301x: Add BCM SVK DT files") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm953012k.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm953012k.dts b/arch/arm/boot/dts/bcm953012k.dts index da5aa8f5ffa7..ae31a5826e91 100644 --- a/arch/arm/boot/dts/bcm953012k.dts +++ b/arch/arm/boot/dts/bcm953012k.dts @@ -48,7 +48,7 @@ }; memory { - reg = <0x00000000 0x10000000>; + reg = <0x80000000 0x10000000>; }; }; -- cgit v1.2.3 From 0c2bf9f95983fe30aa2f6463cb761cd42c2d521a Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Thu, 2 Mar 2017 19:21:32 -0500 Subject: ARM: dts: BCM5301X: Correct GIC_PPI interrupt flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GIC_PPI flags were misconfigured for the timers, resulting in errors like: [ 0.000000] GIC: PPI11 is secure or misconfigured Changing them to being edge triggered corrects the issue Suggested-by: Rafał Miłecki Signed-off-by: Jon Mason Fixes: d27509f1 ("ARM: BCM5301X: add dts files for BCM4708 SoC") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm5301x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 4fbb089cf5ad..00de62dc0042 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -66,14 +66,14 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; local-timer@20600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; -- cgit v1.2.3 From 2201ac6129fa162ac24da089a034bb0971648ebb Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Mon, 20 Feb 2017 20:01:16 +0100 Subject: dmaengine: bcm2835: Fix cyclic DMA period splitting The code responsible for splitting periods into chunks that can be handled by the DMA controller missed to update total_len, the number of bytes processed in the current period, when there are more chunks to follow. Therefore total_len was stuck at 0 and the code didn't work at all. This resulted in a wrong control block layout and audio issues because the cyclic DMA callback wasn't executing on period boundaries. Fix this by adding the missing total_len update. Signed-off-by: Matthias Reichl Signed-off-by: Martin Sperl Tested-by: Clive Messer Reviewed-by: Eric Anholt Signed-off-by: Vinod Koul --- drivers/dma/bcm2835-dma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index e18dc596cf24..6204cc32d09c 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -251,8 +251,11 @@ static void bcm2835_dma_create_cb_set_length( */ /* have we filled in period_length yet? */ - if (*total_len + control_block->length < period_len) + if (*total_len + control_block->length < period_len) { + /* update number of bytes in this period so far */ + *total_len += control_block->length; return; + } /* calculate the length that remains to reach period_length */ control_block->length = period_len - *total_len; -- cgit v1.2.3 From 854c11e250730eaffec2ad56e9224c6be8a7979d Mon Sep 17 00:00:00 2001 From: Vlad Zakharov Date: Fri, 3 Mar 2017 14:30:00 +0300 Subject: ARC: [dts] add input clocks for cpu nodes ARC CPU cores are driven by core_clk so we add corresponding "clocks" property to ARC cpu nodes. Signed-off-by: Vlad Zakharov Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/skeleton.dtsi | 1 + arch/arc/boot/dts/skeleton_hs.dtsi | 1 + arch/arc/boot/dts/skeleton_hs_idu.dtsi | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/arc/boot/dts/skeleton.dtsi b/arch/arc/boot/dts/skeleton.dtsi index 65808fe0a290..2891cb266cf0 100644 --- a/arch/arc/boot/dts/skeleton.dtsi +++ b/arch/arc/boot/dts/skeleton.dtsi @@ -26,6 +26,7 @@ device_type = "cpu"; compatible = "snps,arc770d"; reg = <0>; + clocks = <&core_clk>; }; }; diff --git a/arch/arc/boot/dts/skeleton_hs.dtsi b/arch/arc/boot/dts/skeleton_hs.dtsi index 2dfe8037dfbb..5e944d3e5b74 100644 --- a/arch/arc/boot/dts/skeleton_hs.dtsi +++ b/arch/arc/boot/dts/skeleton_hs.dtsi @@ -21,6 +21,7 @@ device_type = "cpu"; compatible = "snps,archs38"; reg = <0>; + clocks = <&core_clk>; }; }; diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi index 4c11079f3565..662c5e020693 100644 --- a/arch/arc/boot/dts/skeleton_hs_idu.dtsi +++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi @@ -21,6 +21,7 @@ device_type = "cpu"; compatible = "snps,archs38xN"; reg = <0>; + clocks = <&core_clk>; }; }; -- cgit v1.2.3 From 4ed10958ae461168be310b4bbee13f745e4c1547 Mon Sep 17 00:00:00 2001 From: Vlad Zakharov Date: Fri, 3 Mar 2017 14:30:01 +0300 Subject: ARC: [dts] add cpu nodes to ARCHS SMP device tree Trying to get clock for CPU cores on SMP systems I found that I was only able to get clock for core[0]. That was because only one cpu@0 node was represented in ARC HS device tree and it was impossible to get clock for "non-existing" cores. So as ARC HS may have up to 4 cores we update device tree to match maximum possible cores quantity. Signed-off-by: Vlad Zakharov Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/skeleton_hs_idu.dtsi | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/arc/boot/dts/skeleton_hs_idu.dtsi b/arch/arc/boot/dts/skeleton_hs_idu.dtsi index 662c5e020693..54b277d7dea0 100644 --- a/arch/arc/boot/dts/skeleton_hs_idu.dtsi +++ b/arch/arc/boot/dts/skeleton_hs_idu.dtsi @@ -19,10 +19,28 @@ cpu@0 { device_type = "cpu"; - compatible = "snps,archs38xN"; + compatible = "snps,archs38"; reg = <0>; clocks = <&core_clk>; }; + cpu@1 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <1>; + clocks = <&core_clk>; + }; + cpu@2 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <2>; + clocks = <&core_clk>; + }; + cpu@3 { + device_type = "cpu"; + compatible = "snps,archs38"; + reg = <3>; + clocks = <&core_clk>; + }; }; /* TIMER0 with interrupt for clockevent */ -- cgit v1.2.3 From 7f35144cea219104fe42e7c6cd0ee5103016da2e Mon Sep 17 00:00:00 2001 From: Vlad Zakharov Date: Fri, 3 Mar 2017 14:30:02 +0300 Subject: ARC: get rate from clk driver instead of reading device tree We were reading clock rate directly from device tree "clock-frequency" property of corresponding clock node in show_cpuinfo function. Such approach is correct only in case cpu is always clocked by "fixed-clock". If we use clock driver that allows rate to be changed this won't work as rate may change during the time or even "clock-frequency" property may not be presented at all. So this commit replaces reading device tree with getting rate from clock driver. This approach is much more flexible and will work for both fixed and mutable clocks. Signed-off-by: Vlad Zakharov Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 3093fa898a23..fa62404ba58f 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -488,8 +489,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) { char *str; int cpu_id = ptr_to_cpu(v); - struct device_node *core_clk = of_find_node_by_name(NULL, "core_clk"); - u32 freq = 0; + struct device *cpu_dev = get_cpu_device(cpu_id); + struct clk *cpu_clk; + unsigned long freq = 0; if (!cpu_online(cpu_id)) { seq_printf(m, "processor [%d]\t: Offline\n", cpu_id); @@ -502,9 +504,15 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE)); - of_property_read_u32(core_clk, "clock-frequency", &freq); + cpu_clk = clk_get(cpu_dev, NULL); + if (IS_ERR(cpu_clk)) { + seq_printf(m, "CPU speed \t: Cannot get clock for processor [%d]\n", + cpu_id); + } else { + freq = clk_get_rate(cpu_clk); + } if (freq) - seq_printf(m, "CPU speed\t: %u.%02u Mhz\n", + seq_printf(m, "CPU speed\t: %lu.%02lu Mhz\n", freq / 1000000, (freq / 10000) % 100); seq_printf(m, "Bogo MIPS\t: %lu.%02lu\n", -- cgit v1.2.3 From ac8616e4c81dded650dfade49a7da283565d37ce Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 14 Feb 2017 11:35:22 +0800 Subject: clk: sunxi-ng: mp: Adjust parent rate for pre-dividers The MP style clocks support an mux with pre-dividers. While the driver correctly accounted for them in the .determine_rate callback, it did not in the .recalc_rate and .set_rate callbacks. This means when calculating the factors in the .set_rate callback, they would be off by a factor of the active pre-divider. Same goes for reading back the clock rate after it is set. Cc: stable@vger.kernel.org Fixes: 2ab836db5097 ("clk: sunxi-ng: Add M-P factor clock support") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_mp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/clk/sunxi-ng/ccu_mp.c b/drivers/clk/sunxi-ng/ccu_mp.c index 22c2ca7a2a22..b583f186a804 100644 --- a/drivers/clk/sunxi-ng/ccu_mp.c +++ b/drivers/clk/sunxi-ng/ccu_mp.c @@ -85,6 +85,10 @@ static unsigned long ccu_mp_recalc_rate(struct clk_hw *hw, unsigned int m, p; u32 reg; + /* Adjust parent_rate according to pre-dividers */ + ccu_mux_helper_adjust_parent_for_prediv(&cmp->common, &cmp->mux, + -1, &parent_rate); + reg = readl(cmp->common.base + cmp->common.reg); m = reg >> cmp->m.shift; @@ -117,6 +121,10 @@ static int ccu_mp_set_rate(struct clk_hw *hw, unsigned long rate, unsigned int m, p; u32 reg; + /* Adjust parent_rate according to pre-dividers */ + ccu_mux_helper_adjust_parent_for_prediv(&cmp->common, &cmp->mux, + -1, &parent_rate); + max_m = cmp->m.max ?: 1 << cmp->m.width; max_p = cmp->p.max ?: 1 << ((1 << cmp->p.width) - 1); -- cgit v1.2.3 From 69c9ae5041309553472ee798d7683be31bcdc434 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:29:45 +0100 Subject: clk: sunxi: ccu-sun5i needs nkmp A randconfig build ran into this rare link error: drivers/clk/sunxi-ng/ccu-sun5i.o:(.data.__compound_literal.1+0x4): undefined reference to `ccu_nkmp_ops' drivers/clk/sunxi-ng/ccu-sun5i.o:(.data.__compound_literal.7+0x4): undefined reference to `ccu_nkmp_ops' This adds the missing 'select'. Fixes: 5e73761786d6 ("clk: sunxi-ng: Add sun5i CCU driver") Signed-off-by: Arnd Bergmann Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig index 695bbf9ef428..72109d2cf41b 100644 --- a/drivers/clk/sunxi-ng/Kconfig +++ b/drivers/clk/sunxi-ng/Kconfig @@ -80,6 +80,7 @@ config SUN6I_A31_CCU select SUNXI_CCU_DIV select SUNXI_CCU_NK select SUNXI_CCU_NKM + select SUNXI_CCU_NKMP select SUNXI_CCU_NM select SUNXI_CCU_MP select SUNXI_CCU_PHASE -- cgit v1.2.3 From 9ad0bb39fce319d7b92c17d306ed0a9f70a02e7d Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 14 Feb 2017 10:23:32 +0800 Subject: clk: sunxi-ng: sun6i: Fix enable bit offset for hdmi-ddc module clock The enable bit offset for the hdmi-ddc module clock is wrong. It is pointing to the main hdmi module clock enable bit. Reported-by: Bob Ham Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks") Cc: stable@vger.kernel.org # 4.9.x- Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun6i-a31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c index 4c9a920ff4ab..89e68d29bf45 100644 --- a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c +++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c @@ -608,7 +608,7 @@ static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents, 0x150, 0, 4, 24, 2, BIT(31), CLK_SET_RATE_PARENT); -static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(31), 0); +static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(30), 0); static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0); -- cgit v1.2.3 From 39319f504b5d91e853f5ec7753a56e43915fcaf4 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 30 Nov 2016 12:05:03 +0100 Subject: ARM: sun8i: Fix the mali clock rate The Mali clock rate was improperly assumed to be 408MHz, while it was really 384Mhz, 408MHz being the "extreme" frequency, and definitely not stable. Switch for the stable, correct frequency for the GPU. Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-a23-a33.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi index a952cc0703cc..8a3ed21cb7bc 100644 --- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi @@ -495,7 +495,7 @@ resets = <&ccu RST_BUS_GPU>; assigned-clocks = <&ccu CLK_GPU>; - assigned-clock-rates = <408000000>; + assigned-clock-rates = <384000000>; }; gic: interrupt-controller@01c81000 { -- cgit v1.2.3 From 4b30eebfc35c67771b5f58d9274d3e321b72d7a8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 3 Mar 2017 04:25:09 +0000 Subject: ASoC: rcar: avoid SSI_MODEx settings for SSI8 SSI8 is is sharing pin with SSI7, and nothing to do for SSI_MODEx. It is special pin and it needs special settings whole system, but we can't confirm it, because we never have SSI8 available board. This patch fixup SSI_MODEx settings error for SSI8 on connection test, but should be confirmed behavior on real board in the future. Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown --- sound/soc/sh/rcar/ssiu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/ssiu.c b/sound/soc/sh/rcar/ssiu.c index 4e817c8a18c0..14fafdaf1395 100644 --- a/sound/soc/sh/rcar/ssiu.c +++ b/sound/soc/sh/rcar/ssiu.c @@ -64,7 +64,11 @@ static int rsnd_ssiu_init(struct rsnd_mod *mod, mask1 = (1 << 4) | (1 << 20); /* mask sync bit */ mask2 = (1 << 4); /* mask sync bit */ val1 = val2 = 0; - if (rsnd_ssi_is_pin_sharing(io)) { + if (id == 8) { + /* + * SSI8 pin is sharing with SSI7, nothing to do. + */ + } else if (rsnd_ssi_is_pin_sharing(io)) { int shift = -1; switch (id) { -- cgit v1.2.3 From 04c8f2bf9117de7b8e8bc0b90e8c4bff15f4f613 Mon Sep 17 00:00:00 2001 From: Jeeja KP Date: Wed, 1 Mar 2017 22:41:23 +0530 Subject: ASoC: hdac_hdmi: avoid reference to invalid variable of the pin list Using pin list array iterator outside the iteration of the list can point to dummy element, which can be invalid. So don't use pin variable outside the pin list iteration. This fixes the following coccinelle warning: sound/soc/codecs/hdac_hdmi.c:1419:5-8: ERROR: invalid reference to the index variable of the iterator Fixes: 2acd8309a3a4('ASoC: hdac_hdmi: Add support to handle MST capable pin') Reported-by: Julia Lawall Signed-off-by: Jeeja KP Acked-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 78fca8acd3ec..bb405698e102 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1534,21 +1534,20 @@ static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe) pin->mst_capable = false; /* if not MST, default is port[0] */ hport = &pin->ports[0]; - goto out; } else { for (i = 0; i < pin->num_ports; i++) { pin->mst_capable = true; if (pin->ports[i].id == pipe) { hport = &pin->ports[i]; - goto out; + break; } } } + + if (hport) + hdac_hdmi_present_sense(pin, hport); } -out: - if (pin && hport) - hdac_hdmi_present_sense(pin, hport); } static struct i915_audio_component_audio_ops aops = { -- cgit v1.2.3 From 2fe42dd0f13812d38daaf05bcb1fd996afd0e87a Mon Sep 17 00:00:00 2001 From: Jeeja KP Date: Wed, 1 Mar 2017 22:41:24 +0530 Subject: ASoC: hdac_hdmi: don't update the iterator in pcm list remove Fix not to update the iterator element, instead use list_del to remove entry from the list. This fixes the following coccinelle and static checker warning: sound/soc/codecs/hdac_hdmi.c:1884:2-21:iterator with update on line 1885 sound/soc/codecs/hdac_hdmi.c:2011 hdac_hdmi_dev_remove() error: potential NULL dereference 'port'. Fixes: e0e5d3e5a53b('ASoC: hdac_hdmi: Add support for multiple ports to a PCM') Reported-by: Julia Lawall Reported-by: Dan Carpenter Signed-off-by: Jeeja KP Acked-by: Vinod Koul Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hdmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index bb405698e102..fd272a40485b 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1997,7 +1997,7 @@ static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev) struct hdac_hdmi_pin *pin, *pin_next; struct hdac_hdmi_cvt *cvt, *cvt_next; struct hdac_hdmi_pcm *pcm, *pcm_next; - struct hdac_hdmi_port *port; + struct hdac_hdmi_port *port, *port_next; int i; snd_soc_unregister_codec(&edev->hdac.dev); @@ -2007,8 +2007,9 @@ static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev) if (list_empty(&pcm->port_list)) continue; - list_for_each_entry(port, &pcm->port_list, head) - port = NULL; + list_for_each_entry_safe(port, port_next, + &pcm->port_list, head) + list_del(&port->head); list_del(&pcm->head); kfree(pcm); -- cgit v1.2.3 From 9ce9f7999741f342eeffd036ab09213a2fa93040 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 13 Feb 2017 13:49:58 -0600 Subject: gpio: altera-a10sr: Set gpio_chip parent property Set the gpio_chip parent property since some recent functions such as devprop_gpiochip_set_names() can use it. Signed-off-by: Thor Thayer Signed-off-by: Linus Walleij --- drivers/gpio/gpio-altera-a10sr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c index 9e1a138fed53..16a8951b2bed 100644 --- a/drivers/gpio/gpio-altera-a10sr.c +++ b/drivers/gpio/gpio-altera-a10sr.c @@ -96,7 +96,7 @@ static int altr_a10sr_gpio_probe(struct platform_device *pdev) gpio->regmap = a10sr->regmap; gpio->gp = altr_a10sr_gc; - + gpio->gp.parent = pdev->dev.parent; gpio->gp.of_node = pdev->dev.of_node; ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio); -- cgit v1.2.3 From fa6256db033067b57318decdc1c583512a1f8f68 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 15 Feb 2017 02:02:06 +0300 Subject: gpio: mockup: return -EFAULT if copy_from_user() fails copy_from_user() returns the number of bytes remaining to be copied but we want to return negative error codes on failue. Fixes: 9202ba2397d1 ("gpio: mockup: implement event injecting over debugfs") Signed-off-by: Dan Carpenter Acked-by: Bartosz Golaszewski Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mockup.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 06dac72cb69c..d99338689213 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -197,7 +197,7 @@ static ssize_t gpio_mockup_event_write(struct file *file, struct seq_file *sfile; struct gpio_desc *desc; struct gpio_chip *gc; - int status, val; + int val; char buf; sfile = file->private_data; @@ -206,9 +206,8 @@ static ssize_t gpio_mockup_event_write(struct file *file, chip = priv->chip; gc = &chip->gc; - status = copy_from_user(&buf, usr_buf, 1); - if (status) - return status; + if (copy_from_user(&buf, usr_buf, 1)) + return -EFAULT; if (buf == '0') val = 0; -- cgit v1.2.3 From b115bebc07f282067eccc06fd5aa3060ab1426da Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Feb 2017 16:13:44 +0100 Subject: gpio: xgene: mark PM functions as __maybe_unused When CONFIG_PM_SLEEP is disabled, we get a warning about unused functions: drivers/gpio/gpio-xgene.c:155:12: warning: 'xgene_gpio_resume' defined but not used [-Wunused-function] static int xgene_gpio_resume(struct device *dev) ^~~~~~~~~~~~~~~~~ drivers/gpio/gpio-xgene.c:142:12: warning: 'xgene_gpio_suspend' defined but not used [-Wunused-function] static int xgene_gpio_suspend(struct device *dev) The warnings are harmless and can be avoided by simplifying the code and marking the functions as __maybe_unused. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij --- drivers/gpio/gpio-xgene.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c index 40a8881c2ce8..f1c6ec17b90a 100644 --- a/drivers/gpio/gpio-xgene.c +++ b/drivers/gpio/gpio-xgene.c @@ -42,9 +42,7 @@ struct xgene_gpio { struct gpio_chip chip; void __iomem *base; spinlock_t lock; -#ifdef CONFIG_PM u32 set_dr_val[XGENE_MAX_GPIO_BANKS]; -#endif }; static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset) @@ -138,8 +136,7 @@ static int xgene_gpio_dir_out(struct gpio_chip *gc, return 0; } -#ifdef CONFIG_PM -static int xgene_gpio_suspend(struct device *dev) +static __maybe_unused int xgene_gpio_suspend(struct device *dev) { struct xgene_gpio *gpio = dev_get_drvdata(dev); unsigned long bank_offset; @@ -152,7 +149,7 @@ static int xgene_gpio_suspend(struct device *dev) return 0; } -static int xgene_gpio_resume(struct device *dev) +static __maybe_unused int xgene_gpio_resume(struct device *dev) { struct xgene_gpio *gpio = dev_get_drvdata(dev); unsigned long bank_offset; @@ -166,10 +163,6 @@ static int xgene_gpio_resume(struct device *dev) } static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume); -#define XGENE_GPIO_PM_OPS (&xgene_gpio_pm) -#else -#define XGENE_GPIO_PM_OPS NULL -#endif static int xgene_gpio_probe(struct platform_device *pdev) { @@ -241,7 +234,7 @@ static struct platform_driver xgene_gpio_driver = { .name = "xgene-gpio", .of_match_table = xgene_gpio_of_match, .acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match), - .pm = XGENE_GPIO_PM_OPS, + .pm = &xgene_gpio_pm, }, .probe = xgene_gpio_probe, }; -- cgit v1.2.3 From f759921cfbf4847319d197a6ed7c9534d593f8bc Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Mon, 20 Feb 2017 09:41:45 +0800 Subject: gpio: altera: Use handle_level_irq when configured as a level_high When a threaded irq handler is chained attached to one of the gpio pins when configure for level irq the altera_gpio_irq_leveL_high_handler does not mask the interrupt while being handled by the chained irq. This resulting in the threaded irq not getting enough cycles to complete quickly enough before the irq was disabled as faulty. handle_level_irq should be used in this situation instead of handle_simple_irq. In gpiochip_irqchip_add set default handler to handle_bad_irq as per Documentation/gpio/driver.txt. Then set the correct handler in the set_type callback. Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-altera.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 5bddbd507ca9..3fe6a21e05a5 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d, altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d)); - if (type == IRQ_TYPE_NONE) + if (type == IRQ_TYPE_NONE) { + irq_set_handler_locked(d, handle_bad_irq); return 0; - if (type == IRQ_TYPE_LEVEL_HIGH && - altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH) - return 0; - if (type == IRQ_TYPE_EDGE_RISING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING) - return 0; - if (type == IRQ_TYPE_EDGE_FALLING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING) - return 0; - if (type == IRQ_TYPE_EDGE_BOTH && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH) + } + if (type == altera_gc->interrupt_trigger) { + if (type == IRQ_TYPE_LEVEL_HIGH) + irq_set_handler_locked(d, handle_level_irq); + else + irq_set_handler_locked(d, handle_simple_irq); return 0; - + } + irq_set_handler_locked(d, handle_bad_irq); return -EINVAL; } @@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } - static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; @@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev) altera_gc->interrupt_trigger = reg; ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0, - handle_simple_irq, IRQ_TYPE_NONE); + handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(&pdev->dev, "could not add irqchip\n"); -- cgit v1.2.3 From f2f10b7e722a75c6d75a7f7cd06b0eee3ae20f7c Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Fri, 17 Feb 2017 07:40:52 -0600 Subject: HID: chicony: Add support for another ASUS Zen AiO keyboard Add support for media keys on the keyboard that comes with the Asus V221ID and ZN241IC All In One computers. The keys to support here are WLAN, BRIGHTNESSDOWN and BRIGHTNESSUP. This device is not visibly branded as Chicony, and the USB Vendor ID suggests that it is a JESS device. However this seems like the right place to put it: the usage codes are identical to the currently supported devices, and this driver already supports the ASUS AIO keyboard AK1D. Signed-off-by: Daniel Drake Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 4 ++-- drivers/hid/hid-chicony.c | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 1aeb80e52424..8eab3200ac9a 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -175,11 +175,11 @@ config HID_CHERRY Support for Cherry Cymotion keyboard. config HID_CHICONY - tristate "Chicony Tactical pad" + tristate "Chicony devices" depends on HID default !EXPERT ---help--- - Support for Chicony Tactical pad. + Support for Chicony Tactical pad and special keys on Chicony keyboards. config HID_CORSAIR tristate "Corsair devices" diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index bc3cec199fee..f04ed9aabc3f 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { } }; MODULE_DEVICE_TABLE(hid, ch_devices); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e9e87d337446..ae01ae601d74 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1910,6 +1910,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 86c95d30ac80..b3df60da0297 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -557,6 +557,7 @@ #define USB_VENDOR_ID_JESS 0x0c45 #define USB_DEVICE_ID_JESS_YUREX 0x1010 +#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112 #define USB_VENDOR_ID_JESS2 0x0f30 #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111 -- cgit v1.2.3 From a687c5765b5ae19fe559e14615ddc87ebb46d409 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Fri, 24 Feb 2017 16:14:15 -0800 Subject: HID: sony: Fix input device leak when connecting a DS4 twice using USB/BT When a user connects a DS4 twice using USB and BT, we reject the second device connection after the setup work. We then perform a cleanup, but during cleanup we are not removing the touchpad device. This leads to leakage of an input device, which we would never remove. It can likely result into a kernel oops as well when the touchpad evdev node is accessed and the underlaying HID device has been removed from the system. [jkosina@suse.cz: added stable annotation] Fixes: ac797b95f532 ("HID: sony: Make the DS4 touchpad a separate device") Cc: stable@vger.kernel.org Signed-off-by: Roderick Colenbrander Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index f405b07d0381..740996f9bdd4 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2632,6 +2632,8 @@ err_stop: sony_leds_remove(sc); if (sc->quirks & SONY_BATTERY_SUPPORT) sony_battery_remove(sc); + if (sc->touchpad) + sony_unregister_touchpad(sc); sony_cancel_work_sync(sc); kfree(sc->output_report_dmabuf); sony_remove_dev_list(sc); -- cgit v1.2.3 From 4bd035eae255a4f1464dca063885fa415e58c12e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Feb 2017 00:26:09 +0000 Subject: EDAC, xgene: Fix wrongly spelled "procesing" Fix spelling mistake in dev_err message. Signed-off-by: Colin Ian King Reviewed-by: Loc Ho Cc: linux-edac Link: http://lkml.kernel.org/r/20170223002609.9440-1-colin.king@canonical.com Signed-off-by: Borislav Petkov --- drivers/edac/xgene_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 6c270d9d304a..669246056812 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -1596,7 +1596,7 @@ static void xgene_edac_pa_report(struct edac_device_ctl_info *edac_dev) reg = readl(ctx->dev_csr + IOBPATRANSERRINTSTS); if (!reg) goto chk_iob_axi0; - dev_err(edac_dev->dev, "IOB procesing agent (PA) transaction error\n"); + dev_err(edac_dev->dev, "IOB processing agent (PA) transaction error\n"); if (reg & IOBPA_RDATA_CORRUPT_MASK) dev_err(edac_dev->dev, "IOB PA read data RAM error\n"); if (reg & IOBPA_M_RDATA_CORRUPT_MASK) -- cgit v1.2.3 From 67430a39ca7a6af28aade5acb92d43ee257c1014 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 6 Mar 2017 16:54:33 +0000 Subject: ASoC: wm_adsp: Return an error on write to a disabled volatile control Volatile controls should only be accessed when the firmware is active, currently however writes to these controls will succeed, but the data will be lost, if the firmware is powered down. Update this behaviour such that an error is returned the same as it is for reads. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index d151224ffcca..6313b3da967b 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -899,7 +899,10 @@ static int wm_coeff_put(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - memcpy(ctl->cache, p, ctl->len); + if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) + ret = -EPERM; + else + memcpy(ctl->cache, p, ctl->len); ctl->set = 1; if (ctl->enabled && ctl->dsp->running) @@ -926,6 +929,8 @@ static int wm_coeff_tlv_put(struct snd_kcontrol *kctl, ctl->set = 1; if (ctl->enabled && ctl->dsp->running) ret = wm_coeff_write_control(ctl, ctl->cache, size); + else if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) + ret = -EPERM; } mutex_unlock(&ctl->dsp->pwr_lock); -- cgit v1.2.3 From 7b4af793a7a4f8e04175eb6600ba9c8ba855ad20 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 6 Mar 2017 16:54:34 +0000 Subject: ASoC: wm_adsp: Acknowledge controls should also check the DSP is running We should not be writing acknowledge controls until the firmware is running, as in the case of preloaded firmwares the DSP memory may be unaccessible to whilst in the preloaded state. This means a write to the control during this time could be lost. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 6313b3da967b..bbdb72f73df1 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -952,7 +952,7 @@ static int wm_coeff_put_acked(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - if (ctl->enabled) + if (ctl->enabled && ctl->dsp->running) ret = wm_coeff_write_acked_control(ctl, val); else ret = -EPERM; -- cgit v1.2.3 From 68598d2ea886322f9b4b0058e5b288418622de95 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 1 Mar 2017 02:12:50 +0300 Subject: btrfs: remove btrfs_err_str function from uapi/linux/btrfs.h btrfs_err_str function is not called from anywhere and is replicated in the userspace headers for btrfs-progs. It's removal also fixes the following linux/btrfs.h userspace compilation error: /usr/include/linux/btrfs.h: In function 'btrfs_err_str': /usr/include/linux/btrfs.h:740:11: error: 'NULL' undeclared (first use in this function) return NULL; Suggested-by: Jeff Mahoney Signed-off-by: Dmitry V. Levin Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs.h | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index db4c253f8011..dcfc3a5a9cb1 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -713,33 +713,6 @@ enum btrfs_err_code { BTRFS_ERROR_DEV_ONLY_WRITABLE, BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS }; -/* An error code to error string mapping for the kernel -* error codes -*/ -static inline char *btrfs_err_str(enum btrfs_err_code err_code) -{ - switch (err_code) { - case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET: - return "unable to go below two devices on raid1"; - case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET: - return "unable to go below four devices on raid10"; - case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET: - return "unable to go below two devices on raid5"; - case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET: - return "unable to go below three devices on raid6"; - case BTRFS_ERROR_DEV_TGT_REPLACE: - return "unable to remove the dev_replace target dev"; - case BTRFS_ERROR_DEV_MISSING_NOT_FOUND: - return "no missing devices found to remove"; - case BTRFS_ERROR_DEV_ONLY_WRITABLE: - return "unable to remove the only writeable device"; - case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS: - return "add/delete/balance/replace/resize operation "\ - "in progress"; - default: - return NULL; - } -} #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) -- cgit v1.2.3 From cda82ace3d4eff6a38f00a65db435fe35a3916f0 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 21 Dec 2016 11:26:55 +0100 Subject: dt-bindings: arm: update Armada CP110 system controller binding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that in the CP110 HW block present in Marvell Armada 7K/8K SoCs, gatable clock n°18 not only controls SD/MMC, but also the GOP block. This commit updates the Device Tree binding for this piece of hardware accordingly. Signed-off-by: Thomas Petazzoni Signed-off-by: Stephen Boyd --- .../devicetree/bindings/arm/marvell/cp110-system-controller0.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt index 30c546900b60..07dbb358182c 100644 --- a/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt +++ b/Documentation/devicetree/bindings/arm/marvell/cp110-system-controller0.txt @@ -45,7 +45,7 @@ The following clocks are available: - 1 15 SATA - 1 16 SATA USB - 1 17 Main - - 1 18 SD/MMC + - 1 18 SD/MMC/GOP - 1 21 Slow IO (SPI, NOR, BootROM, I2C, UART) - 1 22 USB3H0 - 1 23 USB3H1 @@ -65,7 +65,7 @@ Required properties: "cpm-audio", "cpm-communit", "cpm-nand", "cpm-ppv2", "cpm-sdio", "cpm-mg-domain", "cpm-mg-core", "cpm-xor1", "cpm-xor0", "cpm-gop-dp", "none", "cpm-pcie_x10", "cpm-pcie_x11", "cpm-pcie_x4", "cpm-pcie-xor", "cpm-sata", - "cpm-sata-usb", "cpm-main", "cpm-sd-mmc", "none", "none", "cpm-slow-io", + "cpm-sata-usb", "cpm-main", "cpm-sd-mmc-gop", "none", "none", "cpm-slow-io", "cpm-usb3h0", "cpm-usb3h1", "cpm-usb3dev", "cpm-eip150", "cpm-eip197"; Example: @@ -78,6 +78,6 @@ Example: gate-clock-output-names = "cpm-audio", "cpm-communit", "cpm-nand", "cpm-ppv2", "cpm-sdio", "cpm-mg-domain", "cpm-mg-core", "cpm-xor1", "cpm-xor0", "cpm-gop-dp", "none", "cpm-pcie_x10", "cpm-pcie_x11", "cpm-pcie_x4", "cpm-pcie-xor", "cpm-sata", - "cpm-sata-usb", "cpm-main", "cpm-sd-mmc", "none", "none", "cpm-slow-io", + "cpm-sata-usb", "cpm-main", "cpm-sd-mmc-gop", "none", "none", "cpm-slow-io", "cpm-usb3h0", "cpm-usb3h1", "cpm-usb3dev", "cpm-eip150", "cpm-eip197"; }; -- cgit v1.2.3 From 253160a8ad06bcc1c1db16a58b1f06d5128f6c5e Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Mon, 20 Feb 2017 15:20:56 +0200 Subject: clk: core: Copy connection id Some drivers use sprintf to build clk connection id names but the clk core will save those strings and occasionally print them back. Duplicate the con_id strings instead of fixing all the users. Signed-off-by: Leonard Crestez Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 0fb39fe217d1..67201f67a14a 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2502,7 +2502,7 @@ struct clk *__clk_create_clk(struct clk_hw *hw, const char *dev_id, clk->core = hw->core; clk->dev_id = dev_id; - clk->con_id = con_id; + clk->con_id = kstrdup_const(con_id, GFP_KERNEL); clk->max_rate = ULONG_MAX; clk_prepare_lock(); @@ -2518,6 +2518,7 @@ void __clk_free_clk(struct clk *clk) hlist_del(&clk->clks_node); clk_prepare_unlock(); + kfree_const(clk->con_id); kfree(clk); } -- cgit v1.2.3 From 9b1b23f03abdd25ffde8bbfe5824b89bc0448c28 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 1 Mar 2017 22:00:41 +0100 Subject: clk: rockchip: add "," to mux_pll_src_apll_dpll_gpll_usb480m_p on rk3036 The mux_pll_src_apll_dpll_gpll_usb480m_p parent list was missing a "," between the 3rd and 4th parent names, making them fall together and thus lookups fail. Fix that. Fixes: 5190c08b2989 ("clk: rockchip: add clock controller for rk3036") Signed-off-by: Heiko Stuebner Signed-off-by: Stephen Boyd --- drivers/clk/rockchip/clk-rk3036.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c index 924f560dcf80..dcde70f4c105 100644 --- a/drivers/clk/rockchip/clk-rk3036.c +++ b/drivers/clk/rockchip/clk-rk3036.c @@ -127,7 +127,7 @@ PNAME(mux_ddrphy_p) = { "dpll_ddr", "gpll_ddr" }; PNAME(mux_pll_src_3plls_p) = { "apll", "dpll", "gpll" }; PNAME(mux_timer_p) = { "xin24m", "pclk_peri_src" }; -PNAME(mux_pll_src_apll_dpll_gpll_usb480m_p) = { "apll", "dpll", "gpll" "usb480m" }; +PNAME(mux_pll_src_apll_dpll_gpll_usb480m_p) = { "apll", "dpll", "gpll", "usb480m" }; PNAME(mux_mmc_src_p) = { "apll", "dpll", "gpll", "xin24m" }; PNAME(mux_i2s_pre_p) = { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" }; -- cgit v1.2.3 From f8ba2d68e54fbca340ad0fce97397291ba9637bc Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 1 Mar 2017 22:00:42 +0100 Subject: clk: rockchip: Make uartpll a child of the gpll on rk3036 The shared uart-pll is on boot a child of the apll that can get changed by cpu frequency scaling. So move it away to the more stable gpll to make sure the uart doesn't break on cpu frequency changes. This turned up during the 4.11 merge-window when commit 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") added general termios enablement making the uart on rk3036 change frequency and thus making it susceptible for the frequency scaling issue. Signed-off-by: Heiko Stuebner Signed-off-by: Stephen Boyd --- drivers/clk/rockchip/clk-rk3036.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/clk/rockchip/clk-rk3036.c b/drivers/clk/rockchip/clk-rk3036.c index dcde70f4c105..00d4150e33c3 100644 --- a/drivers/clk/rockchip/clk-rk3036.c +++ b/drivers/clk/rockchip/clk-rk3036.c @@ -450,6 +450,13 @@ static void __init rk3036_clk_init(struct device_node *np) return; } + /* + * Make uart_pll_clk a child of the gpll, as all other sources are + * not that usable / stable. + */ + writel_relaxed(HIWORD_UPDATE(0x2, 0x3, 10), + reg_base + RK2928_CLKSEL_CON(13)); + ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); if (IS_ERR(ctx)) { pr_err("%s: rockchip clk init failed\n", __func__); -- cgit v1.2.3 From d1a6fe41d3c4ff0d26f0b186d774493555ca5282 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 24 Feb 2017 11:48:41 +0900 Subject: ASoC: Intel: Skylake: fix invalid memory access due to wrong reference of pointer In 'skl_tplg_set_module_init_data()', a pointer to 'params' member of 'struct skl_algo_data' is calculated, then casted to (u32 *) and assigned to a member of configuration data. The configuration data is passed to the other functions and used to process intel IPC. In this processing, the value of member is used to get message data, however this can bring invalid memory access in 'skl_set_module_params()' as a result of calculation of a pointer for actual message data. (sound/soc/intel/skylake/skl-topology.c) skl_tplg_init_pipe_modules() ->skl_tplg_set_module_init_data() (has this bug) ->skl_tplg_set_module_params() (sound/soc/intel/skylake/skl-messages.c) ->skl_set_module_params() ((char *)param) + data_offset This commit fixes the bug. Fixes: abb740033b56 ("ASoC: Intel: Skylake: Add support to configure module params") Signed-off-by: Takashi Sakamoto Acked-by: Vinod Koul Signed-off-by: Mark Brown Cc: # v4.5+ --- sound/soc/intel/skylake/skl-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index ed58b5b3555a..2dbfb1b24ef4 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -512,7 +512,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w) if (bc->set_params != SKL_PARAM_INIT) continue; - mconfig->formats_config.caps = (u32 *)&bc->params; + mconfig->formats_config.caps = (u32 *)bc->params; mconfig->formats_config.caps_size = bc->size; break; -- cgit v1.2.3 From cd3ac9affc43b44f49d7af70d275f0bd426ba643 Mon Sep 17 00:00:00 2001 From: Songjun Wu Date: Fri, 24 Feb 2017 15:10:43 +0800 Subject: ASoC: atmel-classd: fix audio clock rate Fix the audio clock rate according to the datasheet. Reported-by: Dushara Jayasinghe Signed-off-by: Songjun Wu Acked-by: Nicolas Ferre Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/atmel/atmel-classd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c index 89ac5f5a93eb..7ae46c2647d4 100644 --- a/sound/soc/atmel/atmel-classd.c +++ b/sound/soc/atmel/atmel-classd.c @@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai, } #define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8) -#define CLASSD_ACLK_RATE_12M288_MPY_8 (12228 * 1000 * 8) +#define CLASSD_ACLK_RATE_12M288_MPY_8 (12288 * 1000 * 8) static struct { int rate; -- cgit v1.2.3 From 45838660e34d90db8d4f7cbc8fd66e8aff79f4fe Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 7 Mar 2017 09:31:29 -0800 Subject: Input: i8042 - add noloop quirk for Dell Embedded Box PC 3000 The aux port does not get detected without noloop quirk, so external PS/2 mouse cannot work as result. The PS/2 mouse can work with this quirk. BugLink: https://bugs.launchpad.net/bugs/1591053 Signed-off-by: Kai-Heng Feng Reviewed-by: Marcos Paulo de Souza Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 05afd16ea9c9..e856b073d533 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -119,6 +119,13 @@ static const struct dmi_system_id __initconst i8042_dmi_noloop_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "DL760"), }, }, + { + /* Dell Embedded Box PC 3000 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Embedded Box PC 3000"), + }, + }, { /* OQO Model 01 */ .matches = { -- cgit v1.2.3 From a04e54f2c35823ca32d56afcd5cea5b783e2f51a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 3 Nov 2016 23:06:53 -0700 Subject: target/pscsi: Fix TYPE_TAPE + TYPE_MEDIMUM_CHANGER export The following fixes a divide by zero OOPs with TYPE_TAPE due to pscsi_tape_read_blocksize() failing causing a zero sd->sector_size being propigated up via dev_attrib.hw_block_size. It also fixes another long-standing bug where TYPE_TAPE and TYPE_MEDIMUM_CHANGER where using pscsi_create_type_other(), which does not call scsi_device_get() to take the device reference. Instead, rename pscsi_create_type_rom() to pscsi_create_type_nondisk() and use it for all cases. Finally, also drop a dump_stack() in pscsi_get_blocks() for non TYPE_DISK, which in modern target-core can get invoked via target_sense_desc_format() during CHECK_CONDITION. Reported-by: Malcolm Haak Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_pscsi.c | 47 ++++++++++---------------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index a8f8e53f2f57..44d92f23a3f0 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -154,7 +154,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, buf = kzalloc(12, GFP_KERNEL); if (!buf) - return; + goto out_free; memset(cdb, 0, MAX_COMMAND_SIZE); cdb[0] = MODE_SENSE; @@ -169,9 +169,10 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, * If MODE_SENSE still returns zero, set the default value to 1024. */ sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]); +out_free: if (!sdev->sector_size) sdev->sector_size = 1024; -out_free: + kfree(buf); } @@ -314,9 +315,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, sd->lun, sd->queue_depth); } - dev->dev_attrib.hw_block_size = sd->sector_size; + dev->dev_attrib.hw_block_size = + min_not_zero((int)sd->sector_size, 512); dev->dev_attrib.hw_max_sectors = - min_t(int, sd->host->max_sectors, queue_max_hw_sectors(q)); + min_not_zero(sd->host->max_sectors, queue_max_hw_sectors(q)); dev->dev_attrib.hw_queue_depth = sd->queue_depth; /* @@ -339,8 +341,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, /* * For TYPE_TAPE, attempt to determine blocksize with MODE_SENSE. */ - if (sd->type == TYPE_TAPE) + if (sd->type == TYPE_TAPE) { pscsi_tape_read_blocksize(dev, sd); + dev->dev_attrib.hw_block_size = sd->sector_size; + } return 0; } @@ -406,7 +410,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) /* * Called with struct Scsi_Host->host_lock called. */ -static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) +static int pscsi_create_type_nondisk(struct se_device *dev, struct scsi_device *sd) __releases(sh->host_lock) { struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; @@ -433,28 +437,6 @@ static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) return 0; } -/* - * Called with struct Scsi_Host->host_lock called. - */ -static int pscsi_create_type_other(struct se_device *dev, - struct scsi_device *sd) - __releases(sh->host_lock) -{ - struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; - struct Scsi_Host *sh = sd->host; - int ret; - - spin_unlock_irq(sh->host_lock); - ret = pscsi_add_device_to_list(dev, sd); - if (ret) - return ret; - - pr_debug("CORE_PSCSI[%d] - Added Type: %s for %d:%d:%d:%llu\n", - phv->phv_host_id, scsi_device_type(sd->type), sh->host_no, - sd->channel, sd->id, sd->lun); - return 0; -} - static int pscsi_configure_device(struct se_device *dev) { struct se_hba *hba = dev->se_hba; @@ -542,11 +524,8 @@ static int pscsi_configure_device(struct se_device *dev) case TYPE_DISK: ret = pscsi_create_type_disk(dev, sd); break; - case TYPE_ROM: - ret = pscsi_create_type_rom(dev, sd); - break; default: - ret = pscsi_create_type_other(dev, sd); + ret = pscsi_create_type_nondisk(dev, sd); break; } @@ -611,8 +590,7 @@ static void pscsi_free_device(struct se_device *dev) else if (pdv->pdv_lld_host) scsi_host_put(pdv->pdv_lld_host); - if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM)) - scsi_device_put(sd); + scsi_device_put(sd); pdv->pdv_sd = NULL; } @@ -1064,7 +1042,6 @@ static sector_t pscsi_get_blocks(struct se_device *dev) if (pdv->pdv_bd && pdv->pdv_bd->bd_part) return pdv->pdv_bd->bd_part->nr_sects; - dump_stack(); return 0; } -- cgit v1.2.3 From 13603685c1f12c67a7a2427f00b63f39a2b6f7c9 Mon Sep 17 00:00:00 2001 From: Max Lohrmann Date: Tue, 7 Mar 2017 22:09:56 -0800 Subject: target: Fix VERIFY_16 handling in sbc_parse_cdb As reported by Max, the Windows 2008 R2 chkdsk utility expects VERIFY_16 to be supported, and does not handle the returned CHECK_CONDITION properly, resulting in an infinite loop. The kernel will log huge amounts of this error: kernel: TARGET_CORE[iSCSI]: Unsupported SCSI Opcode 0x8f, sending CHECK_CONDITION. Signed-off-by: Max Lohrmann Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 68d8aef7ab78..c194063f169b 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1105,9 +1105,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; break; case VERIFY: + case VERIFY_16: size = 0; - sectors = transport_get_sectors_10(cdb); - cmd->t_task_lba = transport_lba_32(cdb); + if (cdb[0] == VERIFY) { + sectors = transport_get_sectors_10(cdb); + cmd->t_task_lba = transport_lba_32(cdb); + } else { + sectors = transport_get_sectors_16(cdb); + cmd->t_task_lba = transport_lba_64(cdb); + } cmd->execute_cmd = sbc_emulate_noop; goto check_lba; case REZERO_UNIT: -- cgit v1.2.3 From 2e6c7747730296a6d4fd700894286db1132598c4 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 16 Feb 2017 12:39:01 +0000 Subject: MIPS: Force o32 fp64 support on 32bit MIPS64r6 kernels When a 32-bit kernel is configured to support MIPS64r6 (CPU_MIPS64_R6), MIPS_O32_FP64_SUPPORT won't be selected as it should be because MIPS32_O32 is disabled (o32 is already the default ABI available on 32-bit kernels). This results in userland FP breakage as CP0_Status.FR is read-only 1 since r6 (when an FPU is present) so __enable_fpu() will fail to clear FR. This causes the FPU emulator to get used which will incorrectly emulate 32-bit FPU registers. Force o32 fp64 support in this case by also selecting MIPS_O32_FP64_SUPPORT from CPU_MIPS64_R6 if 32BIT. Fixes: 4e9d324d4288 ("MIPS: Require O32 FP64 support for MIPS64 with O32 compat") Signed-off-by: James Hogan Reviewed-by: Paul Burton Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 4.0.x- Patchwork: https://patchwork.linux-mips.org/patch/15310/ Signed-off-by: James Hogan --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a008a9f03072..e0bb576410bb 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1531,7 +1531,7 @@ config CPU_MIPS64_R6 select CPU_SUPPORTS_HIGHMEM select CPU_SUPPORTS_MSA select GENERIC_CSUM - select MIPS_O32_FP64_SUPPORT if MIPS32_O32 + select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32 select HAVE_KVM help Choose this option to build a kernel for release 6 or later of the -- cgit v1.2.3 From 4b5347a24a0f2d3272032c120664b484478455de Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 23 Feb 2017 14:50:24 +0000 Subject: MIPS: End spinlocks with .insn When building for microMIPS we need to ensure that the assembler always knows that there is code at the target of a branch or jump. Recent toolchains will fail to link a microMIPS kernel when this isn't the case due to what it thinks is a branch to non-microMIPS code. mips-mti-linux-gnu-ld kernel/built-in.o: .spinlock.text+0x2fc: Unsupported branch between ISA modes. mips-mti-linux-gnu-ld final link failed: Bad value This is due to inline assembly labels in spinlock.h not being followed by an instruction mnemonic, either due to a .subsection pseudo-op or the end of the inline asm block. Fix this with a .insn direction after such labels. Signed-off-by: Paul Burton Signed-off-by: James Hogan Reviewed-by: Maciej W. Rozycki Cc: Ralf Baechle Cc: Peter Zijlstra Cc: Ingo Molnar Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: Patchwork: https://patchwork.linux-mips.org/patch/15325/ Signed-off-by: James Hogan --- arch/mips/include/asm/spinlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index f485afe51514..a8df44d60607 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h @@ -127,7 +127,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) " andi %[ticket], %[ticket], 0xffff \n" " bne %[ticket], %[my_ticket], 4f \n" " subu %[ticket], %[my_ticket], %[ticket] \n" - "2: \n" + "2: .insn \n" " .subsection 2 \n" "4: andi %[ticket], %[ticket], 0xffff \n" " sll %[ticket], 5 \n" @@ -202,7 +202,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) " sc %[ticket], %[ticket_ptr] \n" " beqz %[ticket], 1b \n" " li %[ticket], 1 \n" - "2: \n" + "2: .insn \n" " .subsection 2 \n" "3: b 2b \n" " li %[ticket], 0 \n" @@ -382,7 +382,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) " .set reorder \n" __WEAK_LLSC_MB " li %2, 1 \n" - "2: \n" + "2: .insn \n" : "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret) : GCC_OFF_SMALL_ASM() (rw->lock) : "memory"); @@ -422,7 +422,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) " lui %1, 0x8000 \n" " sc %1, %0 \n" " li %2, 1 \n" - "2: \n" + "2: .insn \n" : "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret) : GCC_OFF_SMALL_ASM() (rw->lock) -- cgit v1.2.3 From 7c5a3d813050ee235817b0220dd8c42359a9efd8 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Sat, 25 Feb 2017 11:54:23 +0100 Subject: MIPS: ralink: Fix typos in rt3883 pinctrl There are two copy & paste errors in the definition of the 5GHz LNA and second ethernet pinmux. Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data") Signed-off-by: John Crispin Signed-off-by: Daniel Golle Cc: linux-mips@linux-mips.org Cc: # 3.19.x- Patchwork: https://patchwork.linux-mips.org/patch/15328/ Signed-off-by: James Hogan --- arch/mips/ralink/rt3883.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c index c4ffd43d3996..48ce701557a4 100644 --- a/arch/mips/ralink/rt3883.c +++ b/arch/mips/ralink/rt3883.c @@ -35,7 +35,7 @@ static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) }; static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) }; static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) }; static struct rt2880_pmx_func lna_a_func[] = { FUNC("lna a", 0, 32, 3) }; -static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna a", 0, 35, 3) }; +static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna g", 0, 35, 3) }; static struct rt2880_pmx_func pci_func[] = { FUNC("pci-dev", 0, 40, 32), FUNC("pci-host2", 1, 40, 32), @@ -43,7 +43,7 @@ static struct rt2880_pmx_func pci_func[] = { FUNC("pci-fnc", 3, 40, 32) }; static struct rt2880_pmx_func ge1_func[] = { FUNC("ge1", 0, 72, 12) }; -static struct rt2880_pmx_func ge2_func[] = { FUNC("ge1", 0, 84, 12) }; +static struct rt2880_pmx_func ge2_func[] = { FUNC("ge2", 0, 84, 12) }; static struct rt2880_pmx_group rt3883_pinmux_data[] = { GRP("i2c", i2c_func, 1, RT3883_GPIO_MODE_I2C), -- cgit v1.2.3 From 0c7e2bc87ea6c2eb6f369998f74a0278e64863e4 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Sat, 4 Mar 2017 00:32:03 +0000 Subject: MIPS: Include asm/ptrace.h now linux/sched.h doesn't Use of the task_pt_regs() based macros in MIPS' asm/processor.h for accessing the user context on the kernel stack need the definition of struct pt_regs from asm/ptrace.h. __own_fpu() in asm/fpu.h uses these macros but implicitly depended on linux/sched.h to include asm/ptrace.h. Since commit f780d89a0e82 ("sched/headers: Remove from ") however linux/sched.h no longer includes asm/ptrace.h, so include it explicitly from asm/fpu.h where it is needed instead. This fixes build errors such as: ./arch/mips/include/asm/fpu.h: In function '__own_fpu': ./arch/mips/include/asm/processor.h:385:31: error: invalid application of 'sizeof' to incomplete type 'struct pt_regs' THREAD_SIZE - 32 - sizeof(struct pt_regs)) ^ Fixes: f780d89a0e82 ("sched/headers: Remove from ") Signed-off-by: James Hogan Acked-by: Ingo Molnar Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15386/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/fpu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 321752bcbab6..1527efaf4af4 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 9cb74b5e134c9f133001dd1585deef5353cd85f1 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Sat, 4 Mar 2017 00:41:25 +0000 Subject: MIPS: Wire up statx system call Wire up the statx system call for MIPS, which was introduced in commit a528d35e8bfc ("statx: Add a system call to make enhanced file info available"). Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15387/ Signed-off-by: Ralf Baechle --- arch/mips/include/uapi/asm/unistd.h | 15 +++++++++------ arch/mips/kernel/scall32-o32.S | 1 + arch/mips/kernel/scall64-64.S | 1 + arch/mips/kernel/scall64-n32.S | 1 + arch/mips/kernel/scall64-o32.S | 1 + 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index 3e940dbe0262..78faf4292e90 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -386,17 +386,18 @@ #define __NR_pkey_mprotect (__NR_Linux + 363) #define __NR_pkey_alloc (__NR_Linux + 364) #define __NR_pkey_free (__NR_Linux + 365) +#define __NR_statx (__NR_Linux + 366) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 365 +#define __NR_Linux_syscalls 366 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 365 +#define __NR_O32_Linux_syscalls 366 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -730,16 +731,17 @@ #define __NR_pkey_mprotect (__NR_Linux + 323) #define __NR_pkey_alloc (__NR_Linux + 324) #define __NR_pkey_free (__NR_Linux + 325) +#define __NR_statx (__NR_Linux + 326) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 325 +#define __NR_Linux_syscalls 326 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 325 +#define __NR_64_Linux_syscalls 326 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1077,15 +1079,16 @@ #define __NR_pkey_mprotect (__NR_Linux + 327) #define __NR_pkey_alloc (__NR_Linux + 328) #define __NR_pkey_free (__NR_Linux + 329) +#define __NR_statx (__NR_Linux + 330) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 329 +#define __NR_Linux_syscalls 330 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 329 +#define __NR_N32_Linux_syscalls 330 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index c29d397eee86..80ed68b2c95e 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -600,3 +600,4 @@ EXPORT(sys_call_table) PTR sys_pkey_mprotect PTR sys_pkey_alloc PTR sys_pkey_free /* 4365 */ + PTR sys_statx diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 0687f96ee912..49765b44aa9b 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -438,4 +438,5 @@ EXPORT(sys_call_table) PTR sys_pkey_mprotect PTR sys_pkey_alloc PTR sys_pkey_free /* 5325 */ + PTR sys_statx .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 0331ba39a065..90bad2d1b2d3 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -433,4 +433,5 @@ EXPORT(sysn32_call_table) PTR sys_pkey_mprotect PTR sys_pkey_alloc PTR sys_pkey_free + PTR sys_statx /* 6330 */ .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 5a47042dd25f..2dd70bd104e1 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -588,4 +588,5 @@ EXPORT(sys32_call_table) PTR sys_pkey_mprotect PTR sys_pkey_alloc PTR sys_pkey_free /* 4365 */ + PTR sys_statx .size sys32_call_table,.-sys32_call_table -- cgit v1.2.3 From 12aff99723901bcc0e2a6a34343a4f62c371fdd9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 7 Feb 2017 17:14:14 -0200 Subject: ARM: dts: imx6sx-udoo-neo: Fix reboot hang After issuing a 'reboot' command the imx6sx-udoo-neo board does not reboot as expected and it just hangs instead. In mainline kernel only LDO enabled mode is supported. Do not provide arm-supply/soc-supply nodes in the device tree, so that the board operates in LDO enabled mode and can then successfully reboot via watchdog. Fixes: 76e691fc7653b85d39 ("ARM: dts: imx6sx: Add UDOO Neo support") Signed-off-by: Fabio Estevam Tested-by: Breno Lima Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6sx-udoo-neo.dtsi | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi index 49f466fe0b1d..dcfc97591433 100644 --- a/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi +++ b/arch/arm/boot/dts/imx6sx-udoo-neo.dtsi @@ -121,11 +121,6 @@ }; }; -&cpu0 { - arm-supply = <&sw1a_reg>; - soc-supply = <&sw1c_reg>; -}; - &fec1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet1>; -- cgit v1.2.3 From f1994a9c0930de4b2244816e62120cad08283cdc Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 8 Mar 2017 19:03:10 +0800 Subject: ASoC: rt5665: fix getting wrong work handler container We got rt5665 private data from wrong work. It will result in kernel panic. Signed-off-by: Bard Liao Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- sound/soc/codecs/rt5665.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index 324461e985b3..fe2cf1ed8237 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -1241,7 +1241,7 @@ static irqreturn_t rt5665_irq(int irq, void *data) static void rt5665_jd_check_handler(struct work_struct *work) { struct rt5665_priv *rt5665 = container_of(work, struct rt5665_priv, - calibrate_work.work); + jd_check_work.work); if (snd_soc_read(rt5665->codec, RT5665_AJD1_CTRL) & 0x0010) { /* jack out */ -- cgit v1.2.3 From 593dd5d9fb66c0345cef5fc8caf4199bb6d4e093 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 8 Mar 2017 19:05:29 +0800 Subject: ASoC: rt5665: increase LDO level Too low LDO level will cause a few functions unstable. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index fe2cf1ed8237..a4c07c2ee414 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -4798,7 +4798,7 @@ static int rt5665_i2c_probe(struct i2c_client *i2c, /* Enhance performance*/ regmap_update_bits(rt5665->regmap, RT5665_PWR_ANLG_1, RT5665_HP_DRIVER_MASK | RT5665_LDO1_DVO_MASK, - RT5665_HP_DRIVER_5X | RT5665_LDO1_DVO_09); + RT5665_HP_DRIVER_5X | RT5665_LDO1_DVO_12); INIT_DELAYED_WORK(&rt5665->jack_detect_work, rt5665_jack_detect_handler); -- cgit v1.2.3 From 8f365313beb20742b68cb29a7d1ca6b27c036d81 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 8 Mar 2017 19:05:31 +0800 Subject: ASoC: rt5665: Vref3 is necessary for Mono Amp Vref3 is necessary for Mono Amp. So add it to dapm routes Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index a4c07c2ee414..7a0244e5e321 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -3912,6 +3912,7 @@ static const struct snd_soc_dapm_route rt5665_dapm_routes[] = { {"Mono MIX", "MONOVOL Switch", "MONOVOL"}, {"Mono Amp", NULL, "Mono MIX"}, {"Mono Amp", NULL, "Vref2"}, + {"Mono Amp", NULL, "Vref3"}, {"Mono Amp", NULL, "CLKDET SYS"}, {"Mono Amp", NULL, "CLKDET MONO"}, {"Mono Playback", "Switch", "Mono Amp"}, -- cgit v1.2.3 From 09b50c3703cc354100fe36202ef5e52ee128b904 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Wed, 8 Mar 2017 19:05:32 +0800 Subject: ASoC: rt5665: CLKDET is also a power of ASRC We need to power on CLKDET to use ASRC function. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index 7a0244e5e321..61137160c116 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -3178,6 +3178,9 @@ static const struct snd_soc_dapm_route rt5665_dapm_routes[] = { {"DAC Mono Right Filter", NULL, "DAC Mono R ASRC", is_using_asrc}, {"DAC Stereo1 Filter", NULL, "DAC STO1 ASRC", is_using_asrc}, {"DAC Stereo2 Filter", NULL, "DAC STO2 ASRC", is_using_asrc}, + {"I2S1 ASRC", NULL, "CLKDET"}, + {"I2S2 ASRC", NULL, "CLKDET"}, + {"I2S3 ASRC", NULL, "CLKDET"}, /*Vref*/ {"Mic Det Power", NULL, "Vref2"}, -- cgit v1.2.3 From 7b4fdf77a450ec0fdcb2f677b080ddbf2c186544 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 3 Mar 2017 21:44:00 +0100 Subject: netfilter: don't track fragmented packets Andrey reports syzkaller splat caused by NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); in ipv4 nat. But this assertion (and the comment) are wrong, this function does see fragments when IP_NODEFRAG setsockopt is used. As conntrack doesn't track packets without complete l4 header, only the first fragment is tracked. Because applying nat to first packet but not the rest makes no sense this also turns off tracking of all fragments. Reported-by: Andrey Konovalov Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 ++++ net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 5 ----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index bc1486f2c064..2e14ed11a35c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -165,6 +165,10 @@ static unsigned int ipv4_conntrack_local(void *priv, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; + + if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ + return NF_ACCEPT; + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index f8aad03d674b..6f5e8d01b876 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, /* maniptype == SRC for postrouting. */ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); - /* We never see fragments: conntrack defrags on pre-routing - * and local-out, and nf_nat_out protects post-routing. - */ - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); - ct = nf_ct_get(skb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would * have dropped it. Hence it's the user's responsibilty to -- cgit v1.2.3 From 8e05ba7f848475bdc3aa546cf88418f7e51a6671 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Sat, 4 Mar 2017 18:00:02 +0800 Subject: netfilter: nf_nat_sctp: fix ICMP packet to be dropped accidently Regarding RFC 792, the first 64 bits of the original SCTP datagram's data could be contained in ICMP packet, such as: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Type | Code | Checksum | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | unused | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Internet Header + 64 bits of Original Data Datagram | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ However, according to RFC 4960, SCTP datagram header is as below: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Source Port Number | Destination Port Number | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Verification Tag | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Checksum | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ It means only the first three fields of SCTP header can be carried in ICMP packet except for Checksum field. At present in sctp_manip_pkt(), no matter whether the packet is ICMP or not, it always calculates SCTP packet checksum. However, not only the calculation of checksum is unnecessary for ICMP, but also it causes another fatal issue that ICMP packet is dropped. The header size of SCTP is used to identify whether the writeable length of skb is bigger than skb->len through skb_make_writable() in sctp_manip_pkt(). But when it deals with ICMP packet, skb_make_writable() directly returns false as the writeable length of skb is bigger than skb->len. Subsequently ICMP is dropped. Now we correct this misbahavior. When sctp_manip_pkt() handles ICMP packet, 8 bytes rather than the whole SCTP header size is used to check if writeable length of skb is overflowed. Meanwhile, as it's meaningless to calculate checksum when packet is ICMP, the computation of checksum is ignored as well. Signed-off-by: Ying Xue Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_proto_sctp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index 31d358691af0..804e8a0ab36e 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -33,8 +33,16 @@ sctp_manip_pkt(struct sk_buff *skb, enum nf_nat_manip_type maniptype) { sctp_sctphdr_t *hdr; + int hdrsize = 8; - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + /* This could be an inner header returned in imcp packet; in such + * cases we cannot update the checksum field since it is outside + * of the 8 bytes of transport layer headers we are guaranteed. + */ + if (skb->len >= hdroff + sizeof(*hdr)) + hdrsize = sizeof(*hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) return false; hdr = (struct sctphdr *)(skb->data + hdroff); @@ -47,6 +55,9 @@ sctp_manip_pkt(struct sk_buff *skb, hdr->dest = tuple->dst.u.sctp.port; } + if (hdrsize < sizeof(*hdr)) + return true; + if (skb->ip_summed != CHECKSUM_PARTIAL) { hdr->checksum = sctp_compute_cksum(skb, hdroff); skb->ip_summed = CHECKSUM_NONE; -- cgit v1.2.3 From 568af6de058cb2b0c5b98d98ffcf37cdc6bc38a7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 4 Mar 2017 19:53:47 +0100 Subject: netfilter: nf_tables: set pktinfo->thoff at AH header if found Phil Sutter reports that IPv6 AH header matching is broken. From userspace, nft generates bytecode that expects to find the AH header at NFT_PAYLOAD_TRANSPORT_HEADER both for IPv4 and IPv6. However, pktinfo->thoff is set to the inner header after the AH header in IPv6, while in IPv4 pktinfo->thoff points to the AH header indeed. This behaviour is inconsistent. This patch fixes this problem by updating ipv6_find_hdr() to get the IP6_FH_F_AUTH flag so this function stops at the AH header, so both IPv4 and IPv6 pktinfo->thoff point to the AH header. This is also inconsistent when trying to match encapsulated headers: 1) A packet that looks like IPv4 + AH + TCP dport 22 will *not* match. 2) A packet that looks like IPv6 + AH + TCP dport 22 will match. Reported-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_ipv6.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index d150b5066201..97983d1c05e4 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -9,12 +9,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) { + unsigned int flags = IP6_FH_F_AUTH; int protohdr, thoff = 0; unsigned short frag_off; nft_set_pktinfo(pkt, skb, state); - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) { nft_set_pktinfo_proto_unspec(pkt, skb); return; @@ -32,6 +33,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, const struct nf_hook_state *state) { #if IS_ENABLED(CONFIG_IPV6) + unsigned int flags = IP6_FH_F_AUTH; struct ipv6hdr *ip6h, _ip6h; unsigned int thoff = 0; unsigned short frag_off; @@ -50,7 +52,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, if (pkt_len + sizeof(*ip6h) > skb->len) return -1; - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); if (protohdr < 0) return -1; -- cgit v1.2.3 From 67b0503db9c29b04eadfeede6bebbfe5ddad94ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Sun, 12 Feb 2017 13:02:13 -0200 Subject: [media] dvb-usb-firmware: don't do DMA on stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer allocation for the firmware data was changed in commit 43fab9793c1f ("[media] dvb-usb: don't use stack for firmware load") but the same applies for the reset value. Fixes: 43fab9793c1f ("[media] dvb-usb: don't use stack for firmware load") Cc: stable@vger.kernel.org Signed-off-by: Stefan Brüns Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dvb-usb-firmware.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c index ab9866024ec7..04033efe7ad5 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-firmware.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-firmware.c @@ -36,16 +36,18 @@ static int usb_cypress_writemem(struct usb_device *udev,u16 addr,u8 *data, u8 le int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw, int type) { struct hexline *hx; - u8 reset; - int ret,pos=0; + u8 *buf; + int ret, pos = 0; + u16 cpu_cs_register = cypress[type].cpu_cs_register; - hx = kmalloc(sizeof(*hx), GFP_KERNEL); - if (!hx) + buf = kmalloc(sizeof(*hx), GFP_KERNEL); + if (!buf) return -ENOMEM; + hx = (struct hexline *)buf; /* stop the CPU */ - reset = 1; - if ((ret = usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1)) != 1) + buf[0] = 1; + if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) err("could not stop the USB controller CPU."); while ((ret = dvb_usb_get_hexline(fw, hx, &pos)) > 0) { @@ -61,21 +63,21 @@ int usb_cypress_load_firmware(struct usb_device *udev, const struct firmware *fw } if (ret < 0) { err("firmware download failed at %d with %d",pos,ret); - kfree(hx); + kfree(buf); return ret; } if (ret == 0) { /* restart the CPU */ - reset = 0; - if (ret || usb_cypress_writemem(udev,cypress[type].cpu_cs_register,&reset,1) != 1) { + buf[0] = 0; + if (usb_cypress_writemem(udev, cpu_cs_register, buf, 1) != 1) { err("could not restart the USB controller CPU."); ret = -EINVAL; } } else ret = -EIO; - kfree(hx); + kfree(buf); return ret; } -- cgit v1.2.3 From e61555c29c28a4a3b6ba6207f4a0883ee236004d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Wed, 8 Mar 2017 20:18:09 -0500 Subject: EDAC, i5000, i5400: Fix use of MTR_DRAM_WIDTH macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MTR_DRAM_WIDTH macro returns the data width. It is sometimes used as if it returned a boolean true if the width if 8. Fix the tests where MTR_DRAM_WIDTH is misused. Signed-off-by: Jérémy Lefaure Cc: linux-edac Link: http://lkml.kernel.org/r/20170309011809.8340-1-jeremy.lefaure@lse.epita.fr Signed-off-by: Borislav Petkov --- drivers/edac/i5000_edac.c | 2 +- drivers/edac/i5400_edac.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 1670d27bcac8..f683919981b0 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1293,7 +1293,7 @@ static int i5000_init_csrows(struct mem_ctl_info *mci) dimm->mtype = MEM_FB_DDR2; /* ask what device type on this row */ - if (MTR_DRAM_WIDTH(mtr)) + if (MTR_DRAM_WIDTH(mtr) == 8) dimm->dtype = DEV_X8; else dimm->dtype = DEV_X4; diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index abf6ef22e220..37a9ba71da44 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -1207,13 +1207,14 @@ static int i5400_init_dimms(struct mem_ctl_info *mci) dimm->nr_pages = size_mb << 8; dimm->grain = 8; - dimm->dtype = MTR_DRAM_WIDTH(mtr) ? DEV_X8 : DEV_X4; + dimm->dtype = MTR_DRAM_WIDTH(mtr) == 8 ? + DEV_X8 : DEV_X4; dimm->mtype = MEM_FB_DDR2; /* * The eccc mechanism is SDDC (aka SECC), with * is similar to Chipkill. */ - dimm->edac_mode = MTR_DRAM_WIDTH(mtr) ? + dimm->edac_mode = MTR_DRAM_WIDTH(mtr) == 8 ? EDAC_S8ECD8ED : EDAC_S4ECD4ED; ndimms++; } -- cgit v1.2.3 From 89cf83d4e065ff9fbd2ddc674489c8058eeca758 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Feb 2017 12:54:41 +0000 Subject: drm/i915: Squelch any ktime/jiffie rounding errors for wait-ioctl We wait upon jiffies, but report the time elapsed using a high-resolution timer. This discrepancy can lead to us timing out the wait prior to us reporting the elapsed time as complete. This restores the squelching lost in commit e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers"). Fixes: e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170216125441.30923-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit c1d2061b28c2aa25ec39b60d9c248e6beebd7315) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6908123162d1..c45af09555dc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3029,6 +3029,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); if (args->timeout_ns < 0) args->timeout_ns = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regression from the timespec->ktime conversion. + */ + if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) + args->timeout_ns = 0; } i915_gem_object_put(obj); -- cgit v1.2.3 From 1d972d6021a1388021df51a58248e68372ce2b5d Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:15:57 +0200 Subject: drm/i915/glk: Fix watermark computations for third sprite plane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geminilake has a third sprite plane (or fourth universal plane) that is independent from the cursor. Make sure that for_each_plane_id_on_crtc() is aware of that extra plane so that the watermark code takes it into account. Fixes: e9c9882556fc ("drm/i915/glk: Configure number of sprite planes properly") Cc: Ander Conselvan de Oliveira Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-2-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 19c3164db457e0fc65d4501fd354506228576241) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a4b42d31391..7febe6eecf72 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -293,6 +293,7 @@ enum plane_id { PLANE_PRIMARY, PLANE_SPRITE0, PLANE_SPRITE1, + PLANE_SPRITE2, PLANE_CURSOR, I915_MAX_PLANES, }; -- cgit v1.2.3 From b717a0392530ae8da0da041abe5c3a6098b55660 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Feb 2017 11:43:06 +0000 Subject: drm/i915/fbdev: Stop repeating tile configuration on stagnation If we cease making progress in finding matching outputs for a tiled configuration, stop looping over the remaining unconfigured outputs. v2: Use conn_seq (instead of pass) to only apply tile configuration on first pass. Fixes: b0ee9e7fa5b4 ("drm/fb: add support for tiled monitor configurations. (v2)") Signed-off-by: Chris Wilson Cc: Tomasz Lis Cc: Dave Airlie Cc: Daniel Vetter Cc: Jani Nikula Cc: Sean Paul Cc: # v3.19+ Reviewed-by: Tomasz Lis Link: http://patchwork.freedesktop.org/patch/msgid/20170224114306.4400-1-chris@chris-wilson.co.uk (cherry picked from commit 754a76591b12c88f57ad8b4ca533a5c9566a1922) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 1b8ba2e77539..2d449fb5d1d2 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -357,14 +357,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, mask; + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); int i, j; bool *save_enabled; bool fallback = true; int num_connectors_enabled = 0; int num_connectors_detected = 0; - int pass = 0; save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); if (!save_enabled) @@ -374,6 +373,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, mask = BIT(count) - 1; conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -387,7 +387,7 @@ retry: if (conn_configured & BIT(i)) continue; - if (pass == 0 && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -498,10 +498,8 @@ retry: conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask) { - pass++; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the -- cgit v1.2.3 From 8c9923707f30ff56d9fd242053594b18f38d8036 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 12:26:54 +0000 Subject: drm/i915: Remove the vma from the drm_mm if binding fails As we track whether a vma has been inserted into the drm_mm using the vma->flags, if we fail to bind the vma into the GTT we do not update those bits and will attempt to reinsert the vma into the drm_mm on future passes. To prevent that, we want to unwind i915_vma_insert() if we fail in our attempt to bind. Fixes: 59bfa1248e22 ("drm/i915: Start passing around i915_vma from execbuffer") Testcase: igt/drv_selftest/live_gtt Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.9+ Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-3-chris@chris-wilson.co.uk (cherry picked from commit 31c7effa39f21f0fea1b3250ae9ff32b9c7e1ae5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_vma.c | 57 ++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 155906e84812..df20e9bc1c0f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -512,10 +512,36 @@ err_unpin: return ret; } +static void +i915_vma_remove(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. + */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. + */ + i915_gem_object_unpin_pages(obj); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); +} + int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - unsigned int bound = vma->flags; + const unsigned int bound = vma->flags; int ret; lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); @@ -524,18 +550,18 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { ret = -EBUSY; - goto err; + goto err_unpin; } if ((bound & I915_VMA_BIND_MASK) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - goto err; + goto err_unpin; } ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) - goto err; + goto err_remove; if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); @@ -544,7 +570,12 @@ int __i915_vma_do_pin(struct i915_vma *vma, GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; -err: +err_remove: + if ((bound & I915_VMA_BIND_MASK) == 0) { + GEM_BUG_ON(vma->pages); + i915_vma_remove(vma); + } +err_unpin: __i915_vma_unpin(vma); return ret; } @@ -657,9 +688,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (vma->pages != obj->mm.pages) { GEM_BUG_ON(!vma->pages); sg_free_table(vma->pages); @@ -667,18 +695,7 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->pages = NULL; - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. - */ - i915_gem_object_unpin_pages(obj); - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + i915_vma_remove(vma); destroy: if (unlikely(i915_vma_is_closed(vma))) -- cgit v1.2.3 From 34dc8993eef63681b062871413a9484008a2a78f Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 15 Feb 2017 15:52:59 +0200 Subject: drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain Baytrails, namely the 4 cpu core variants, have been plaqued by spurious system hangs, mostly occurring with light loads. Multiple bisects by various people point to a commit which changes the reclocking strategy for Baytrail to follow its bigger brethen: commit 8fb55197e64d ("drm/i915: Agressive downclocking on Baytrail") There is also a review comment attached to this commit from Deepak S on avoiding punit access on Cherryview and thus it was excluded on common reclocking path. By taking the same approach and omitting the punit access by not tweaking the thresholds when the hardware has been asked to move into different frequency, considerable gains in stability have been observed. With J1900 box, light render/video load would end up in system hang in usually less than 12 hours. With this patch applied, the cumulative uptime has now been 34 days without issues. To provoke system hang, light loads on both render and bsd engines in parallel have been used: glxgears >/dev/null 2>/dev/null & mpv --vo=vaapi --hwdec=vaapi --loop=inf vid.mp4 So far, author has not witnessed system hang with above load and this patch applied. Reports from the tenacious people at kernel bugzilla are also promising. Considering that the punit access frequency with this patch is considerably less, there is a possibility that this will push the, still unknown, root cause past the triggering point on most loads. But as we now can reliably reproduce the hang independently, we can reduce the pain that users are having and use a static thresholds until a root cause is found. v3: don't break debugfs and simplification (Chris Wilson) References: https://bugzilla.kernel.org/show_bug.cgi?id=109051 Cc: Chris Wilson Cc: Ville Syrjälä Cc: Len Brown Cc: Daniel Vetter Cc: Jani Nikula Cc: fritsch@xbmc.org Cc: miku@iki.fi Cc: Ezequiel Garcia CC: Michal Feix Cc: Hans de Goede Cc: Deepak S Cc: Jarkko Nikula Cc: # v4.2+ Acked-by: Daniel Vetter Acked-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1487166779-26945-1-git-send-email-mika.kuoppala@intel.com (cherry picked from commit 6067a27d1f0184596d51decbac1c1fdc4acb012f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249623d45be0..65cd4c56c9dd 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4891,6 +4891,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) break; } + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(dev_priv)) + goto skip_hw_write; + I915_WRITE(GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(dev_priv, ei_up)); I915_WRITE(GEN6_RP_UP_THRESHOLD, @@ -4911,6 +4917,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); +skip_hw_write: dev_priv->rps.power = new_power; dev_priv->rps.up_threshold = threshold_up; dev_priv->rps.down_threshold = threshold_down; -- cgit v1.2.3 From d253371c4c2f5fc2d884ef25f64decd7549aff5a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 24 Feb 2017 16:32:10 +0200 Subject: drm/i915/gen9: Increase PCODE request timeout to 50ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 2c7d0602c815277f7cb7c932b091288710d8aba7 Author: Imre Deak Date: Mon Dec 5 18:27:37 2016 +0200 drm/i915/gen9: Fix PCODE polling during CDCLK change notification there is still one report of the CDCLK-change request timing out on a KBL machine, see the Reference link. On that machine the maximum time the request took to succeed was 34ms, so increase the timeout to 50ms. v2: - Change timeout from 100 to 50 ms to maintain the current 50 ms limit for atomic waits in the driver. (Chris, Tvrtko) Reference: https://bugs.freedesktop.org/show_bug.cgi?id=99345 Cc: Ville Syrjälä Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Signed-off-by: Imre Deak Acked-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1487946730-17162-1-git-send-email-imre.deak@intel.com (cherry picked from commit 0129936ddda26afd5d9d207c4e86b2425952579f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 65cd4c56c9dd..940bab22d464 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7923,10 +7923,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, * @timeout_base_ms: timeout for polling with preemption enabled * * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE - * reports an error or an overall timeout of @timeout_base_ms+10 ms expires. + * reports an error or an overall timeout of @timeout_base_ms+50 ms expires. * The request is acknowledged once the PCODE reply dword equals @reply after * applying @reply_mask. Polling is first attempted with preemption enabled - * for @timeout_base_ms and if this times out for another 10 ms with + * for @timeout_base_ms and if this times out for another 50 ms with * preemption disabled. * * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some @@ -7962,14 +7962,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, * worst case) _and_ PCODE was busy for some reason even after a * (queued) request and @timeout_base_ms delay. As a workaround retry * the poll with preemption disabled to maximize the number of - * requests. Increase the timeout from @timeout_base_ms to 10ms to + * requests. Increase the timeout from @timeout_base_ms to 50ms to * account for interrupts that could reduce the number of these - * requests. + * requests, and for any quirks of the PCODE firmware that delays + * the request completion. */ DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); - ret = wait_for_atomic(COND, 10); + ret = wait_for_atomic(COND, 50); preempt_enable(); out: -- cgit v1.2.3 From 38230243ef316ac696956d75dc78a22e3aa789b9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 28 Feb 2017 15:28:47 +0100 Subject: drm/i915: Move updating color management to before vblank evasion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This cannot be done reliably during vblank evasasion since the color management registers are not double buffered. The original commit that moved it always during vblank evasion was wrong, so revert it to before vblank evasion again. Signed-off-by: Maarten Lankhorst Fixes: 20a34e78f0d7 ("drm/i915: Update color management during vblank evasion.") Cc: stable@vger.kernel.org # v4.7+ Link: http://patchwork.freedesktop.org/patch/msgid/1488292128-14540-1-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä (cherry picked from commit 567f0792a6ad11c0c2620944b8eeb777359fb85a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 01341670738f..9a8b6a13233d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14946,17 +14946,19 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, to_intel_atomic_state(old_crtc_state->state); bool modeset = needs_modeset(crtc->state); + if (!modeset && + (intel_cstate->base.color_mgmt_changed || + intel_cstate->update_pipe)) { + intel_color_set_csc(crtc->state); + intel_color_load_luts(crtc->state); + } + /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_crtc); if (modeset) goto out; - if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) { - intel_color_set_csc(crtc->state); - intel_color_load_luts(crtc->state); - } - if (intel_cstate->update_pipe) intel_update_pipe_config(intel_crtc, old_intel_cstate); else if (INTEL_GEN(dev_priv) >= 9) -- cgit v1.2.3 From 0d9dc306e15b59bf50db87ebcb1e2248586d4733 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Mar 2017 13:20:31 +0000 Subject: drm/i915: Store a permanent error in obj->mm.pages Once the object has been truncated, it is unrecoverable. To facilitate detection of this state store the error in obj->mm.pages. This is required for the next patch which should be applied to v4.10 (via stable), so we also need to mark this patch for backporting. In that regard, let's consider this to be a fix/improvement too. v2: Avoid dereferencing the ERR_PTR when freeing the object. Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation to its own locking") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170307132031.32461-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit 4e5462ee843c883790e9609cf560d88960ea4227) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c45af09555dc..3591e8656ff9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2119,6 +2119,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) */ shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); obj->mm.madv = __I915_MADV_PURGED; + obj->mm.pages = ERR_PTR(-EFAULT); } /* Try to discard unwanted pages */ @@ -2218,7 +2219,9 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_reset_page_iter(obj); - obj->ops->put_pages(obj, pages); + if (!IS_ERR(pages)) + obj->ops->put_pages(obj, pages); + unlock: mutex_unlock(&obj->mm.lock); } @@ -2437,7 +2440,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (err) return err; - if (unlikely(!obj->mm.pages)) { + if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { err = ____i915_gem_object_get_pages(obj); if (err) goto unlock; @@ -2515,7 +2518,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, pinned = true; if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { - if (unlikely(!obj->mm.pages)) { + if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { ret = ____i915_gem_object_get_pages(obj); if (ret) goto err_unlock; -- cgit v1.2.3 From 4e6fdafa7ac395ad47a80a0e7b4fd1e11550f862 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Mar 2017 12:03:38 +0000 Subject: drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Before we instantiate/pin the backing store for our use, we can prepopulate the shmemfs filp efficiently using a write into the pagecache. We avoid the penalty of instantiating all the pages, important if the user is just writing to a few and never uses the object on the GPU, and using a direct write into shmemfs allows it to avoid the cost of retrieving a page (mostly the clear-before-use, but in theory we could curtail swapin) before it is overwritten. This can be extended later to provide additional specialisation for other backends (other than shmemfs). For now it provides a defense against very large write-only allocations from exhausting all of system memory. v2: Smelling fixes. Fixes: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex") References: https://bugs.freedesktop.org/show_bug.cgi?id=99107 Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: # v4.10+ Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170307120338.7277-2-chris@chris-wilson.co.uk (cherry picked from commit 7c55e2c5772dcf3cbacd0fa2bcfeefae416b73f7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 78 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_object.h | 3 ++ 2 files changed, 81 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3591e8656ff9..10777da73039 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1434,6 +1434,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pwrite(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pwrite) + ret = obj->ops->pwrite(obj, args); + if (ret != -ENODEV) + goto err; + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, @@ -2566,6 +2572,75 @@ err_unlock: goto out_unlock; } +static int +i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *arg) +{ + struct address_space *mapping = obj->base.filp->f_mapping; + char __user *user_data = u64_to_user_ptr(arg->data_ptr); + u64 remain, offset; + unsigned int pg; + + /* Before we instantiate/pin the backing store for our use, we + * can prepopulate the shmemfs filp efficiently using a write into + * the pagecache. We avoid the penalty of instantiating all the + * pages, important if the user is just writing to a few and never + * uses the object on the GPU, and using a direct write into shmemfs + * allows it to avoid the cost of retrieving a page (either swapin + * or clearing-before-use) before it is overwritten. + */ + if (READ_ONCE(obj->mm.pages)) + return -ENODEV; + + /* Before the pages are instantiated the object is treated as being + * in the CPU domain. The pages will be clflushed as required before + * use, and we can freely write into the pages directly. If userspace + * races pwrite with any other operation; corruption will ensue - + * that is userspace's prerogative! + */ + + remain = arg->size; + offset = arg->offset; + pg = offset_in_page(offset); + + do { + unsigned int len, unwritten; + struct page *page; + void *data, *vaddr; + int err; + + len = PAGE_SIZE - pg; + if (len > remain) + len = remain; + + err = pagecache_write_begin(obj->base.filp, mapping, + offset, len, 0, + &page, &data); + if (err < 0) + return err; + + vaddr = kmap(page); + unwritten = copy_from_user(vaddr + pg, user_data, len); + kunmap(page); + + err = pagecache_write_end(obj->base.filp, mapping, + offset, len, len - unwritten, + page, data); + if (err < 0) + return err; + + if (unwritten) + return -EFAULT; + + remain -= len; + user_data += len; + offset += len; + pg = 0; + } while (remain); + + return 0; +} + static bool ban_context(const struct i915_gem_context *ctx) { return (i915_gem_context_is_bannable(ctx) && @@ -3987,8 +4062,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = i915_gem_object_get_pages_gtt, .put_pages = i915_gem_object_put_pages_gtt, + + .pwrite = i915_gem_object_pwrite_gtt, }; struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index bf90b07163d1..76b80a0be797 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -54,6 +54,9 @@ struct drm_i915_gem_object_ops { struct sg_table *(*get_pages)(struct drm_i915_gem_object *); void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); + int (*pwrite)(struct drm_i915_gem_object *, + const struct drm_i915_gem_pwrite *); + int (*dmabuf_export)(struct drm_i915_gem_object *); void (*release)(struct drm_i915_gem_object *); }; -- cgit v1.2.3 From edd06b8353772dca7afcd4640dafa83b521edd55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Mar 2017 22:54:19 +0200 Subject: drm/i915: Nuke debug messages from the pipe update critical section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit printks are slow so we should not be doing them from the vblank evade critical section. These could explain why we sometimes seem to blow past our 100 usec deadline. The problem has been there ever since commit bfd16b2a23dc ("drm/i915: Make updating pipe without modeset atomic.") but it may not have been readily visible until commit e1edbd44e23b ("drm/i915: Complain if we take too long under vblank evasion.") increased our chances of noticing it. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Fixes: bfd16b2a23dc ("drm/i915: Make updating pipe without modeset atomic.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170307205419.19447-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst (cherry picked from commit c3f8ad57a01a31397e5a0349a226a32f35ddc19c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9a8b6a13233d..b3e0cd133b49 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3669,10 +3669,6 @@ static void intel_update_pipe_config(struct intel_crtc *crtc, /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ crtc->base.mode = crtc->base.state->mode; - DRM_DEBUG_KMS("Updating pipe size %ix%i -> %ix%i\n", - old_crtc_state->pipe_src_w, old_crtc_state->pipe_src_h, - pipe_config->pipe_src_w, pipe_config->pipe_src_h); - /* * Update pipe size and adjust fitter if needed: the reason for this is * that in compute_mode_changes we check the native mode (not the pfit @@ -4796,23 +4792,17 @@ static void skylake_pfit_enable(struct intel_crtc *crtc) struct intel_crtc_scaler_state *scaler_state = &crtc->config->scaler_state; - DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config); - if (crtc->config->pch_pfit.enabled) { int id; - if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) { - DRM_ERROR("Requesting pfit without getting a scaler first\n"); + if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) return; - } id = scaler_state->scaler_id; I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN | PS_FILTER_MEDIUM | scaler_state->scalers[id].mode); I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos); I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size); - - DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id); } } -- cgit v1.2.3 From 5a8cf90d743f2d05433c6109f6c1b9b904b0cdb7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Feb 2017 20:47:41 +0000 Subject: drm/i915: Drain the freed state from the tail of the next commit If we have any residual freed atomic state from earlier commits, flush the freed list after performing the current modeset. This prevents the freed list from ever-growing if userspace manages to starve the kernel threads (i.e. we are never able to run our free state worker and eventually the system may even oom). Fixes: 6f0f02dc56f1 ("drm/i915: Move atomic state free from out of fence release") Testcase: igt/kms_cursor/legacy/all-pipes-single-bo Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170202204741.18231-1-chris@chris-wilson.co.uk Reviewed-by: Maarten Lankhorst (cherry picked from commit ba318c61a9719577b6f451c055f364e4116874b2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b3e0cd133b49..3282b0f4b134 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14369,6 +14369,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state, } while (progress); } +static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) +{ + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + +static void intel_atomic_helper_free_state_worker(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + + intel_atomic_helper_free_state(dev_priv); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -14535,6 +14553,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * can happen also when the device is completely off. */ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + + intel_atomic_helper_free_state(dev_priv); } static void intel_atomic_commit_work(struct work_struct *work) @@ -16591,18 +16611,6 @@ fail: drm_modeset_acquire_fini(&ctx); } -static void intel_atomic_helper_free_state(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), atomic_helper.free_work); - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16623,7 +16631,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state); + intel_atomic_helper_free_state_worker); intel_init_quirks(dev); -- cgit v1.2.3 From 9200c6f177638909dbbaded8aeeeccbd48744400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 9 Feb 2017 00:30:22 +0100 Subject: Revert "phy: Add USB3 PHY support for Broadcom NSP SoC" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit d7bc1a7d41bf ("phy: Add USB3 PHY support for Broadcom NSP SoC") as we already have driver for this PHY (shared by NS and NSP). It was added in commit e5666281d9ea ("phy: bcm-ns-usb3: new driver for USB 3.0 PHY on Northstar"). Instead of adding separated driver & duplicating code we should work on improving existing (old) one. Thanks to work done by Broadcom we know there is MDIO bus we weren't aware of & we know register names which makes initialization more clear. This is very valuable info and we should work on using it in existing driver afterwards. Acked-by: Jon Mason Signed-off-by: Rafał Miłecki --- drivers/phy/Kconfig | 8 -- drivers/phy/Makefile | 1 - drivers/phy/phy-bcm-nsp-usb3.c | 177 ----------------------------------------- 3 files changed, 186 deletions(-) delete mode 100644 drivers/phy/phy-bcm-nsp-usb3.c diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index dc5277ad1b5a..c11044439fd4 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -510,12 +510,4 @@ config PHY_MESON8B_USB2 and GXBB SoCs. If unsure, say N. -config PHY_NSP_USB3 - tristate "Broadcom NorthStar plus USB3 PHY driver" - depends on OF && (ARCH_BCM_NSP || COMPILE_TEST) - select GENERIC_PHY - default ARCH_BCM_NSP - help - Enable this to support the Broadcom Northstar plus USB3 PHY. - If unsure, say N. endmenu diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile index e7b0feb1e125..dd8f3b5d2918 100644 --- a/drivers/phy/Makefile +++ b/drivers/phy/Makefile @@ -62,4 +62,3 @@ obj-$(CONFIG_PHY_CYGNUS_PCIE) += phy-bcm-cygnus-pcie.o obj-$(CONFIG_ARCH_TEGRA) += tegra/ obj-$(CONFIG_PHY_NS2_PCIE) += phy-bcm-ns2-pcie.o obj-$(CONFIG_PHY_MESON8B_USB2) += phy-meson8b-usb2.o -obj-$(CONFIG_PHY_NSP_USB3) += phy-bcm-nsp-usb3.o diff --git a/drivers/phy/phy-bcm-nsp-usb3.c b/drivers/phy/phy-bcm-nsp-usb3.c deleted file mode 100644 index 49024eaa5545..000000000000 --- a/drivers/phy/phy-bcm-nsp-usb3.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (C) 2016 Broadcom - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. - * - * This program is distributed "as is" WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NSP_USB3_RST_CTRL_OFFSET 0x3f8 - -/* mdio reg access */ -#define NSP_USB3_PHY_BASE_ADDR_REG 0x1f - -#define NSP_USB3_PHY_PLL30_BLOCK 0x8000 -#define NSP_USB3_PLL_CONTROL 0x01 -#define NSP_USB3_PLLA_CONTROL0 0x0a -#define NSP_USB3_PLLA_CONTROL1 0x0b - -#define NSP_USB3_PHY_TX_PMD_BLOCK 0x8040 -#define NSP_USB3_TX_PMD_CONTROL1 0x01 - -#define NSP_USB3_PHY_PIPE_BLOCK 0x8060 -#define NSP_USB3_LFPS_CMP 0x02 -#define NSP_USB3_LFPS_DEGLITCH 0x03 - -struct nsp_usb3_phy { - struct regmap *usb3_ctrl; - struct phy *phy; - struct mdio_device *mdiodev; -}; - -static int nsp_usb3_phy_init(struct phy *phy) -{ - struct nsp_usb3_phy *iphy = phy_get_drvdata(phy); - struct mii_bus *bus = iphy->mdiodev->bus; - int addr = iphy->mdiodev->addr; - u32 data; - int rc; - - rc = regmap_read(iphy->usb3_ctrl, 0, &data); - if (rc) - return rc; - data |= 1; - rc = regmap_write(iphy->usb3_ctrl, 0, data); - if (rc) - return rc; - - rc = regmap_write(iphy->usb3_ctrl, NSP_USB3_RST_CTRL_OFFSET, 1); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG, - NSP_USB3_PHY_PLL30_BLOCK); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLL_CONTROL, 0x1000); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL0, 0x6400); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL1, 0xc000); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLLA_CONTROL1, 0x8000); - if (rc) - return rc; - - rc = regmap_write(iphy->usb3_ctrl, NSP_USB3_RST_CTRL_OFFSET, 0); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PLL_CONTROL, 0x9000); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG, - NSP_USB3_PHY_PIPE_BLOCK); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_LFPS_CMP, 0xf30d); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_LFPS_DEGLITCH, 0x6302); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_PHY_BASE_ADDR_REG, - NSP_USB3_PHY_TX_PMD_BLOCK); - if (rc) - return rc; - - rc = mdiobus_write(bus, addr, NSP_USB3_TX_PMD_CONTROL1, 0x1003); - - return rc; -} - -static struct phy_ops nsp_usb3_phy_ops = { - .init = nsp_usb3_phy_init, - .owner = THIS_MODULE, -}; - -static int nsp_usb3_phy_probe(struct mdio_device *mdiodev) -{ - struct device *dev = &mdiodev->dev; - struct phy_provider *provider; - struct nsp_usb3_phy *iphy; - - iphy = devm_kzalloc(dev, sizeof(*iphy), GFP_KERNEL); - if (!iphy) - return -ENOMEM; - iphy->mdiodev = mdiodev; - - iphy->usb3_ctrl = syscon_regmap_lookup_by_phandle(dev->of_node, - "usb3-ctrl-syscon"); - if (IS_ERR(iphy->usb3_ctrl)) - return PTR_ERR(iphy->usb3_ctrl); - - iphy->phy = devm_phy_create(dev, dev->of_node, &nsp_usb3_phy_ops); - if (IS_ERR(iphy->phy)) { - dev_err(dev, "failed to create PHY\n"); - return PTR_ERR(iphy->phy); - } - - phy_set_drvdata(iphy->phy, iphy); - - provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); - if (IS_ERR(provider)) { - dev_err(dev, "could not register PHY provider\n"); - return PTR_ERR(provider); - } - - return 0; -} - -static const struct of_device_id nsp_usb3_phy_of_match[] = { - {.compatible = "brcm,nsp-usb3-phy",}, - { /* sentinel */ } -}; - -static struct mdio_driver nsp_usb3_phy_driver = { - .mdiodrv = { - .driver = { - .name = "nsp-usb3-phy", - .of_match_table = nsp_usb3_phy_of_match, - }, - }, - .probe = nsp_usb3_phy_probe, -}; - -mdio_module_driver(nsp_usb3_phy_driver); - -MODULE_DESCRIPTION("Broadcom NSP USB3 PHY driver"); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Yendapally Reddy Dhananjaya Reddy Date: Thu, 9 Feb 2017 00:30:23 +0100 Subject: Revert "dt-bindings: phy: Add documentation for NSP USB3 PHY" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c8ca631f9480 ("dt-bindings: phy: Add documentation for NSP USB3 PHY") to match reverting commit adding the new PHY driver. Please note we revert this commit before it reached stable release. If new compatible string is needed it should be added to the existing bcm-ns-usb3-phy.txt which already describes this PHY. Acked-by: Jon Mason Acked-by: Rob Herring Signed-off-by: Rafał Miłecki --- .../devicetree/bindings/phy/brcm,nsp-usb3-phy.txt | 39 ---------------------- 1 file changed, 39 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt diff --git a/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt b/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt deleted file mode 100644 index e68ae5dec9c9..000000000000 --- a/Documentation/devicetree/bindings/phy/brcm,nsp-usb3-phy.txt +++ /dev/null @@ -1,39 +0,0 @@ -Broadcom USB3 phy binding for northstar plus SoC -The USB3 phy is internal to the SoC and is accessed using mdio interface. - -Required mdio bus properties: -- reg: Should be 0x0 for SoC internal USB3 phy -- #address-cells: must be 1 -- #size-cells: must be 0 - -Required USB3 PHY properties: -- compatible: should be "brcm,nsp-usb3-phy" -- reg: USB3 Phy address on SoC internal MDIO bus and it should be 0x10. -- usb3-ctrl-syscon: handler of syscon node defining physical address - of usb3 control register. -- #phy-cells: must be 0 - -Required usb3 control properties: -- compatible: should be "brcm,nsp-usb3-ctrl" -- reg: offset and length of the control registers - -Example: - - mdio@0 { - reg = <0x0>; - #address-cells = <1>; - #size-cells = <0>; - - usb3_phy: usb-phy@10 { - compatible = "brcm,nsp-usb3-phy"; - reg = <0x10>; - usb3-ctrl-syscon = <&usb3_ctrl>; - #phy-cells = <0>; - status = "disabled"; - }; - }; - - usb3_ctrl: syscon@104408 { - compatible = "brcm,nsp-usb3-ctrl", "syscon"; - reg = <0x104408 0x3fc>; - }; -- cgit v1.2.3 From 11d94e026b962eee6e1fbc4237f2cbfbec839067 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Wed, 8 Mar 2017 17:22:42 +0900 Subject: phy: phy-exynos-pcie: fix the wrong error return When it doesn't get the blk_base's resource, it was returned the error about phy_base, not blk_base. This patch is for fixing the wrong error return about blk_base. Fixes: cf0adb8e281b ("phy: phy-exynos-pcie: Add support for Exynos PCIe PHY") Signed-off-by: Jaehoon Chung Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/phy-exynos-pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/phy-exynos-pcie.c b/drivers/phy/phy-exynos-pcie.c index 4f60b83641d5..60baf25d98e2 100644 --- a/drivers/phy/phy-exynos-pcie.c +++ b/drivers/phy/phy-exynos-pcie.c @@ -254,8 +254,8 @@ static int exynos_pcie_phy_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 1); exynos_phy->blk_base = devm_ioremap_resource(dev, res); - if (IS_ERR(exynos_phy->phy_base)) - return PTR_ERR(exynos_phy->phy_base); + if (IS_ERR(exynos_phy->blk_base)) + return PTR_ERR(exynos_phy->blk_base); exynos_phy->drv_data = drv_data; -- cgit v1.2.3 From 1a09b6a7c10e22c489a8b212dd6862b1fd9674ad Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 9 Mar 2017 13:45:44 +0530 Subject: phy: qcom-usb-hs: Add depends on EXTCON We get the following compile errors if EXTCON is enabled as a module but this driver is builtin: drivers/built-in.o: In function `qcom_usb_hs_phy_power_off': phy-qcom-usb-hs.c:(.text+0x1089): undefined reference to `extcon_unregister_notifier' drivers/built-in.o: In function `qcom_usb_hs_phy_probe': phy-qcom-usb-hs.c:(.text+0x11b5): undefined reference to `extcon_get_edev_by_phandle' drivers/built-in.o: In function `qcom_usb_hs_phy_power_on': phy-qcom-usb-hs.c:(.text+0x128e): undefined reference to `extcon_get_state' phy-qcom-usb-hs.c:(.text+0x12a9): undefined reference to `extcon_register_notifier' so let's mark this as needing to follow the modular status of the extcon framework. Fixes: 9994a33865f4 e2427b09ba929c2b9 (phy: Add support for Qualcomm's USB HS phy") Signed-off-by: Stephen Boyd Signed-off-by: Kishon Vijay Abraham I --- drivers/phy/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index c11044439fd4..005cadb7a3f8 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -449,6 +449,7 @@ config PHY_QCOM_UFS config PHY_QCOM_USB_HS tristate "Qualcomm USB HS PHY module" depends on USB_ULPI_BUS + depends on EXTCON || !EXTCON # if EXTCON=m, this cannot be built-in select GENERIC_PHY help Support for the USB high-speed ULPI compliant phy on Qualcomm -- cgit v1.2.3 From d6c098a1db468b7fd4635e831f276851dfd8852c Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 8 Mar 2017 15:18:54 -0800 Subject: ASoC: don't dereference NULL pcm_{new,free} Not all platform drivers have pcm_{new,free} callbacks. Seen with a "snd-soc-dummy" codec from sound/soc/rockchip/rk3399_gru_sound.c. Fixes: 99b04f4c4051 ("ASoC: add Component level pcm_new/pcm_free") Signed-off-by: Brian Norris Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 6dca408faae3..2722bb0c5573 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3326,7 +3326,10 @@ static int snd_soc_platform_drv_pcm_new(struct snd_soc_pcm_runtime *rtd) { struct snd_soc_platform *platform = rtd->platform; - return platform->driver->pcm_new(rtd); + if (platform->driver->pcm_new) + return platform->driver->pcm_new(rtd); + else + return 0; } static void snd_soc_platform_drv_pcm_free(struct snd_pcm *pcm) @@ -3334,7 +3337,8 @@ static void snd_soc_platform_drv_pcm_free(struct snd_pcm *pcm) struct snd_soc_pcm_runtime *rtd = pcm->private_data; struct snd_soc_platform *platform = rtd->platform; - platform->driver->pcm_free(pcm); + if (platform->driver->pcm_free) + platform->driver->pcm_free(pcm); } /** -- cgit v1.2.3 From 6d399783e9d4e9bd44931501948059d24ad96ff8 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 23 Feb 2017 12:26:41 -0800 Subject: md/raid10: submit bio directly to replacement disk Commit 57c67df(md/raid10: submit IO from originating thread instead of md thread) submits bio directly for normal disks but not for replacement disks. There is no point we shouldn't do this for replacement disks. Cc: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 063c43d83b72..1443305613c5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1477,11 +1477,24 @@ retry_write: mbio->bi_bdev = (void*)rdev; atomic_inc(&r10_bio->remaining); + + cb = blk_check_plugged(raid10_unplug, mddev, + sizeof(*plug)); + if (cb) + plug = container_of(cb, struct raid10_plug_cb, + cb); + else + plug = NULL; spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; + if (plug) { + bio_list_add(&plug->pending, mbio); + plug->pending_cnt++; + } else { + bio_list_add(&conf->pending_bio_list, mbio); + conf->pending_count++; + } spin_unlock_irqrestore(&conf->device_lock, flags); - if (!mddev_check_plugged(mddev)) + if (!plug) md_wakeup_thread(mddev->thread); } } -- cgit v1.2.3 From 99b3d74ec05c4a4c57766a90d65b53d78ab06404 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 23 Feb 2017 12:31:10 -0800 Subject: md: delete dead code Nobody is using mddev_check_plugged(), so delete the dead code Signed-off-by: Shaohua Li --- drivers/md/md.c | 8 -------- drivers/md/md.h | 6 ------ 2 files changed, 14 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 548d1b8014f8..82bd1f3d2b19 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) } EXPORT_SYMBOL(md_flush_request); -void md_unplug(struct blk_plug_cb *cb, bool from_schedule) -{ - struct mddev *mddev = cb->data; - md_wakeup_thread(mddev->thread); - kfree(cb); -} -EXPORT_SYMBOL(md_unplug); - static inline struct mddev *mddev_get(struct mddev *mddev) { atomic_inc(&mddev->active); diff --git a/drivers/md/md.h b/drivers/md/md.h index b8859cbf84b6..dde8ecb760c8 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev); extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, struct mddev *mddev); -extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_update_sb(struct mddev *mddev, int force); extern void md_kick_rdev_from_array(struct md_rdev * rdev); struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); -static inline int mddev_check_plugged(struct mddev *mddev) -{ - return !!blk_check_plugged(md_unplug, mddev, - sizeof(struct blk_plug_cb)); -} static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) { -- cgit v1.2.3 From 9c8043f337f14d1743006dfc59c03e80a42e3884 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:12 +0800 Subject: md-cluster: free md_cluster_info if node leave cluster To avoid memory leak, we need to free the cinfo which is allocated when node join cluster. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 2b13117fb918..ba7edcdd09ce 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -974,6 +974,7 @@ static int leave(struct mddev *mddev) lockres_free(cinfo->bitmap_lockres); unlock_all_bitmaps(mddev); dlm_release_lockspace(cinfo->lockspace, 2); + kfree(cinfo); return 0; } -- cgit v1.2.3 From 75df023f4f2188c21181996e28234fef9351ef45 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:13 +0800 Subject: md-cluster: remove useless memset from gather_all_resync_info This memset is not needed. The lvb is already zeroed because it was recently allocated by lockres_init, which uses kzalloc(), and read_resync_info() doesn't need it to be zero anyway. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index ba7edcdd09ce..321ecac23027 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) bm_lockres->flags |= DLM_LKF_NOQUEUE; ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); if (ret == -EAGAIN) { - memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE); s = read_resync_info(mddev, bm_lockres); if (s) { pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n", -- cgit v1.2.3 From c94836342192b05d599d6aa3397f732f7a238689 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:23 +0800 Subject: md: move funcs from pers->resize to update_size raid1_resize and raid5_resize should also check the mddev->queue if run underneath dm-raid. And both set_capacity and revalidate_disk are used in pers->resize such as raid1, raid10 and raid5. So move them from personality file to common code. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md.c | 8 ++++++-- drivers/md/raid1.c | 2 -- drivers/md/raid10.c | 4 ---- drivers/md/raid5.c | 2 -- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 82bd1f3d2b19..bd15a18485c8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6525,8 +6525,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) return -ENOSPC; } rv = mddev->pers->resize(mddev, num_sectors); - if (!rv) - revalidate_disk(mddev->gendisk); + if (!rv) { + if (mddev->queue) { + set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); + } + } return rv; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fbc2d7851b49..10c3865e1186 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3246,8 +3246,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, newsize); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1443305613c5..c4db6d1fb6a2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3956,10 +3956,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, size); - if (mddev->queue) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - } if (sectors > mddev->dev_sectors && mddev->recovery_cp > oldsize) { mddev->recovery_cp = oldsize; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4fb09b3fcb41..6bfedfcf41c1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7605,8 +7605,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, newsize); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; -- cgit v1.2.3 From 1b3bae49fba52f1ec499c36c53bc07761a9f6c4d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Mar 2017 07:31:28 +1100 Subject: md: don't impose the MD_SB_DISKS limit on arrays without metadata. These arrays, created with "mdadm --build" don't benefit from a limit. The default will be used, which is '0' and is interpreted as "don't impose a limit". Reported-by: ian_bruce@mail.ru Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index bd15a18485c8..cd89ad3c3a0d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6450,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->layout = info->layout; mddev->chunk_sectors = info->chunk_size >> 9; - mddev->max_disks = MD_SB_DISKS; - if (mddev->persistent) { - mddev->flags = 0; - mddev->sb_flags = 0; + mddev->max_disks = MD_SB_DISKS; + mddev->flags = 0; + mddev->sb_flags = 0; } set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); -- cgit v1.2.3 From 61eb2b43b99ebdc9bc6bc83d9792257b243e7cb3 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 28 Feb 2017 13:00:20 -0800 Subject: md/raid1/10: fix potential deadlock Neil Brown pointed out a potential deadlock in raid 10 code with bio_split/chain. The raid1 code could have the same issue, but recent barrier rework makes it less likely to happen. The deadlock happens in below sequence: 1. generic_make_request(bio), this will set current->bio_list 2. raid10_make_request will split bio to bio1 and bio2 3. __make_request(bio1), wait_barrer, add underlayer disk bio to current->bio_list 4. __make_request(bio2), wait_barrer If raise_barrier happens between 3 & 4, since wait_barrier runs at 3, raise_barrier waits for IO completion from 3. And since raise_barrier sets barrier, 4 waits for raise_barrier. But IO from 3 can't be dispatched because raid10_make_request() doesn't finished yet. The solution is to adjust the IO ordering. Quotes from Neil: " It is much safer to: if (need to split) { split = bio_split(bio, ...) bio_chain(...) make_request_fn(split); generic_make_request(bio); } else make_request_fn(mddev, bio); This way we first process the initial section of the bio (in 'split') which will queue some requests to the underlying devices. These requests will be queued in generic_make_request. Then we queue the remainder of the bio, which will be added to the end of the generic_make_request queue. Then we return. generic_make_request() will pop the lower-level device requests off the queue and handle them first. Then it will process the remainder of the original bio once the first section has been fully processed. " Note, this only happens in read path. In write path, the bio is flushed to underlaying disks either by blk flush (from schedule) or offladed to raid1/10d. It's queued in current->bio_list. Cc: Coly Li Cc: stable@vger.kernel.org (v3.14+, only the raid10 part) Suggested-by: NeilBrown Reviewed-by: Jack Wang Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 25 +++++++++++++++++++++++-- drivers/md/raid10.c | 18 ++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 10c3865e1186..c33e96e33b8e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio) split = bio; } - if (bio_data_dir(split) == READ) + if (bio_data_dir(split) == READ) { raid1_read_request(mddev, split); - else + + /* + * If a bio is splitted, the first part of bio will + * pass barrier but the bio is queued in + * current->bio_list (see generic_make_request). If + * there is a raise_barrier() called here, the second + * part of bio can't pass barrier. But since the first + * part bio isn't dispatched to underlaying disks yet, + * the barrier is never released, hence raise_barrier + * will alays wait. We have a deadlock. + * Note, this only happens in read path. For write + * path, the first part of bio is dispatched in a + * schedule() call (because of blk plug) or offloaded + * to raid10d. + * Quitting from the function immediately can change + * the bio order queued in bio_list and avoid the deadlock. + */ + if (split != bio) { + generic_make_request(bio); + break; + } + } else raid1_write_request(mddev, split); } while (split != bio); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c4db6d1fb6a2..b1b1f982a722 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1584,7 +1584,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) split = bio; } + /* + * If a bio is splitted, the first part of bio will pass + * barrier but the bio is queued in current->bio_list (see + * generic_make_request). If there is a raise_barrier() called + * here, the second part of bio can't pass barrier. But since + * the first part bio isn't dispatched to underlaying disks + * yet, the barrier is never released, hence raise_barrier will + * alays wait. We have a deadlock. + * Note, this only happens in read path. For write path, the + * first part of bio is dispatched in a schedule() call + * (because of blk plug) or offloaded to raid10d. + * Quitting from the function immediately can change the bio + * order queued in bio_list and avoid the deadlock. + */ __make_request(mddev, split); + if (split != bio && bio_data_dir(bio) == READ) { + generic_make_request(bio); + break; + } } while (split != bio); /* In case raid10d snuck in to freeze_array */ -- cgit v1.2.3 From 6e347b5e05ea2ac4ac467a5a1cfaebb2c7f06f80 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 9 Mar 2017 11:27:07 -0600 Subject: PCI: iproc: Save host bridge window resource in struct iproc_pcie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The host bridge memory window resource is inserted into the iomem_resource tree and cannot be deallocated until the host bridge itself is removed. Previously, the window was on the stack, which meant the iomem_resource entry pointed into the stack and was corrupted as soon as the probe function returned, which caused memory corruption and errors like this: pcie_iproc_bcma bcma0:8: resource collision: [mem 0x40000000-0x47ffffff] conflicts with PCIe MEM space [mem 0x40000000-0x47ffffff] Move the memory window resource from the stack into struct iproc_pcie so its lifetime matches that of the host bridge. Fixes: c3245a566400 ("PCI: iproc: Request host bridge window resources") Reported-and-tested-by: Rafał Miłecki Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.8+ --- drivers/pci/host/pcie-iproc-bcma.c | 24 ++++++++++++------------ drivers/pci/host/pcie-iproc-platform.c | 19 ++++++++++--------- drivers/pci/host/pcie-iproc.h | 1 + 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c index bd4c9ec25edc..384c27e664fe 100644 --- a/drivers/pci/host/pcie-iproc-bcma.c +++ b/drivers/pci/host/pcie-iproc-bcma.c @@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) { struct device *dev = &bdev->dev; struct iproc_pcie *pcie; - LIST_HEAD(res); - struct resource res_mem; + LIST_HEAD(resources); int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); @@ -63,22 +62,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev) pcie->base_addr = bdev->addr; - res_mem.start = bdev->addr_s[0]; - res_mem.end = bdev->addr_s[0] + SZ_128M - 1; - res_mem.name = "PCIe MEM space"; - res_mem.flags = IORESOURCE_MEM; - pci_add_resource(&res, &res_mem); + pcie->mem.start = bdev->addr_s[0]; + pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1; + pcie->mem.name = "PCIe MEM space"; + pcie->mem.flags = IORESOURCE_MEM; + pci_add_resource(&resources, &pcie->mem); pcie->map_irq = iproc_pcie_bcma_map_irq; - ret = iproc_pcie_setup(pcie, &res); - if (ret) + ret = iproc_pcie_setup(pcie, &resources); + if (ret) { dev_err(dev, "PCIe controller setup failed\n"); - - pci_free_resource_list(&res); + pci_free_resource_list(&resources); + return ret; + } bcma_set_drvdata(bdev, pcie); - return ret; + return 0; } static void iproc_pcie_bcma_remove(struct bcma_device *bdev) diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c index f4909bb0b2ad..8c6a327ca6cd 100644 --- a/drivers/pci/host/pcie-iproc-platform.c +++ b/drivers/pci/host/pcie-iproc-platform.c @@ -51,7 +51,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) struct device_node *np = dev->of_node; struct resource reg; resource_size_t iobase = 0; - LIST_HEAD(res); + LIST_HEAD(resources); int ret; pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL); @@ -96,10 +96,10 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) pcie->phy = NULL; } - ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase); + ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &resources, + &iobase); if (ret) { - dev_err(dev, - "unable to get PCI host bridge resources\n"); + dev_err(dev, "unable to get PCI host bridge resources\n"); return ret; } @@ -112,14 +112,15 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev) pcie->map_irq = of_irq_parse_and_map_pci; } - ret = iproc_pcie_setup(pcie, &res); - if (ret) + ret = iproc_pcie_setup(pcie, &resources); + if (ret) { dev_err(dev, "PCIe controller setup failed\n"); - - pci_free_resource_list(&res); + pci_free_resource_list(&resources); + return ret; + } platform_set_drvdata(pdev, pcie); - return ret; + return 0; } static int iproc_pcie_pltfm_remove(struct platform_device *pdev) diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h index 04fed8e907f1..0bbe2ea44f3e 100644 --- a/drivers/pci/host/pcie-iproc.h +++ b/drivers/pci/host/pcie-iproc.h @@ -90,6 +90,7 @@ struct iproc_pcie { #ifdef CONFIG_ARM struct pci_sys_data sysdata; #endif + struct resource mem; struct pci_bus *root_bus; struct phy *phy; int (*map_irq)(const struct pci_dev *, u8, u8); -- cgit v1.2.3 From 5427290d64c752b97d6a2af7506771c7d10eb750 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 7 Mar 2017 17:22:43 +0800 Subject: SUNRPC/backchanel: set XPT_CONG_CTRL flag for bc xprt The xprt for backchannel is created separately, not in TCP/UDP code. It needs the XPT_CONG_CTRL flag set on it too--otherwise requests on the NFSv4.1 backchannel are rjected in svc_process_common(): 1191 if (versp->vs_need_cong_ctrl && 1192 !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags)) 1193 goto err_bad_vers; Fixes: 5283b03ee5 ("nfs/nfsd/sunrpc: enforce transport...") Signed-off-by: Kinglong Mee Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8931e33b6541..2b720fa35c4f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1635,6 +1635,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, xprt = &svsk->sk_xprt; svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); serv->sv_bc_xprt = xprt; -- cgit v1.2.3 From a1016e94cce9fb6ea56d7602263783e2d95d6e92 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Mar 2017 17:14:32 +0000 Subject: ARM: wire up statx syscall Wire up the new statx syscall for ARM. Signed-off-by: Russell King --- arch/arm/tools/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 3c2cb5d5adfa..0bb0e9c6376c 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -411,3 +411,4 @@ 394 common pkey_mprotect sys_pkey_mprotect 395 common pkey_alloc sys_pkey_alloc 396 common pkey_free sys_pkey_free +397 common statx sys_statx -- cgit v1.2.3 From 9a8b0a230aca55ee142fd76f4765f1da1799da93 Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Wed, 15 Feb 2017 14:00:15 +0000 Subject: drm: mali-dp: Remove mclk rate management The rate of mclk depends on the use-case. If no downscaling is required, then mclk == pxlclk is a valid option; with downscaling however, the rate at which mclk runs determines how much a plane can be downscaled before composition. This is a system integration + power management issue that is more suited to firmware rather than this driver. Signed-off-by: Mihail Atanassov Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_crtc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 08e6a71f5d05..294b53697334 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -63,8 +63,7 @@ static void malidp_crtc_enable(struct drm_crtc *crtc) clk_prepare_enable(hwdev->pxlclk); - /* mclk needs to be set to the same or higher rate than pxlclk */ - clk_set_rate(hwdev->mclk, crtc->state->adjusted_mode.crtc_clock * 1000); + /* We rely on firmware to set mclk to a sensible level. */ clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000); hwdev->modeset(hwdev, &vm); -- cgit v1.2.3 From d1479f6108006555fe33d7cfe8db4f95ad614b9a Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Thu, 9 Feb 2017 11:32:00 +0000 Subject: drm: mali-dp: Fix smart layer not going to composition Use rectangle 1 as a generic plane. Existing code already sets the smart layer bounding box size + offset. The rectangles' offsets are relative to the bounding box, so there is no need to set R1's offset (reset value is 0), just its size which is the same as the bounding box. Signed-off-by: Mihail Atanassov Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_hw.c | 2 +- drivers/gpu/drm/arm/malidp_planes.c | 18 ++++++++++++++++-- drivers/gpu/drm/arm/malidp_regs.h | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index 488aedf5b58d..9f5513006eee 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -83,7 +83,7 @@ static const struct malidp_layer malidp550_layers[] = { { DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE, MALIDP_DE_LV_STRIDE0 }, { DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE, MALIDP_DE_LG_STRIDE }, { DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE, MALIDP_DE_LV_STRIDE0 }, - { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, 0 }, + { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE }, }; #define MALIDP_DE_DEFAULT_PREFETCH_START 5 diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 414aada10fe5..d5aec082294c 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -37,6 +37,8 @@ #define LAYER_V_VAL(x) (((x) & 0x1fff) << 16) #define MALIDP_LAYER_COMP_SIZE 0x010 #define MALIDP_LAYER_OFFSET 0x014 +#define MALIDP550_LS_ENABLE 0x01c +#define MALIDP550_LS_R1_IN_SIZE 0x020 /* * This 4-entry look-up-table is used to determine the full 8-bit alpha value @@ -242,6 +244,11 @@ static void malidp_de_plane_update(struct drm_plane *plane, LAYER_V_VAL(plane->state->crtc_y), mp->layer->base + MALIDP_LAYER_OFFSET); + if (mp->layer->id == DE_SMART) + malidp_hw_write(mp->hwdev, + LAYER_H_VAL(src_w) | LAYER_V_VAL(src_h), + mp->layer->base + MALIDP550_LS_R1_IN_SIZE); + /* first clear the rotation bits */ val = malidp_hw_read(mp->hwdev, mp->layer->base + MALIDP_LAYER_CONTROL); val &= ~LAYER_ROT_MASK; @@ -330,9 +337,16 @@ int malidp_de_planes_init(struct drm_device *drm) plane->hwdev = malidp->dev; plane->layer = &map->layers[i]; - /* Skip the features which the SMART layer doesn't have */ - if (id == DE_SMART) + if (id == DE_SMART) { + /* + * Enable the first rectangle in the SMART layer to be + * able to use it as a drm plane. + */ + malidp_hw_write(malidp->dev, 1, + plane->layer->base + MALIDP550_LS_ENABLE); + /* Skip the features which the SMART layer doesn't have. */ continue; + } drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags); malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT, diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h index aff6d4a84e99..b816067a65c5 100644 --- a/drivers/gpu/drm/arm/malidp_regs.h +++ b/drivers/gpu/drm/arm/malidp_regs.h @@ -84,6 +84,7 @@ /* Stride register offsets relative to Lx_BASE */ #define MALIDP_DE_LG_STRIDE 0x18 #define MALIDP_DE_LV_STRIDE0 0x18 +#define MALIDP550_DE_LS_R1_STRIDE 0x28 /* macros to set values into registers */ #define MALIDP_DE_H_FRONTPORCH(x) (((x) & 0xfff) << 0) -- cgit v1.2.3 From 8b38f890eff6efc153130254a96452570f971159 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 10 Mar 2017 10:41:50 +0900 Subject: MAINTAINERS: add Masahiro Yamada as a Kbuild maintainer It has been difficult lately for Michal to work on Kbuild on his regular basis. We discussed the maintainership of Kbuild, and I decided to be a co-maintainer. Add myself to the maintainer field, and replace the repository with my own. Signed-off-by: Masahiro Yamada Acked-by: Michal Marek --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c265a5fe4848..f1023c52b1e3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7084,9 +7084,9 @@ S: Maintained F: fs/autofs4/ KERNEL BUILD + files below scripts/ (unless maintained elsewhere) +M: Masahiro Yamada M: Michal Marek -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git for-next -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git rc-fixes +T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git L: linux-kbuild@vger.kernel.org S: Maintained F: Documentation/kbuild/ -- cgit v1.2.3 From cb6950b7152fb3760942f9cb16bd2a35e5a1bfd1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 7 Mar 2017 00:34:57 +0530 Subject: arm64: kprobes: remove kprobe_exceptions_notify Commit fc62d0207ae0 ("kprobes: Introduce weak variant of kprobe_exceptions_notify()") introduces a generic empty version of the function for architectures that don't need special handling, like arm64. As such, remove the arch/arm64/ specific handler. Signed-off-by: Naveen N. Rao Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 2a07aae5b8a2..c5c45942fb6e 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) return 0; } -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return NOTIFY_DONE; -} - static void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur_kprobe; -- cgit v1.2.3 From b0de0ccc8b9edd8846828e0ecdc35deacdf186b0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 6 Mar 2017 19:06:40 +0000 Subject: arm64: kasan: avoid bad virt_to_pfn() Booting a v4.11-rc1 kernel with DEBUG_VIRTUAL and KASAN enabled produces the following splat (trimmed for brevity): [ 0.000000] virt_to_phys used for non-linear address: ffff200008080000 (0xffff200008080000) [ 0.000000] WARNING: CPU: 0 PID: 0 at arch/arm64/mm/physaddr.c:14 __virt_to_phys+0x48/0x70 [ 0.000000] PC is at __virt_to_phys+0x48/0x70 [ 0.000000] LR is at __virt_to_phys+0x48/0x70 [ 0.000000] Call trace: [ 0.000000] [] __virt_to_phys+0x48/0x70 [ 0.000000] [] kasan_init+0x1c0/0x498 [ 0.000000] [] setup_arch+0x2fc/0x948 [ 0.000000] [] start_kernel+0xb8/0x570 [ 0.000000] [] __primary_switched+0x6c/0x74 This is because we use virt_to_pfn() on a kernel image address when trying to figure out its nid, so that we can allocate its shadow from the same node. As with other recent changes, this patch uses lm_alias() to solve this. We could instead use NUMA_NO_NODE, as x86 does for all shadow allocations, though we'll likely want the "real" memory shadow to be backed from its corresponding nid anyway, so we may as well be consistent and find the nid for the image shadow. Cc: Catalin Marinas Cc: Will Deacon Acked-by: Laura Abbott Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/kasan_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 55d1e9205543..687a358a3733 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -162,7 +162,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); vmemmap_populate(kimg_shadow_start, kimg_shadow_end, - pfn_to_nid(virt_to_pfn(_text))); + pfn_to_nid(virt_to_pfn(lm_alias(_text)))); /* * vmemmap_populate() has populated the shadow region that covers the -- cgit v1.2.3 From 5c2a625937ba49bc691089370638223d310cda9a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Mar 2017 16:27:04 -0800 Subject: arm64: support keyctl() system call in 32-bit mode As is the case for a number of other architectures that have a 32-bit compat mode, enable KEYS_COMPAT if both COMPAT and KEYS are enabled. This allows AArch32 programs to use the keyctl() system call when running on an AArch64 kernel. Signed-off-by: Eric Biggers Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a39029b5414e..f21e9a76ff67 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1063,6 +1063,10 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC +config KEYS_COMPAT + def_bool y + depends on COMPAT && KEYS + endmenu menu "Power management options" -- cgit v1.2.3 From 14088540ad63c648e5cdf490412033f792d16b6b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 10 Mar 2017 17:44:18 +0000 Subject: arm64: use const cap for system_uses_ttbr0_pan() Since commit 4b65a5db362783ab ("arm64: Introduce uaccess_{disable,enable} functionality based on TTBR0_EL1"), system_uses_ttbr0_pan() has used cpus_have_cap() to determine whether PAN is present. Since commit a4023f682739439b ("arm64: Add hypervisor safe helper for checking constant capabilities"), which was introduced around the same time, cpus_have_cap() doesn't try to use a static key, and must always perform a load, test, and consitional branch (likely a tbnz for the latter two). Elsewhere, we moved to using cpus_have_const_cap(), which can use a static key (i.e. a non-conditional branch), which is patched at runtime when the feature is detected. This patch makes system_uses_ttbr0_pan() use cpus_have_const_cap(). The static key is likely a win for hot-paths like the uacccess primitives, and this makes our usage consistent regardless. Signed-off-by: Mark Rutland Reviewed-by: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 05310ad8c5ab..f31c48d0cd68 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void) static inline bool system_uses_ttbr0_pan(void) { return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && - !cpus_have_cap(ARM64_HAS_PAN); + !cpus_have_const_cap(ARM64_HAS_PAN); } #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 0e4c0e6ea7d4a988a5ae2791c7cb5769b5256dad Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 17 Feb 2017 15:25:08 +0100 Subject: arm64: kernel: Update kerneldoc for cpu_suspend() rename Commit af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it aligned with arm") renamed cpu_suspend() to arm_cpuidle_suspend(), but forgot to update the kerneldoc header. Fixes: af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it aligned with arm") Signed-off-by: Geert Uytterhoeven Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuidle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 75a0f8acef66..fd691087dc9a 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu) } /** - * cpu_suspend() - function to enter a low-power idle state + * arm_cpuidle_suspend() - function to enter a low-power idle state * @arg: argument to pass to CPU suspend operations * * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU -- cgit v1.2.3 From b6573da139ecbee6f2c77392b51266d4521d50ac Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 10 Mar 2017 10:10:54 -0800 Subject: Input: synaptics-rmi4 - prevent null pointer dereference in f30 If the platform data has f30_data.disable set, f30 in rmi_f30_config() might be null. Prevent a kernel oops by checking for non-null f30. Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_f30.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c index 3422464af229..b8572b342dcb 100644 --- a/drivers/input/rmi4/rmi_f30.c +++ b/drivers/input/rmi4/rmi_f30.c @@ -170,6 +170,10 @@ static int rmi_f30_config(struct rmi_function *fn) rmi_get_platform_data(fn->rmi_dev); int error; + /* can happen if f30_data.disable is set */ + if (!f30) + return 0; + if (pdata->f30_data.trackstick_buttons) { /* Try [re-]establish link to F03. */ f30->f03 = rmi_find_function(fn->rmi_dev, 0x03); -- cgit v1.2.3 From ed46e66cc1b3d684042f92dfa2ab15ee917b4cac Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Mon, 27 Feb 2017 14:30:25 +0200 Subject: iommu/io-pgtable-arm: Check for leaf entry before dereferencing it Do a check for already installed leaf entry at the current level before dereferencing it in order to avoid walking the page table down with wrong pointer to the next level. Signed-off-by: Oleksandr Tyshchenko CC: Will Deacon CC: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index feacc54bec68..f9bc6ebb8140 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -335,8 +335,12 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) pte |= ARM_LPAE_PTE_NSTABLE; __arm_lpae_set_pte(ptep, pte, cfg); - } else { + } else if (!iopte_leaf(pte, lvl)) { cptep = iopte_deref(pte, data); + } else { + /* We require an unmap first */ + WARN_ON(!selftest_running); + return -EEXIST; } /* Rinse, repeat */ -- cgit v1.2.3 From a03849e7210277fa212779b7cd9c30e1ab6194b2 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Mon, 27 Feb 2017 14:30:26 +0200 Subject: iommu/io-pgtable-arm-v7s: Check for leaf entry before dereferencing it Do a check for already installed leaf entry at the current level before dereferencing it in order to avoid walking the page table down with wrong pointer to the next level. Signed-off-by: Oleksandr Tyshchenko CC: Will Deacon CC: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 1c049e2e12bf..8d6ca28c3e1f 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -422,8 +422,12 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, pte |= ARM_V7S_ATTR_NS_TABLE; __arm_v7s_set_pte(ptep, pte, 1, cfg); - } else { + } else if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { cptep = iopte_deref(pte, lvl); + } else { + /* We require an unmap first */ + WARN_ON(!selftest_running); + return -EEXIST; } /* Rinse, repeat */ -- cgit v1.2.3 From d8a8ed9758241e138933c67e40db2db2790eca19 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 9 Mar 2017 13:21:07 -0500 Subject: drm/amd/amdgpu: Disable GFX_PG on Carrizo until compute issues solved Currently compute jobs will stall if GFX_PG is enabled. Until this is resolved we'll disable GFX_PG. Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 50bdb24ef8d6..4a785d6acfb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1051,7 +1051,7 @@ static int vi_common_early_init(void *handle) /* rev0 hardware requires workarounds to support PG */ adev->pg_flags = 0; if (adev->rev_id != 0x00) { - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | AMD_PG_SUPPORT_CP | -- cgit v1.2.3 From 607523d19c9d67ba4cf7bdaced644f11ed04992c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 10 Mar 2017 12:13:04 +1000 Subject: drm/amdgpu: fix parser init error path to avoid crash in parser fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we don't reset the chunk info in the error path, the subsequent fini path will double free. Reviewed-by: Christian König Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d2d0f60ff36d..99424cb8020b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -240,6 +240,8 @@ free_partial_kdata: for (; i >= 0; i--) drm_free_large(p->chunks[i].kdata); kfree(p->chunks); + p->chunks = NULL; + p->nchunks = 0; put_ctx: amdgpu_ctx_put(p->ctx); free_chunk: -- cgit v1.2.3 From 3fb632e40d7667d8bedfabc28850ac06d5493f54 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 11:27:23 +0800 Subject: md: fix super_offset endianness in super_1_rdev_size_change The sb->super_offset should be big-endian, but the rdev->sb_start is in host byte order, so fix this by adding cpu_to_le64. Signed-off-by: Jason Yan Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index cd89ad3c3a0d..6e76d97a8fc3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1879,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); - sb->super_offset = rdev->sb_start; + sb->super_offset = cpu_to_le64(rdev->sb_start); sb->sb_csum = calc_sb_1_csum(sb); do { md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, -- cgit v1.2.3 From 1345921393ba23b60d3fcf15933e699232ad25ae Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 11:49:12 +0800 Subject: md: fix incorrect use of lexx_to_cpu in does_sb_need_changing The sb->layout is of type __le32, so we shoud use le32_to_cpu. Signed-off-by: Jason Yan Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 6e76d97a8fc3..f6ae1d67bcd0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2287,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev) /* Check if any mddev parameters have changed */ if ((mddev->dev_sectors != le64_to_cpu(sb->size)) || (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) || - (mddev->layout != le64_to_cpu(sb->layout)) || + (mddev->layout != le32_to_cpu(sb->layout)) || (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) || (mddev->chunk_sectors != le32_to_cpu(sb->chunksize))) return true; -- cgit v1.2.3 From c952cd4e949ab3d07287efc2e80246e03727d15d Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 10 Mar 2017 09:52:20 +0800 Subject: nfsd: map the ENOKEY to nfserr_perm for avoiding warning Now that Ext4 and f2fs filesystems support encrypted directories and files, attempts to access those files may return ENOKEY, resulting in the following WARNING. Map ENOKEY to nfserr_perm instead of nfserr_io. [ 1295.411759] ------------[ cut here ]------------ [ 1295.411787] WARNING: CPU: 0 PID: 12786 at fs/nfsd/nfsproc.c:796 nfserrno+0x74/0x80 [nfsd] [ 1295.411806] nfsd: non-standard errno: -126 [ 1295.411816] Modules linked in: nfsd nfs_acl auth_rpcgss nfsv4 nfs lockd fscache tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock snd_seq_midi snd_seq_midi_event coretemp crct10dif_pclmul crc32_generic crc32_pclmul snd_ens1371 gameport ghash_clmulni_intel snd_ac97_codec f2fs intel_rapl_perf ac97_bus snd_seq ppdev snd_pcm snd_rawmidi snd_timer vmw_balloon snd_seq_device snd joydev soundcore parport_pc parport nfit acpi_cpufreq tpm_tis vmw_vmci tpm_tis_core tpm shpchp i2c_piix4 grace sunrpc xfs libcrc32c vmwgfx drm_kms_helper ttm drm crc32c_intel e1000 mptspi scsi_transport_spi serio_raw mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: nfs_acl] [ 1295.412522] CPU: 0 PID: 12786 Comm: nfsd Tainted: G W 4.11.0-rc1+ #521 [ 1295.412959] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 1295.413814] Call Trace: [ 1295.414252] dump_stack+0x63/0x86 [ 1295.414666] __warn+0xcb/0xf0 [ 1295.415087] warn_slowpath_fmt+0x5f/0x80 [ 1295.415502] ? put_filp+0x42/0x50 [ 1295.415927] nfserrno+0x74/0x80 [nfsd] [ 1295.416339] nfsd_open+0xd7/0x180 [nfsd] [ 1295.416746] nfs4_get_vfs_file+0x367/0x3c0 [nfsd] [ 1295.417182] ? security_inode_permission+0x41/0x60 [ 1295.417591] nfsd4_process_open2+0x9b2/0x1200 [nfsd] [ 1295.418007] nfsd4_open+0x481/0x790 [nfsd] [ 1295.418409] nfsd4_proc_compound+0x395/0x680 [nfsd] [ 1295.418812] nfsd_dispatch+0xb8/0x1f0 [nfsd] [ 1295.419233] svc_process_common+0x4d9/0x830 [sunrpc] [ 1295.419631] svc_process+0xfe/0x1b0 [sunrpc] [ 1295.420033] nfsd+0xe9/0x150 [nfsd] [ 1295.420420] kthread+0x101/0x140 [ 1295.420802] ? nfsd_destroy+0x60/0x60 [nfsd] [ 1295.421199] ? kthread_park+0x90/0x90 [ 1295.421598] ret_from_fork+0x2c/0x40 [ 1295.421996] ---[ end trace 0d5a969cd7852e1f ]--- Signed-off-by: Kinglong Mee Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index fa82b7707e85..03a7e9da4da0 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -786,6 +786,7 @@ nfserrno (int errno) { nfserr_serverfault, -ESERVERFAULT }, { nfserr_serverfault, -ENFILE }, { nfserr_io, -EUCLEAN }, + { nfserr_perm, -ENOKEY }, }; int i; -- cgit v1.2.3 From abcb4dacb098a1baca746406a8775e9930f47f3f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 11:36:39 +1100 Subject: NFSD: further refinement of content of /proc/fs/nfsd/versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to e35659f1b03c ("NFSD: correctly range-check v4.x minor version when setting versions.") v4.0 could not be disabled without disabling all NFSv4 protocols. So the 'versions' file contained ±4 ±4.1 ±4.2. Writing "-4" would disable all v4 completely. Writing +4 would enabled those minor versions that are currently enabled, either by default or otherwise. After that commit, it was possible to disable v4.0 independently. To maximize backward compatibility with use cases which never disabled v4.0, the "versions" file would never contain "+4.0" - that was implied by "+4", unless explicitly negated by "-4.0". This introduced an inconsistency in that it was possible to disable all minor versions, but still have the major version advertised. e.g. "-4.0 -4.1 -4.2 +4" would result in NFSv4 support being advertised, but all attempts to use it rejected. Commit d3635ff07e8c ("nfsd: fix configuration of supported minor versions") and following removed this inconsistency. If all minor version were disabled, the major would be disabled too. If any minor was enabled, the major would be disabled. This patch also treated "+4" as equivalent to "+4.0" and "-4" as "-4.0". A consequence of this is that writing "-4" would only disable 4.0. This is a regression against the earlier behaviour, in a use case that rpc.nfsd actually uses. The command "rpc.nfsd -N 4" will write "+2 +3 -4" to the versions files. Previously, that would disable v4 completely. Now it will only disable v4.0. Also "4.0" never appears in the "versions" file when read. So if only v4.1 is available, the previous kernel would have reported "+4 -4.0 +4.1 -4.2" the current kernel reports "-4 +4.1 -4.2" which could easily confuse. This patch restores the implication that "+4" and "-4" apply more globals and do not imply "4.0". Specifically: writing "-4" will disable all 4.x minor versions. writing "+4" will enable all 4.1 minor version if none are currently enabled. rpc.nfsd will list minor versions before major versions, so rpc.nfsd -V 4.2 -N 4.1 will write "-4.1 +4.2 +2 +3 +4" so it would be a regression for "+4" to enable always all versions. reading "-4" implies that no v4.x are enabled reading "+4" implies that some v4.x are enabled, and that v4.0 is enabled unless "-4.0" is also present. All other minor versions will explicitly be listed. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 73e75ac90525..8bf8f667a8cf 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -538,13 +538,21 @@ out_free: static ssize_t nfsd_print_version_support(char *buf, int remaining, const char *sep, - unsigned vers, unsigned minor) + unsigned vers, int minor) { - const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u"; + const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u"; bool supported = !!nfsd_vers(vers, NFSD_TEST); - if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST)) + if (vers == 4 && minor >= 0 && + !nfsd_minorversion(minor, NFSD_TEST)) supported = false; + if (minor == 0 && supported) + /* + * special case for backward compatability. + * +4.0 is never reported, it is implied by + * +4, unless -4.0 is present. + */ + return 0; return snprintf(buf, remaining, format, sep, supported ? '+' : '-', vers, minor); } @@ -554,7 +562,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) char *mesg = buf; char *vers, *minorp, sign; int len, num, remaining; - unsigned minor; ssize_t tlen = 0; char *sep; struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); @@ -575,6 +582,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) if (len <= 0) return -EINVAL; do { enum vers_op cmd; + unsigned minor; sign = *vers; if (sign == '+' || sign == '-') num = simple_strtol((vers+1), &minorp, 0); @@ -585,8 +593,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) return -EINVAL; if (kstrtouint(minorp+1, 0, &minor) < 0) return -EINVAL; - } else - minor = 0; + } + cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET; switch(num) { case 2: @@ -594,8 +602,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) nfsd_vers(num, cmd); break; case 4: - if (nfsd_minorversion(minor, cmd) >= 0) - break; + if (*minorp == '.') { + if (nfsd_minorversion(minor, cmd) < 0) + return -EINVAL; + } else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) { + /* + * Either we have +4 and no minors are enabled, + * or we have -4 and at least one minor is enabled. + * In either case, propagate 'cmd' to all minors. + */ + minor = 0; + while (nfsd_minorversion(minor, cmd) >= 0) + minor++; + } + break; default: return -EINVAL; } @@ -612,9 +632,11 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { + int minor; if (!nfsd_vers(num, NFSD_AVAIL)) continue; - minor = 0; + + minor = -1; do { len = nfsd_print_version_support(buf, remaining, sep, num, minor); @@ -624,7 +646,8 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) buf += len; tlen += len; minor++; - sep = " "; + if (len) + sep = " "; } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION); } out: -- cgit v1.2.3 From 928c6fb3a9bfd6c5b287aa3465226add551c13c0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 11:36:39 +1100 Subject: NFSD: fix nfsd_minorversion(.., NFSD_AVAIL) Current code will return 1 if the version is supported, and -1 if it isn't. This is confusing and inconsistent with the one place where this is used. So change to return 1 if it is supported, and zero if not. i.e. an error is never returned. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 786a4a2cb2d7..892137b1e330 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -167,7 +167,8 @@ nfsd_adjust_nfsd_versions4(void) int nfsd_minorversion(u32 minorversion, enum vers_op change) { - if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) + if (minorversion > NFSD_SUPPORTED_MINOR_VERSION && + change != NFSD_AVAIL) return -1; switch(change) { case NFSD_SET: -- cgit v1.2.3 From 800a938f0bf9130c8256116649c0cc5806bfb2fd Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 11:36:39 +1100 Subject: NFSD: fix nfsd_reset_versions for NFSv4. If you write "-2 -3 -4" to the "versions" file, it will notice that no versions are enabled, and nfsd_reset_versions() is called. This enables all major versions, not no minor versions. So we lose the invariant that NFSv4 is only advertised when at least one minor is enabled. Fix the code to explicitly enable minor versions for v4, change it to use nfsd_vers() to test and set, and simplify the code. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 892137b1e330..31e1f9593457 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -416,23 +416,20 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) void nfsd_reset_versions(void) { - int found_one = 0; int i; - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { - if (nfsd_program.pg_vers[i]) - found_one = 1; - } + for (i = 0; i < NFSD_NRVERS; i++) + if (nfsd_vers(i, NFSD_TEST)) + return; - if (!found_one) { - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) - nfsd_program.pg_vers[i] = nfsd_version[i]; -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) - nfsd_acl_program.pg_vers[i] = - nfsd_acl_version[i]; -#endif - } + for (i = 0; i < NFSD_NRVERS; i++) + if (i != 4) + nfsd_vers(i, NFSD_SET); + else { + int minor = 0; + while (nfsd_minorversion(minor, NFSD_SET) >= 0) + minor++; + } } /* -- cgit v1.2.3 From a4c2a13129f7c5bcf81704c06851601593303fd5 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 28 Feb 2017 17:14:41 -0800 Subject: Input: i8042 - add TUXEDO BU1406 (N24_25BU) to the nomux list TUXEDO BU1406 does not implement active multiplexing mode properly, and takes around 550 ms in i8042_set_mux_mode(). Given that the device does not have external AUX port, there is no downside in disabling the MUX mode. Reported-by: Paul Menzel Suggested-by: Vojtech Pavlik Reviewed-by: Marcos Paulo de Souza Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index e856b073d533..312bd6ca9198 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -520,6 +520,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "IC4I"), }, }, + { + /* TUXEDO BU1406 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Notebook"), + DMI_MATCH(DMI_PRODUCT_NAME, "N24_25BU"), + }, + }, { } }; -- cgit v1.2.3 From 92ef6f97a66e580189a41a132d0f8a9f78d6ddce Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Fri, 10 Mar 2017 14:33:09 -0800 Subject: Input: elan_i2c - add ASUS EeeBook X205TA special touchpad fw EeeBook X205TA is yet another ASUS device with a special touchpad firmware that needs to be accounted for during initialization, or else the touchpad will go into an invalid state upon suspend/resume. Adding the appropriate ic_type and product_id check fixes the problem. Signed-off-by: Matjaz Hegedic Acked-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 352050e9031d..d5ab9ddef3e3 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -218,17 +218,19 @@ static int elan_query_product(struct elan_tp_data *data) static int elan_check_ASUS_special_fw(struct elan_tp_data *data) { - if (data->ic_type != 0x0E) - return false; - - switch (data->product_id) { - case 0x05 ... 0x07: - case 0x09: - case 0x13: + if (data->ic_type == 0x0E) { + switch (data->product_id) { + case 0x05 ... 0x07: + case 0x09: + case 0x13: + return true; + } + } else if (data->ic_type == 0x08 && data->product_id == 0x26) { + /* ASUS EeeBook X205TA */ return true; - default: - return false; } + + return false; } static int __elan_initialize(struct elan_tp_data *data) -- cgit v1.2.3 From 0134ed4fb9e78672ee9f7b18007114404c81e63f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Mar 2017 13:24:22 -0700 Subject: device-dax: fix pmd/pte fault fallback handling Jeff Moyer reports: With a device dax alignment of 4KB or 2MB, I get sigbus when running the attached fio job file for the current kernel (4.11.0-rc1+). If I specify an alignment of 1GB, it works. I turned on debug output, and saw that it was failing in the huge fault code. dax dax1.0: dax_open dax dax1.0: dax_mmap dax dax1.0: dax_dev_huge_fault: fio: write (0x7f08f0a00000 - dax dax1.0: __dax_dev_pud_fault: phys_to_pgoff(0xffffffffcf60 dax dax1.0: dax_release fio config for reproduce: [global] ioengine=dev-dax direct=0 filename=/dev/dax0.0 bs=2m [write] rw=write [read] stonewall rw=read The driver fails to fallback when taking a fault that is larger than the device alignment, or handling a larger fault when a smaller mapping is already established. While we could support larger mappings for a device with a smaller alignment, that change is too large for the immediate fix. The simplest change is to force fallback until the fault size matches the alignment. Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Cc: Reported-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/dax.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 8d9829ff2a78..a284dc532e46 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -427,6 +427,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) int rc = VM_FAULT_SIGBUS; phys_addr_t phys; pfn_t pfn; + unsigned int fault_size = PAGE_SIZE; if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; @@ -437,6 +438,9 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (fault_size != dax_region->align) + return VM_FAULT_SIGBUS; + phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE); if (phys == -1) { dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, @@ -464,6 +468,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) phys_addr_t phys; pgoff_t pgoff; pfn_t pfn; + unsigned int fault_size = PMD_SIZE; if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; @@ -480,6 +485,16 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (fault_size < dax_region->align) + return VM_FAULT_SIGBUS; + else if (fault_size > dax_region->align) + return VM_FAULT_FALLBACK; + + /* if we are outside of the VMA */ + if (pmd_addr < vmf->vma->vm_start || + (pmd_addr + PMD_SIZE) > vmf->vma->vm_end) + return VM_FAULT_SIGBUS; + pgoff = linear_page_index(vmf->vma, pmd_addr); phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE); if (phys == -1) { -- cgit v1.2.3 From 4607ebf04b8190d2c7aa5504959e9fddfc0cd736 Mon Sep 17 00:00:00 2001 From: "Allan, Bruce W" Date: Tue, 7 Mar 2017 15:48:23 -0800 Subject: kbuild: external module build warnings when KBUILD_OUTPUT set and W=1 Commit db547ef19064 ("Kbuild: don't add obj tree in additional includes") causes warnings (-Wmissing-include-dirs) when compiling external modules with KBUILD_OUTPUT set and W=1. This is because $src can be an absolute path to the external module source which when prefixed with -I$(srctree)/ generates an incorrect directory path. Signed-off-by: Bruce Allan Acked-by: Arnd Bergmann Signed-off-by: Masahiro Yamada --- scripts/Makefile.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 0a07f9014944..7234e61e7ce3 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -155,7 +155,7 @@ else # $(call addtree,-I$(obj)) locates .h files in srctree, from generated .c files # and locates generated .h files # FIXME: Replace both with specific CFLAGS* statements in the makefiles -__c_flags = $(if $(obj),-I$(srctree)/$(src) -I$(obj)) \ +__c_flags = $(if $(obj),$(call addtree,-I$(src)) -I$(obj)) \ $(call flags,_c_flags) __a_flags = $(call flags,_a_flags) __cpp_flags = $(call flags,_cpp_flags) -- cgit v1.2.3 From 70b085b06c4560a69e95607f77bb4c2b2e41943c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Mar 2017 13:24:27 -0700 Subject: device-dax: fix pud fault fallback handling Jeff Moyer reports: With a device dax alignment of 4KB or 2MB, I get sigbus when running the attached fio job file for the current kernel (4.11.0-rc1+). If I specify an alignment of 1GB, it works. I turned on debug output, and saw that it was failing in the huge fault code. dax dax1.0: dax_open dax dax1.0: dax_mmap dax dax1.0: dax_dev_huge_fault: fio: write (0x7f08f0a00000 - dax dax1.0: __dax_dev_pud_fault: phys_to_pgoff(0xffffffffcf60) dax dax1.0: dax_release fio config for reproduce: [global] ioengine=dev-dax direct=0 filename=/dev/dax0.0 bs=2m [write] rw=write [read] stonewall rw=read The driver fails to fallback when taking a fault that is larger than the device alignment, or handling a larger fault when a smaller mapping is already established. While we could support larger mappings for a device with a smaller alignment, that change is too large for the immediate fix. The simplest change is to force fallback until the fault size matches the alignment. Reported-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/dax.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index a284dc532e46..523fecec7bda 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -518,6 +518,8 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) phys_addr_t phys; pgoff_t pgoff; pfn_t pfn; + unsigned int fault_size = PUD_SIZE; + if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; @@ -534,6 +536,16 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (fault_size < dax_region->align) + return VM_FAULT_SIGBUS; + else if (fault_size > dax_region->align) + return VM_FAULT_FALLBACK; + + /* if we are outside of the VMA */ + if (pud_addr < vmf->vma->vm_start || + (pud_addr + PUD_SIZE) > vmf->vma->vm_end) + return VM_FAULT_SIGBUS; + pgoff = linear_page_index(vmf->vma, pud_addr); phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE); if (phys == -1) { -- cgit v1.2.3 From 52084f89b38cdd896b59627c629915ef1a7bf615 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 9 Mar 2017 16:56:01 -0700 Subject: device-dax: fix debug output typo The debug output for return the return data of pgoff_to_phys() in the fault handlers has 'phys' and 'pgoff' incorrectly swapped. Reported-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/dax.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 523fecec7bda..80c6db279ae1 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -443,7 +443,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, vmf->pgoff); return VM_FAULT_SIGBUS; } @@ -498,7 +498,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) pgoff = linear_page_index(vmf->vma, pmd_addr); phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, pgoff); return VM_FAULT_SIGBUS; } @@ -549,7 +549,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) pgoff = linear_page_index(vmf->vma, pud_addr); phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, pgoff); return VM_FAULT_SIGBUS; } -- cgit v1.2.3 From c962cff17dfa11f4a8227ac16de2b28aea3312e4 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:23 +0800 Subject: Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when booting" Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting") The mapping of "cpuid <-> nodeid" is established at boot time via ACPI tables to keep associations of workqueues and other node related items consistent across cpu hotplug. But, ACPI tables are unreliable and failures with that boot time mapping have been reported on machines where the ACPI table and the physical information which is retrieved at actual hotplug is inconsistent. Revert the mapping implementation so it can be replaced with a less error prone approach. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 2 +- drivers/acpi/acpi_processor.c | 5 --- drivers/acpi/bus.c | 1 - drivers/acpi/processor_core.c | 73 ------------------------------------------- include/linux/acpi.h | 3 -- 5 files changed, 1 insertion(+), 83 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ae32838cac5f..f6b0e87d2388 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -710,7 +710,7 @@ static void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 4467a8089ab8..5d208a99d0c9 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu) void __weak arch_unregister_cpu(int cpu) {} -int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) -{ - return -ENODEV; -} - static int acpi_processor_hotadd_init(struct acpi_processor *pr) { unsigned long long sta; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 80cb5eb75b63..34fbe027e73a 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1249,7 +1249,6 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); - acpi_set_processor_mapping(); return 0; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 611a5585a902..a84386204659 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -278,79 +278,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static bool __init -map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid) -{ - int type, id; - u32 acpi_id; - acpi_status status; - acpi_object_type acpi_type; - unsigned long long tmp; - union acpi_object object = { 0 }; - struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - - status = acpi_get_type(handle, &acpi_type); - if (ACPI_FAILURE(status)) - return false; - - switch (acpi_type) { - case ACPI_TYPE_PROCESSOR: - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); - if (ACPI_FAILURE(status)) - return false; - acpi_id = object.processor.proc_id; - - /* validate the acpi_id */ - if(acpi_processor_validate_proc_id(acpi_id)) - return false; - break; - case ACPI_TYPE_DEVICE: - status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); - if (ACPI_FAILURE(status)) - return false; - acpi_id = tmp; - break; - default: - return false; - } - - type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0; - - *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false); - id = acpi_map_cpuid(*phys_id, acpi_id); - - if (id < 0) - return false; - *cpuid = id; - return true; -} - -static acpi_status __init -set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context, - void **rv) -{ - phys_cpuid_t phys_id; - int cpu_id; - - if (!map_processor(handle, &phys_id, &cpu_id)) - return AE_ERROR; - - acpi_map_cpu2node(handle, cpu_id, phys_id); - return AE_OK; -} - -void __init acpi_set_processor_mapping(void) -{ - /* Set persistent cpu <-> node mapping for all processors. */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, set_processor_node_mapping, - NULL, NULL, NULL); -} -#else -void __init acpi_set_processor_mapping(void) {} -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base, u64 *phys_addr, int *ioapic_id) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 673acda012af..63a7519b00cc 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -294,11 +294,8 @@ bool acpi_processor_validate_proc_id(int proc_id); int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -void acpi_set_processor_mapping(void); - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif -- cgit v1.2.3 From 09c3f2bd5c7e5f18687663acb6adc6b167484ca5 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:24 +0800 Subject: Revert"x86/acpi: Enable MADT APIs to return disabled apicids" Revert: 8ad893faf2ea ("x86/acpi: Enable MADT APIs to return disabled apicids") Remove the leftovers of the boot time 'cpuid <-> nodeid' mapping approach. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-3-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/processor_core.c | 60 ++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index a84386204659..b933061b6b60 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -32,12 +32,12 @@ static struct acpi_table_madt *get_madt_table(void) } static int map_lapic_id(struct acpi_subtable_header *entry, - u32 acpi_id, phys_cpuid_t *apic_id, bool ignore_disabled) + u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_apic *lapic = container_of(entry, struct acpi_madt_local_apic, header); - if (ignore_disabled && !(lapic->lapic_flags & ACPI_MADT_ENABLED)) + if (!(lapic->lapic_flags & ACPI_MADT_ENABLED)) return -ENODEV; if (lapic->processor_id != acpi_id) @@ -48,13 +48,12 @@ static int map_lapic_id(struct acpi_subtable_header *entry, } static int map_x2apic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id, - bool ignore_disabled) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_x2apic *apic = container_of(entry, struct acpi_madt_local_x2apic, header); - if (ignore_disabled && !(apic->lapic_flags & ACPI_MADT_ENABLED)) + if (!(apic->lapic_flags & ACPI_MADT_ENABLED)) return -ENODEV; if (device_declaration && (apic->uid == acpi_id)) { @@ -66,13 +65,12 @@ static int map_x2apic_id(struct acpi_subtable_header *entry, } static int map_lsapic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id, - bool ignore_disabled) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_sapic *lsapic = container_of(entry, struct acpi_madt_local_sapic, header); - if (ignore_disabled && !(lsapic->lapic_flags & ACPI_MADT_ENABLED)) + if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED)) return -ENODEV; if (device_declaration) { @@ -89,13 +87,12 @@ static int map_lsapic_id(struct acpi_subtable_header *entry, * Retrieve the ARM CPU physical identifier (MPIDR) */ static int map_gicc_mpidr(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr, - bool ignore_disabled) + int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr) { struct acpi_madt_generic_interrupt *gicc = container_of(entry, struct acpi_madt_generic_interrupt, header); - if (ignore_disabled && !(gicc->flags & ACPI_MADT_ENABLED)) + if (!(gicc->flags & ACPI_MADT_ENABLED)) return -ENODEV; /* device_declaration means Device object in DSDT, in the @@ -112,7 +109,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry, } static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt, - int type, u32 acpi_id, bool ignore_disabled) + int type, u32 acpi_id) { unsigned long madt_end, entry; phys_cpuid_t phys_id = PHYS_CPUID_INVALID; /* CPU hardware ID */ @@ -130,20 +127,16 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt, struct acpi_subtable_header *header = (struct acpi_subtable_header *)entry; if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { - if (!map_lapic_id(header, acpi_id, &phys_id, - ignore_disabled)) + if (!map_lapic_id(header, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) { - if (!map_x2apic_id(header, type, acpi_id, &phys_id, - ignore_disabled)) + if (!map_x2apic_id(header, type, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { - if (!map_lsapic_id(header, type, acpi_id, &phys_id, - ignore_disabled)) + if (!map_lsapic_id(header, type, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) { - if (!map_gicc_mpidr(header, type, acpi_id, &phys_id, - ignore_disabled)) + if (!map_gicc_mpidr(header, type, acpi_id, &phys_id)) break; } entry += header->length; @@ -161,15 +154,14 @@ phys_cpuid_t __init acpi_map_madt_entry(u32 acpi_id) if (!madt) return PHYS_CPUID_INVALID; - rv = map_madt_entry(madt, 1, acpi_id, true); + rv = map_madt_entry(madt, 1, acpi_id); acpi_put_table((struct acpi_table_header *)madt); return rv; } -static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id, - bool ignore_disabled) +static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; @@ -190,38 +182,30 @@ static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id, header = (struct acpi_subtable_header *)obj->buffer.pointer; if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) - map_lapic_id(header, acpi_id, &phys_id, ignore_disabled); + map_lapic_id(header, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) - map_lsapic_id(header, type, acpi_id, &phys_id, ignore_disabled); + map_lsapic_id(header, type, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) - map_x2apic_id(header, type, acpi_id, &phys_id, ignore_disabled); + map_x2apic_id(header, type, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) - map_gicc_mpidr(header, type, acpi_id, &phys_id, - ignore_disabled); + map_gicc_mpidr(header, type, acpi_id, &phys_id); exit: kfree(buffer.pointer); return phys_id; } -static phys_cpuid_t __acpi_get_phys_id(acpi_handle handle, int type, - u32 acpi_id, bool ignore_disabled) +phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) { phys_cpuid_t phys_id; - phys_id = map_mat_entry(handle, type, acpi_id, ignore_disabled); + phys_id = map_mat_entry(handle, type, acpi_id); if (invalid_phys_cpuid(phys_id)) - phys_id = map_madt_entry(get_madt_table(), type, acpi_id, - ignore_disabled); + phys_id = map_madt_entry(get_madt_table(), type, acpi_id); return phys_id; } -phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) -{ - return __acpi_get_phys_id(handle, type, acpi_id, true); -} - int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id) { #ifdef CONFIG_SMP -- cgit v1.2.3 From 2b85b3d22920db7473e5fed5719e7955c0ec323e Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:25 +0800 Subject: x86/acpi: Restore the order of CPU IDs The following commits: f7c28833c2 ("x86/acpi: Enable acpi to register all possible cpus at boot time") and 8f54969dc8 ("x86/acpi: Introduce persistent storage for cpuid <-> apicid mapping") ... registered all the possible CPUs at boot time via ACPI tables to make the mapping of cpuid <-> apicid fixed. Both enabled and disabled CPUs could have a logical CPU ID after boot time. But, ACPI tables are unreliable. the number amd order of Local APIC entries which depends on the firmware is often inconsistent with the physical devices. Even if they are consistent, The disabled CPUs which take up some logical CPU IDs will also make the order discontinuous. Revert the part of disabled CPUs registration, keep the allocation logic of logical CPU IDs and also keep some code location changes. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-4-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 7 ++++++- arch/x86/kernel/apic/apic.c | 26 +++++++------------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f6b0e87d2388..b2879cc23db4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) return -EINVAL; } + if (!enabled) { + ++disabled_cpus; + return -EINVAL; + } + if (boot_cpu_physical_apicid != -1U) ver = boot_cpu_apic_version; - cpu = __generic_processor_info(id, ver, enabled); + cpu = generic_processor_info(id, ver); if (cpu >= 0) early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index aee7deddabd0..8ccb7ef512e0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2063,7 +2063,7 @@ static int allocate_logical_cpuid(int apicid) return nr_logical_cpuids++; } -int __generic_processor_info(int apicid, int version, bool enabled) +int generic_processor_info(int apicid, int version) { int cpu, max = nr_cpu_ids; bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, @@ -2121,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled) if (num_processors >= nr_cpu_ids) { int thiscpu = max + disabled_cpus; - if (enabled) { - pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " - "reached. Processor %d/0x%x ignored.\n", - max, thiscpu, apicid); - } + pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " + "reached. Processor %d/0x%x ignored.\n", + max, thiscpu, apicid); disabled_cpus++; return -EINVAL; @@ -2177,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled) apic->x86_32_early_logical_apicid(cpu); #endif set_cpu_possible(cpu, true); - - if (enabled) { - num_processors++; - physid_set(apicid, phys_cpu_present_map); - set_cpu_present(cpu, true); - } else { - disabled_cpus++; - } + physid_set(apicid, phys_cpu_present_map); + set_cpu_present(cpu, true); + num_processors++; return cpu; } -int generic_processor_info(int apicid, int version) -{ - return __generic_processor_info(apicid, version, true); -} - int hard_smp_processor_id(void) { return read_apic_id(); -- cgit v1.2.3 From 8c8cb30f49b86333d8e036e1945cf1a78c03577e Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:26 +0800 Subject: acpi/processor: Implement DEVICE operator for processor enumeration ACPI allows to declare processors either with the PROCESSOR or with the DEVICE operator. The current implementation handles only the PROCESSOR operator. On a system which uses the DEVICE operator for processor enumeration the evaluation fails. Check for the ACPI type of the ACPI handle and evaluate PROCESSOR and DEVICE types separately. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-5-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/acpi_processor.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 5d208a99d0c9..9a98d7e00200 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -633,25 +633,50 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle, void **rv) { acpi_status status; + acpi_object_type acpi_type; + unsigned long long uid; union acpi_object object = { 0 }; struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); + status = acpi_get_type(handle, &acpi_type); if (ACPI_FAILURE(status)) - acpi_handle_info(handle, "Not get the processor object\n"); - else - processor_validated_ids_update(object.processor.proc_id); + return false; + + switch (acpi_type) { + case ACPI_TYPE_PROCESSOR: + status = acpi_evaluate_object(handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) + goto err; + uid = object.processor.proc_id; + break; + + case ACPI_TYPE_DEVICE: + status = acpi_evaluate_integer(handle, "_UID", NULL, &uid); + if (ACPI_FAILURE(status)) + goto err; + break; + default: + goto err; + } + + processor_validated_ids_update(uid); + return true; + +err: + acpi_handle_info(handle, "Invalid processor object\n"); + return false; - return AE_OK; } -static void __init acpi_processor_check_duplicates(void) +void __init acpi_processor_check_duplicates(void) { - /* Search all processor nodes in ACPI namespace */ + /* check the correctness for all processors in ACPI namespace */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, acpi_processor_ids_walk, NULL, NULL, NULL); + acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, acpi_processor_ids_walk, + NULL, NULL); } bool __init acpi_processor_validate_proc_id(int proc_id) -- cgit v1.2.3 From a77d6cd968497792e072b74dff45b891ba778ddb Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:27 +0800 Subject: acpi/processor: Check for duplicate processor ids at hotplug time The check for duplicate processor ids happens at boot time based on the ACPI table contents, but the final sanity checks for a processor happen at hotplug time. At hotplug time, where the physical information is available, which might differ from the ACPI table information, a check for duplicate processor ids is missing. Add it to the hotplug checks and rename the function so it better reflects its purpose. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-6-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/acpi_processor.c | 13 ++++++++++--- include/linux/acpi.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 9a98d7e00200..0143135b3abe 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -280,6 +280,13 @@ static int acpi_processor_get_info(struct acpi_device *device) pr->acpi_id = value; } + if (acpi_duplicate_processor_id(pr->acpi_id)) { + dev_err(&device->dev, + "Failed to get unique processor _UID (0x%x)\n", + pr->acpi_id); + return -ENODEV; + } + pr->phys_id = acpi_get_phys_id(pr->handle, device_declaration, pr->acpi_id); if (invalid_phys_cpuid(pr->phys_id)) @@ -580,7 +587,7 @@ static struct acpi_scan_handler processor_container_handler = { static int nr_unique_ids __initdata; /* The number of the duplicate processor IDs */ -static int nr_duplicate_ids __initdata; +static int nr_duplicate_ids; /* Used to store the unique processor IDs */ static int unique_processor_ids[] __initdata = { @@ -588,7 +595,7 @@ static int unique_processor_ids[] __initdata = { }; /* Used to store the duplicate processor IDs */ -static int duplicate_processor_ids[] __initdata = { +static int duplicate_processor_ids[] = { [0 ... NR_CPUS - 1] = -1, }; @@ -679,7 +686,7 @@ void __init acpi_processor_check_duplicates(void) NULL, NULL); } -bool __init acpi_processor_validate_proc_id(int proc_id) +bool acpi_duplicate_processor_id(int proc_id) { int i; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 63a7519b00cc..9b05886f9773 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -287,7 +287,7 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) } /* Validate the processor object's proc_id */ -bool acpi_processor_validate_proc_id(int proc_id); +bool acpi_duplicate_processor_id(int proc_id); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -- cgit v1.2.3 From 33d8c15559df4f0bce25d7e16ebb5879e249f2e7 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Thu, 9 Mar 2017 17:58:38 +0100 Subject: Revert "clocksource/drivers/tcb_clksrc: Use 32 bit tcb as sched_clock" This reverts commit 7b9f1d16e6d1 ("clocksource/drivers/tcb_clksrc: Use 32 bit tcb as sched_clock"). In the current state, the kernel warns against a late registration of the new sched_clock, the printk clock resets after only a few minutes, and it seems that scheduling can be affected as well. Signed-off-by: Romain Izard Signed-off-by: Daniel Lezcano --- drivers/clocksource/tcb_clksrc.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 745844ee973e..d4ca9962a759 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -10,7 +10,6 @@ #include #include #include -#include /* @@ -57,14 +56,9 @@ static u64 tc_get_cycles(struct clocksource *cs) return (upper << 16) | lower; } -static u32 tc_get_cv32(void) -{ - return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV)); -} - static u64 tc_get_cycles32(struct clocksource *cs) { - return tc_get_cv32(); + return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV)); } static struct clocksource clksrc = { @@ -75,11 +69,6 @@ static struct clocksource clksrc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static u64 notrace tc_read_sched_clock(void) -{ - return tc_get_cv32(); -} - #ifdef CONFIG_GENERIC_CLOCKEVENTS struct tc_clkevt_device { @@ -350,9 +339,6 @@ static int __init tcb_clksrc_init(void) clksrc.read = tc_get_cycles32; /* setup ony channel 0 */ tcb_setup_single_chan(tc, best_divisor_idx); - - /* register sched_clock on chips with single 32 bit counter */ - sched_clock_register(tc_read_sched_clock, 32, divided_rate); } else { /* tclib will give us three clocks no matter what the * underlying platform supports. -- cgit v1.2.3 From 088db931e065bd3b159fa2e588eab859ef0d9d23 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 10 Mar 2017 18:33:28 +0000 Subject: thermal: Fix potential deadlock in cpu_cooling cooling_list_lock is covering not just cpufreq_dev_count, but also the calls to cpufreq_register_notifier() and cpufreq_unregister_notifier(). Since cooling_list_lock is also used within cpufreq_thermal_notifier(), lockdep reports a potential deadlock. Fix it by testing the condition under cooling_list_lock and dropping the lock before calling cpufreq_register_notifier(). And variable cpufreq_dev_count is removed at the same time, because it's no longer needed after the fix. Fixes: ae606089621e ("thermal: convert cpu_cooling to use an IDA") Reported-and-Tested-by: Russell King Signed-off-by: Matthew Wilcox Acked-by: Rafael J. Wysocki --- drivers/thermal/cpu_cooling.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 91048eeca28b..11b5ea685518 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -107,8 +107,6 @@ struct cpufreq_cooling_device { }; static DEFINE_IDA(cpufreq_ida); -static unsigned int cpufreq_dev_count; - static DEFINE_MUTEX(cooling_list_lock); static LIST_HEAD(cpufreq_dev_list); @@ -771,6 +769,7 @@ __cpufreq_cooling_register(struct device_node *np, unsigned int freq, i, num_cpus; int ret; struct thermal_cooling_device_ops *cooling_ops; + bool first; if (!alloc_cpumask_var(&temp_mask, GFP_KERNEL)) return ERR_PTR(-ENOMEM); @@ -874,13 +873,14 @@ __cpufreq_cooling_register(struct device_node *np, cpufreq_dev->cool_dev = cool_dev; mutex_lock(&cooling_list_lock); + /* Register the notifier for first cpufreq cooling device */ + first = list_empty(&cpufreq_dev_list); list_add(&cpufreq_dev->node, &cpufreq_dev_list); + mutex_unlock(&cooling_list_lock); - /* Register the notifier for first cpufreq cooling device */ - if (!cpufreq_dev_count++) + if (first) cpufreq_register_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); - mutex_unlock(&cooling_list_lock); goto put_policy; @@ -1021,6 +1021,7 @@ EXPORT_SYMBOL(of_cpufreq_power_cooling_register); void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { struct cpufreq_cooling_device *cpufreq_dev; + bool last; if (!cdev) return; @@ -1028,14 +1029,15 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) cpufreq_dev = cdev->devdata; mutex_lock(&cooling_list_lock); + list_del(&cpufreq_dev->node); /* Unregister the notifier for the last cpufreq cooling device */ - if (!--cpufreq_dev_count) + last = list_empty(&cpufreq_dev_list); + mutex_unlock(&cooling_list_lock); + + if (last) cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block, CPUFREQ_POLICY_NOTIFIER); - list_del(&cpufreq_dev->node); - mutex_unlock(&cooling_list_lock); - thermal_cooling_device_unregister(cpufreq_dev->cool_dev); ida_simple_remove(&cpufreq_ida, cpufreq_dev->id); kfree(cpufreq_dev->dyn_power_table); -- cgit v1.2.3 From a4e49c9bc98d099b2a19933b9da55cd0ad1da421 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:01 +0530 Subject: thermal: devfreq: Simplify expression There is no need to check for IS_ERR() as we are looking for a very particular error value here. Drop the first check. Reported-by: Dan Carpenter Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/devfreq_cooling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 7743a78d4723..4793fc7b06dd 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -186,7 +186,7 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) return 0; opp = dev_pm_opp_find_freq_exact(dev, freq, true); - if (IS_ERR(opp) && (PTR_ERR(opp) == -ERANGE)) + if (PTR_ERR(opp) == -ERANGE) opp = dev_pm_opp_find_freq_exact(dev, freq, false); voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ -- cgit v1.2.3 From 8327b830f293ff48eaade64b4fc2e247b7655615 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:02 +0530 Subject: thermal: devfreq_cooling: Replace dev_warn with dev_err There isn't much the user can do on seeing this warning, as the hardware is actually okay. dev_err suits much better here. Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/devfreq_cooling.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 4793fc7b06dd..e99d0e026a53 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -193,9 +193,9 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) dev_pm_opp_put(opp); if (voltage == 0) { - dev_warn_ratelimited(dev, - "Failed to get voltage for frequency %lu: %ld\n", - freq, IS_ERR(opp) ? PTR_ERR(opp) : 0); + dev_err_ratelimited(dev, + "Failed to get voltage for frequency %lu: %ld\n", + freq, IS_ERR(opp) ? PTR_ERR(opp) : 0); return 0; } -- cgit v1.2.3 From afd1f4e0a76f0c56422af7056942f28578f1643e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:03 +0530 Subject: thermal: devfreq: Check OPP for errors It is possible for dev_pm_opp_find_freq_exact() to return errors. It was all fine earlier as dev_pm_opp_get_voltage() had a check within it to check for invalid OPPs, but dev_pm_opp_put() doesn't have any similar checks and the callers need to make sure OPP is valid before calling them. Also update the later dev_warn_ratelimited() to not print the error message as the OPP is guaranteed to be valid now. Reported-by: Dan Carpenter Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/devfreq_cooling.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index e99d0e026a53..4bf4ad58cffd 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -189,13 +189,19 @@ get_static_power(struct devfreq_cooling_device *dfc, unsigned long freq) if (PTR_ERR(opp) == -ERANGE) opp = dev_pm_opp_find_freq_exact(dev, freq, false); + if (IS_ERR(opp)) { + dev_err_ratelimited(dev, "Failed to find OPP for frequency %lu: %ld\n", + freq, PTR_ERR(opp)); + return 0; + } + voltage = dev_pm_opp_get_voltage(opp) / 1000; /* mV */ dev_pm_opp_put(opp); if (voltage == 0) { dev_err_ratelimited(dev, - "Failed to get voltage for frequency %lu: %ld\n", - freq, IS_ERR(opp) ? PTR_ERR(opp) : 0); + "Failed to get voltage for frequency %lu\n", + freq); return 0; } -- cgit v1.2.3 From 9aec9082bf3c0fb83020a0f562c9cc8078ac91f1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:04 +0530 Subject: thermal: cpu_cooling: Replace dev_warn with dev_err There isn't much the user can do on seeing these warnings, as the hardware is actually okay. dev_err suits much better here. Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/cpu_cooling.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 11b5ea685518..59f0bd53f329 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -397,9 +397,9 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_device, dev_pm_opp_put(opp); if (voltage == 0) { - dev_warn_ratelimited(cpufreq_device->cpu_dev, - "Failed to get voltage for frequency %lu: %ld\n", - freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0); + dev_err_ratelimited(cpufreq_device->cpu_dev, + "Failed to get voltage for frequency %lu: %ld\n", + freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0); return -EINVAL; } @@ -691,9 +691,9 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, *state = cpufreq_cooling_get_level(cpu, target_freq); if (*state == THERMAL_CSTATE_INVALID) { - dev_warn_ratelimited(&cdev->device, - "Failed to convert %dKHz for cpu %d into a cdev state\n", - target_freq, cpu); + dev_err_ratelimited(&cdev->device, + "Failed to convert %dKHz for cpu %d into a cdev state\n", + target_freq, cpu); return -EINVAL; } -- cgit v1.2.3 From 3ea3217cf91804be6ed9b368ef4ac7911eb1dadc Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Feb 2017 09:40:05 +0530 Subject: thermal: cpu_cooling: Check OPP for errors It is possible for dev_pm_opp_find_freq_exact() to return errors. It was all fine earlier as dev_pm_opp_get_voltage() had a check within it to check for invalid OPPs, but dev_pm_opp_put() doesn't have any similar checks and the callers need to make sure OPP is valid before calling them. Also update the later dev_warn_ratelimited() to not print the error message as the OPP is guaranteed to be valid now. Reported-by: Dan Carpenter Signed-off-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/cpu_cooling.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 59f0bd53f329..69d0f430b2d1 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -393,13 +393,20 @@ static int get_static_power(struct cpufreq_cooling_device *cpufreq_device, opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz, true); + if (IS_ERR(opp)) { + dev_warn_ratelimited(cpufreq_device->cpu_dev, + "Failed to find OPP for frequency %lu: %ld\n", + freq_hz, PTR_ERR(opp)); + return -EINVAL; + } + voltage = dev_pm_opp_get_voltage(opp); dev_pm_opp_put(opp); if (voltage == 0) { dev_err_ratelimited(cpufreq_device->cpu_dev, - "Failed to get voltage for frequency %lu: %ld\n", - freq_hz, IS_ERR(opp) ? PTR_ERR(opp) : 0); + "Failed to get voltage for frequency %lu\n", + freq_hz); return -EINVAL; } -- cgit v1.2.3 From bafa687dccbe16cbf9b1824409836d79d6dc6543 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 23 Feb 2017 12:31:54 +0200 Subject: extcon: int3496: Propagate error code of gpiod_to_irq() gpiod_to_irq() doesn't return 0. Thus, we just adjust condition and replace -EINVAL by actual error code it returns. Signed-off-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index a3131b036de6..38eb6cab938f 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -100,9 +100,9 @@ static int int3496_probe(struct platform_device *pdev) } data->usb_id_irq = gpiod_to_irq(data->gpio_usb_id); - if (data->usb_id_irq <= 0) { + if (data->usb_id_irq < 0) { dev_err(dev, "can't get USB ID IRQ: %d\n", data->usb_id_irq); - return -EINVAL; + return data->usb_id_irq; } data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus en", -- cgit v1.2.3 From 80354c29025833acd72ddac1ffa21c6cb50128cd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 12 Mar 2017 17:07:44 +0200 Subject: x86/platform/intel-mid: Correct MSI IRQ line for watchdog device The interrupt line used for the watchdog is 12, according to the official Intel Edison BSP code. And indeed after fixing it we start getting an interrupt and thus the watchdog starts working again: [ 191.699951] Kernel panic - not syncing: Kernel Watchdog Signed-off-by: Andy Shevchenko Cc: Borislav Petkov Cc: David Cohen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 78a3bb9e408b ("x86: intel-mid: add watchdog platform code for Merrifield") Link: http://lkml.kernel.org/r/20170312150744.45493-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 86edd1e941eb..9e304e2ea4f5 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -19,7 +19,7 @@ #include #include -#define TANGIER_EXT_TIMER0_MSI 15 +#define TANGIER_EXT_TIMER0_MSI 12 static struct platform_device wdt_dev = { .name = "intel_mid_wdt", -- cgit v1.2.3 From 9fa1d7537242bd580ffa99c4725a0407096aad26 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Feb 2017 10:11:45 +0200 Subject: drm/omap: fix dmabuf mmap for dma_alloc'ed buffers omap_gem_dmabuf_mmap() returns an error (with a WARN) when called for a buffer which is allocated with dma_alloc_*(). This prevents dmabuf mmap from working on SoCs without DMM, e.g. AM4 and OMAP3. I could not find any reason for omap_gem_dmabuf_mmap() rejecting such buffers, and just removing the if() fixes the limitation. Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index af267c35d813..ee5883f59be5 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer, struct drm_gem_object *obj = buffer->priv; int ret = 0; - if (WARN_ON(!obj->filp)) - return -EINVAL; - ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma); if (ret < 0) return ret; -- cgit v1.2.3 From 337ba7fbf0fbc12242359c4af114878618b90951 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sat, 25 Feb 2017 16:29:50 +0300 Subject: uapi: fix drm/omap_drm.h userspace compilation errors Consistently use types from linux/types.h like in other uapi drm/*_drm.h header files to fix the following drm/omap_drm.h userspace compilation errors: /usr/include/drm/omap_drm.h:36:2: error: unknown type name 'uint64_t' uint64_t param; /* in */ /usr/include/drm/omap_drm.h:37:2: error: unknown type name 'uint64_t' uint64_t value; /* in (set_param), out (get_param) */ /usr/include/drm/omap_drm.h:56:2: error: unknown type name 'uint32_t' uint32_t bytes; /* (for non-tiled formats) */ /usr/include/drm/omap_drm.h:58:3: error: unknown type name 'uint16_t' uint16_t width; /usr/include/drm/omap_drm.h:59:3: error: unknown type name 'uint16_t' uint16_t height; /usr/include/drm/omap_drm.h:65:2: error: unknown type name 'uint32_t' uint32_t flags; /* in */ /usr/include/drm/omap_drm.h:66:2: error: unknown type name 'uint32_t' uint32_t handle; /* out */ /usr/include/drm/omap_drm.h:67:2: error: unknown type name 'uint32_t' uint32_t __pad; /usr/include/drm/omap_drm.h:77:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:78:2: error: unknown type name 'uint32_t' uint32_t op; /* mask of omap_gem_op (in) */ /usr/include/drm/omap_drm.h:82:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:83:2: error: unknown type name 'uint32_t' uint32_t op; /* mask of omap_gem_op (in) */ /usr/include/drm/omap_drm.h:88:2: error: unknown type name 'uint32_t' uint32_t nregions; /usr/include/drm/omap_drm.h:89:2: error: unknown type name 'uint32_t' uint32_t __pad; /usr/include/drm/omap_drm.h:93:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:94:2: error: unknown type name 'uint32_t' uint32_t pad; /usr/include/drm/omap_drm.h:95:2: error: unknown type name 'uint64_t' uint64_t offset; /* mmap offset (out) */ /usr/include/drm/omap_drm.h:102:2: error: unknown type name 'uint32_t' uint32_t size; /* virtual size for mmap'ing (out) */ /usr/include/drm/omap_drm.h:103:2: error: unknown type name 'uint32_t' uint32_t __pad; Fixes: ef6503e89194 ("drm: Kbuild: add omap_drm.h to the installed headers") Signed-off-by: Dmitry V. Levin Signed-off-by: Tomi Valkeinen --- include/uapi/drm/omap_drm.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 407cb55df6ac..7fb97863c945 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -33,8 +33,8 @@ extern "C" { #define OMAP_PARAM_CHIPSET_ID 1 /* ie. 0x3430, 0x4430, etc */ struct drm_omap_param { - uint64_t param; /* in */ - uint64_t value; /* in (set_param), out (get_param) */ + __u64 param; /* in */ + __u64 value; /* in (set_param), out (get_param) */ }; #define OMAP_BO_SCANOUT 0x00000001 /* scanout capable (phys contiguous) */ @@ -53,18 +53,18 @@ struct drm_omap_param { #define OMAP_BO_TILED (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32) union omap_gem_size { - uint32_t bytes; /* (for non-tiled formats) */ + __u32 bytes; /* (for non-tiled formats) */ struct { - uint16_t width; - uint16_t height; + __u16 width; + __u16 height; } tiled; /* (for tiled formats) */ }; struct drm_omap_gem_new { union omap_gem_size size; /* in */ - uint32_t flags; /* in */ - uint32_t handle; /* out */ - uint32_t __pad; + __u32 flags; /* in */ + __u32 handle; /* out */ + __u32 __pad; }; /* mask of operations: */ @@ -74,33 +74,33 @@ enum omap_gem_op { }; struct drm_omap_gem_cpu_prep { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ }; struct drm_omap_gem_cpu_fini { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ /* TODO maybe here we pass down info about what regions are touched * by sw so we can be clever about cache ops? For now a placeholder, * set to zero and we just do full buffer flush.. */ - uint32_t nregions; - uint32_t __pad; + __u32 nregions; + __u32 __pad; }; struct drm_omap_gem_info { - uint32_t handle; /* buffer handle (in) */ - uint32_t pad; - uint64_t offset; /* mmap offset (out) */ + __u32 handle; /* buffer handle (in) */ + __u32 pad; + __u64 offset; /* mmap offset (out) */ /* note: in case of tiled buffers, the user virtual size can be * different from the physical size (ie. how many pages are needed * to back the object) which is returned in DRM_IOCTL_GEM_OPEN.. * This size here is the one that should be used if you want to * mmap() the buffer: */ - uint32_t size; /* virtual size for mmap'ing (out) */ - uint32_t __pad; + __u32 size; /* virtual size for mmap'ing (out) */ + __u32 __pad; }; #define DRM_OMAP_GET_PARAM 0x00 -- cgit v1.2.3 From fd89b23a4632d3cbdee398048497e026edadfb71 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 6 Mar 2017 00:02:52 +0800 Subject: netfilter: nft_set_bitmap: fetch the element key based on the set->klen Currently we just assume the element key as a u32 integer, regardless of the set key length. This is incorrect, for example, the tcp port number is only 16 bits. So when we use the nft_payload expr to get the tcp dport and store it to dreg, the dport will be stored at 0~15 bits, and 16~31 bits will be padded with zero. So the reg->data[dreg] will be looked like as below: 0 15 31 +-+-+-+-+-+-+-+-+-+-+-+-+ | tcp dport | 0 | +-+-+-+-+-+-+-+-+-+-+-+-+ But for these big-endian systems, if we treate this register as a u32 integer, the element key will be larger than 65535, so the following lookup in bitmap set will cause out of bound access. Another issue is that if we add element with comments in bitmap set(although the comments will be ignored eventually), the element will vanish strangely. Because we treate the element key as a u32 integer, so the comments will become the part of the element key, then the element key will also be larger than 65535 and out of bound access will happen: # nft add element t s { 1 comment test } Since set->klen is 1 or 2, it's fine to treate the element key as a u8 or u16 integer. Fixes: 665153ff5752 ("netfilter: nf_tables: add bitmap set type") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_bitmap.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 152d226552c1..9b024e22717b 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -45,9 +45,17 @@ struct nft_bitmap { u8 bitmap[]; }; -static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off) +static inline void nft_bitmap_location(const struct nft_set *set, + const void *key, + u32 *idx, u32 *off) { - u32 k = (key << 1); + u32 k; + + if (set->klen == 2) + k = *(u16 *)key; + else + k = *(u8 *)key; + k <<= 1; *idx = k / BITS_PER_BYTE; *off = k % BITS_PER_BYTE; @@ -69,7 +77,7 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_cur(net); u32 idx, off; - nft_bitmap_location(*key, &idx, &off); + nft_bitmap_location(set, key, &idx, &off); return nft_bitmap_active(priv->bitmap, idx, off, genmask); } @@ -83,7 +91,7 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); if (nft_bitmap_active(priv->bitmap, idx, off, genmask)) return -EEXIST; @@ -102,7 +110,7 @@ static void nft_bitmap_remove(const struct net *net, u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); /* Enter 00 state. */ priv->bitmap[idx] &= ~(genmask << off); } @@ -116,7 +124,7 @@ static void nft_bitmap_activate(const struct net *net, u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); /* Enter 11 state. */ priv->bitmap[idx] |= (genmask << off); } @@ -128,7 +136,7 @@ static bool nft_bitmap_flush(const struct net *net, u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); /* Enter 10 state, similar to deactivation. */ priv->bitmap[idx] &= ~(genmask << off); @@ -161,10 +169,9 @@ static void *nft_bitmap_deactivate(const struct net *net, struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); struct nft_set_ext *ext; - u32 idx, off, key = 0; + u32 idx, off; - memcpy(&key, elem->key.val.data, set->klen); - nft_bitmap_location(key, &idx, &off); + nft_bitmap_location(set, elem->key.val.data, &idx, &off); if (!nft_bitmap_active(priv->bitmap, idx, off, genmask)) return NULL; -- cgit v1.2.3 From 10596608c4d62cb8c1c2b806debcbd32fe657e71 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 8 Mar 2017 22:54:18 +0800 Subject: netfilter: nf_tables: fix mismatch in big-endian system Currently, there are two different methods to store an u16 integer to the u32 data register. For example: u32 *dest = ®s->data[priv->dreg]; 1. *dest = 0; *(u16 *) dest = val_u16; 2. *dest = val_u16; For method 1, the u16 value will be stored like this, either in big-endian or little-endian system: 0 15 31 +-+-+-+-+-+-+-+-+-+-+-+-+ | Value | 0 | +-+-+-+-+-+-+-+-+-+-+-+-+ For method 2, in little-endian system, the u16 value will be the same as listed above. But in big-endian system, the u16 value will be stored like this: 0 15 31 +-+-+-+-+-+-+-+-+-+-+-+-+ | 0 | Value | +-+-+-+-+-+-+-+-+-+-+-+-+ So later we use "memcmp(®s->data[priv->sreg], data, 2);" to do compare in nft_cmp, nft_lookup expr ..., method 2 will get the wrong result in big-endian system, as 0~15 bits will always be zero. For the similar reason, when loading an u16 value from the u32 data register, we should use "*(u16 *) sreg;" instead of "(u16)*sreg;", the 2nd method will get the wrong value in the big-endian system. So introduce some wrapper functions to store/load an u8 or u16 integer to/from the u32 data register, and use them in the right place. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 29 +++++++++++++++++++++++++++ net/ipv4/netfilter/nft_masq_ipv4.c | 8 ++++---- net/ipv4/netfilter/nft_redir_ipv4.c | 8 ++++---- net/ipv6/netfilter/nft_masq_ipv6.c | 8 ++++---- net/ipv6/netfilter/nft_redir_ipv6.c | 8 ++++---- net/netfilter/nft_ct.c | 18 +++++++++-------- net/netfilter/nft_meta.c | 40 +++++++++++++++++++------------------ net/netfilter/nft_nat.c | 8 ++++---- 8 files changed, 80 insertions(+), 47 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2aa8a9d80fbe..70c5ca0c60b1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -103,6 +103,35 @@ struct nft_regs { }; }; +/* Store/load an u16 or u8 integer to/from the u32 data register. + * + * Note, when using concatenations, register allocation happens at 32-bit + * level. So for store instruction, pad the rest part with zero to avoid + * garbage values. + */ + +static inline void nft_reg_store16(u32 *dreg, u16 val) +{ + *dreg = 0; + *(u16 *)dreg = val; +} + +static inline void nft_reg_store8(u32 *dreg, u8 val) +{ + *dreg = 0; + *(u8 *)dreg = val; +} + +static inline u16 nft_reg_load16(u32 *sreg) +{ + return *(u16 *)sreg; +} + +static inline u8 nft_reg_load8(u32 *sreg) +{ + return *(u8 *)sreg; +} + static inline void nft_data_copy(u32 *dst, const struct nft_data *src, unsigned int len) { diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index a0ea8aad1bf1..f18677277119 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -26,10 +26,10 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt), &range, nft_out(pkt)); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 1650ed23c15d..5120be1d3118 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -26,10 +26,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { - mr.range[0].min.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - mr.range[0].max.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + mr.range[0].min.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + mr.range[0].max.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 6c5b5b1830a7..4146536e9c15 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -27,10 +27,10 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); } regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, nft_out(pkt)); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index f5ac080fc084..a27e424f690d 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -26,10 +26,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min], - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max], + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index bf548a7a71ec..91585b5e5307 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -83,7 +83,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_DIRECTION: - *dest = CTINFO2DIR(ctinfo); + nft_reg_store8(dest, CTINFO2DIR(ctinfo)); return; case NFT_CT_STATUS: *dest = ct->status; @@ -151,20 +151,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; } case NFT_CT_L3PROTOCOL: - *dest = nf_ct_l3num(ct); + nft_reg_store8(dest, nf_ct_l3num(ct)); return; case NFT_CT_PROTOCOL: - *dest = nf_ct_protonum(ct); + nft_reg_store8(dest, nf_ct_protonum(ct)); return; #ifdef CONFIG_NF_CONNTRACK_ZONES case NFT_CT_ZONE: { const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + u16 zoneid; if (priv->dir < IP_CT_DIR_MAX) - *dest = nf_ct_zone_id(zone, priv->dir); + zoneid = nf_ct_zone_id(zone, priv->dir); else - *dest = zone->id; + zoneid = zone->id; + nft_reg_store16(dest, zoneid); return; } #endif @@ -183,10 +185,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; case NFT_CT_PROTO_SRC: - *dest = (__force __u16)tuple->src.u.all; + nft_reg_store16(dest, (__force u16)tuple->src.u.all); return; case NFT_CT_PROTO_DST: - *dest = (__force __u16)tuple->dst.u.all; + nft_reg_store16(dest, (__force u16)tuple->dst.u.all); return; default: break; @@ -205,7 +207,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, const struct nft_ct *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; enum ip_conntrack_info ctinfo; - u16 value = regs->data[priv->sreg]; + u16 value = nft_reg_load16(®s->data[priv->sreg]); struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e1f5ca9b423b..7b60e01f38ff 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -45,16 +45,15 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = skb->len; break; case NFT_META_PROTOCOL: - *dest = 0; - *(__be16 *)dest = skb->protocol; + nft_reg_store16(dest, (__force u16)skb->protocol); break; case NFT_META_NFPROTO: - *dest = nft_pf(pkt); + nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: if (!pkt->tprot_set) goto err; - *dest = pkt->tprot; + nft_reg_store8(dest, pkt->tprot); break; case NFT_META_PRIORITY: *dest = skb->priority; @@ -85,14 +84,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, case NFT_META_IIFTYPE: if (in == NULL) goto err; - *dest = 0; - *(u16 *)dest = in->type; + nft_reg_store16(dest, in->type); break; case NFT_META_OIFTYPE: if (out == NULL) goto err; - *dest = 0; - *(u16 *)dest = out->type; + nft_reg_store16(dest, out->type); break; case NFT_META_SKUID: sk = skb_to_full_sk(skb); @@ -142,19 +139,19 @@ void nft_meta_get_eval(const struct nft_expr *expr, #endif case NFT_META_PKTTYPE: if (skb->pkt_type != PACKET_LOOPBACK) { - *dest = skb->pkt_type; + nft_reg_store8(dest, skb->pkt_type); break; } switch (nft_pf(pkt)) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest = PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; case NFPROTO_IPV6: - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; case NFPROTO_NETDEV: switch (skb->protocol) { @@ -168,14 +165,14 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; if (ipv4_is_multicast(iph->daddr)) - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); else - *dest = PACKET_BROADCAST; + nft_reg_store8(dest, PACKET_BROADCAST); break; } case htons(ETH_P_IPV6): - *dest = PACKET_MULTICAST; + nft_reg_store8(dest, PACKET_MULTICAST); break; default: WARN_ON_ONCE(1); @@ -230,7 +227,9 @@ void nft_meta_set_eval(const struct nft_expr *expr, { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; - u32 value = regs->data[meta->sreg]; + u32 *sreg = ®s->data[meta->sreg]; + u32 value = *sreg; + u8 pkt_type; switch (meta->key) { case NFT_META_MARK: @@ -240,9 +239,12 @@ void nft_meta_set_eval(const struct nft_expr *expr, skb->priority = value; break; case NFT_META_PKTTYPE: - if (skb->pkt_type != value && - skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type)) - skb->pkt_type = value; + pkt_type = nft_reg_load8(sreg); + + if (skb->pkt_type != pkt_type && + skb_pkt_type_ok(pkt_type) && + skb_pkt_type_ok(skb->pkt_type)) + skb->pkt_type = pkt_type; break; case NFT_META_NFTRACE: skb->nf_trace = !!value; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 19a7bf3236f9..439e0bd152a0 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr, } if (priv->sreg_proto_min) { - range.min_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_min]; - range.max_proto.all = - *(__be16 *)®s->data[priv->sreg_proto_max]; + range.min_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_min]); + range.max_proto.all = (__force __be16)nft_reg_load16( + ®s->data[priv->sreg_proto_max]); range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } -- cgit v1.2.3 From 4ca60d08cbe65f501baad64af50fceba79c19fbb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 9 Mar 2017 23:22:30 +0100 Subject: netfilter: bridge: honor frag_max_size when refragmenting consider a bridge with mtu 9000, but end host sending smaller packets to another host with mtu < 9000. In this case, after reassembly, bridge+defrag would refragment, and then attempt to send the reassembled packet as long as it was below 9k. Instead we have to cap by the largest fragment size seen. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 95087e6e8258..3c5185021c1c 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -721,18 +721,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge; - unsigned int mtu_reserved; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); + unsigned int mtu, mtu_reserved; mtu_reserved = nf_bridge_mtu_reduction(skb); + mtu = skb->dev->mtu; + + if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) + mtu = nf_bridge->frag_max_size; - if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); } - nf_bridge = nf_bridge_info_get(skb); - /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ -- cgit v1.2.3 From 170a1fb9c01bc40b7e8fd57a32ac9a0e131ec5b6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sat, 11 Mar 2017 00:25:26 -0500 Subject: netfilter: Force fake conntrack entry to be at least 8 bytes aligned Since the nfct and nfctinfo have been combined, the nf_conn structure must be at least 8 bytes aligned, as the 3 LSB bits are used for the nfctinfo. But there's a fake nf_conn structure to denote untracked connections, which is created by a PER_CPU construct. This does not guarantee that it will be 8 bytes aligned and can break the logic in determining the correct nfctinfo. I triggered this on a 32bit machine with the following error: BUG: unable to handle kernel NULL pointer dereference at 00000af4 IP: nf_ct_deliver_cached_events+0x1b/0xfb *pdpt = 0000000031962001 *pde = 0000000000000000 Oops: 0000 [#1] SMP [Modules linked in: ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ipv6 crc_ccitt ppdev r8169 parport_pc parport OK ] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-test+ #75 Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014 task: c126ec00 task.stack: c1258000 EIP: nf_ct_deliver_cached_events+0x1b/0xfb EFLAGS: 00010202 CPU: 0 EAX: 0021cd01 EBX: 00000000 ECX: 27b0c767 EDX: 32bcb17a ESI: f34135c0 EDI: f34135c0 EBP: f2debd60 ESP: f2debd3c DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 CR0: 80050033 CR2: 00000af4 CR3: 309a0440 CR4: 001406f0 Call Trace: ? ipv6_skip_exthdr+0xac/0xcb ipv6_confirm+0x10c/0x119 [nf_conntrack_ipv6] nf_hook_slow+0x22/0xc7 nf_hook+0x9a/0xad [ipv6] ? ip6t_do_table+0x356/0x379 [ip6_tables] ? ip6_fragment+0x9e9/0x9e9 [ipv6] ip6_output+0xee/0x107 [ipv6] ? ip6_fragment+0x9e9/0x9e9 [ipv6] dst_output+0x36/0x4d [ipv6] NF_HOOK.constprop.37+0xb2/0xba [ipv6] ? icmp6_dst_alloc+0x2c/0xfd [ipv6] ? local_bh_enable+0x14/0x14 [ipv6] mld_sendpack+0x1c5/0x281 [ipv6] ? mark_held_locks+0x40/0x5c mld_ifc_timer_expire+0x1f6/0x21e [ipv6] call_timer_fn+0x135/0x283 ? detach_if_pending+0x55/0x55 ? mld_dad_timer_expire+0x3e/0x3e [ipv6] __run_timers+0x111/0x14b ? mld_dad_timer_expire+0x3e/0x3e [ipv6] run_timer_softirq+0x1c/0x36 __do_softirq+0x185/0x37c ? test_ti_thread_flag.constprop.19+0xd/0xd do_softirq_own_stack+0x22/0x28 irq_exit+0x5a/0xa4 smp_apic_timer_interrupt+0x2a/0x34 apic_timer_interrupt+0x37/0x3c By using DEFINE/DECLARE_PER_CPU_ALIGNED we can enforce at least 8 byte alignment as all cache line sizes are at least 8 bytes or more. Fixes: a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage area") Signed-off-by: Steven Rostedt (VMware) Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_core.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f540f9ad2af4..19605878da47 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -244,7 +244,7 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, u32 seq); /* Fake conntrack entry for untracked connections */ -DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); static inline struct nf_conn *nf_ct_untracked_get(void) { return raw_cpu_ptr(&nf_conntrack_untracked); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 071b97fcbefb..ffb78e5f7b70 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -181,7 +181,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; seqcount_t nf_conntrack_generation __read_mostly; -DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); +/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used + * for the nfctinfo. We cheat by (ab)using the PER CPU cache line + * alignment to enforce this. + */ +DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); static unsigned int nf_conntrack_hash_rnd __read_mostly; -- cgit v1.2.3 From e920dde5160887d07b738f5a7f593b1fa9b1e32e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 10 Mar 2017 18:32:31 +0100 Subject: netfilter: nft_set_bitmap: keep a list of dummy elements Element comments may come without any prior set flag, so we have to keep a list of dummy struct nft_set_ext to keep this information around. This is only useful for set dumps to userspace. From the packet path, this set type relies on the bitmap representation. This patch simplifies the logic since we don't need to allocate the dummy nft_set_ext structure anymore on the fly at the cost of increasing memory consumption because of the list of dummy struct nft_set_ext. Fixes: 665153ff5752 ("netfilter: nf_tables: add bitmap set type") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_bitmap.c | 146 +++++++++++++++++++---------------------- 1 file changed, 66 insertions(+), 80 deletions(-) diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 9b024e22717b..8ebbc2940f4c 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -15,6 +15,11 @@ #include #include +struct nft_bitmap_elem { + struct list_head head; + struct nft_set_ext ext; +}; + /* This bitmap uses two bits to represent one element. These two bits determine * the element state in the current and the future generation. * @@ -41,8 +46,9 @@ * restore its previous state. */ struct nft_bitmap { - u16 bitmap_size; - u8 bitmap[]; + struct list_head list; + u16 bitmap_size; + u8 bitmap[]; }; static inline void nft_bitmap_location(const struct nft_set *set, @@ -82,21 +88,43 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set, return nft_bitmap_active(priv->bitmap, idx, off, genmask); } +static struct nft_bitmap_elem * +nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, + u8 genmask) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *be; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (memcmp(nft_set_ext_key(&be->ext), + nft_set_ext_key(&this->ext), set->klen) || + !nft_set_elem_active(&be->ext, genmask)) + continue; + + return be; + } + return NULL; +} + static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **_ext) + struct nft_set_ext **ext) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *new = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); - if (nft_bitmap_active(priv->bitmap, idx, off, genmask)) + be = nft_bitmap_elem_find(set, new, genmask); + if (be) { + *ext = &be->ext; return -EEXIST; + } + nft_bitmap_location(set, nft_set_ext_key(&new->ext), &idx, &off); /* Enter 01 state. */ priv->bitmap[idx] |= (genmask << off); + list_add_tail_rcu(&new->head, &priv->list); return 0; } @@ -106,13 +134,14 @@ static void nft_bitmap_remove(const struct net *net, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 00 state. */ priv->bitmap[idx] &= ~(genmask << off); + list_del_rcu(&be->head); } static void nft_bitmap_activate(const struct net *net, @@ -120,73 +149,52 @@ static void nft_bitmap_activate(const struct net *net, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext *ext = elem->priv; + struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; - nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 11 state. */ priv->bitmap[idx] |= (genmask << off); + nft_set_elem_change_active(net, set, &be->ext); } static bool nft_bitmap_flush(const struct net *net, - const struct nft_set *set, void *ext) + const struct nft_set *set, void *_be) { struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); + struct nft_bitmap_elem *be = _be; u32 idx, off; - nft_bitmap_location(set, nft_set_ext_key(ext), &idx, &off); + nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 10 state, similar to deactivation. */ priv->bitmap[idx] &= ~(genmask << off); + nft_set_elem_change_active(net, set, &be->ext); return true; } -static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_set_ext_tmpl tmpl; - struct nft_set_ext *ext; - - nft_set_ext_prepare(&tmpl); - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); - - ext = kzalloc(tmpl.len, GFP_KERNEL); - if (!ext) - return NULL; - - nft_set_ext_init(ext, &tmpl); - memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen); - - return ext; -} - static void *nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_bitmap *priv = nft_set_priv(set); + struct nft_bitmap_elem *this = elem->priv, *be; u8 genmask = nft_genmask_next(net); - struct nft_set_ext *ext; u32 idx, off; nft_bitmap_location(set, elem->key.val.data, &idx, &off); - if (!nft_bitmap_active(priv->bitmap, idx, off, genmask)) - return NULL; - - /* We have no real set extension since this is a bitmap, allocate this - * dummy object that is released from the commit/abort path. - */ - ext = nft_bitmap_ext_alloc(set, elem); - if (!ext) + be = nft_bitmap_elem_find(set, this, genmask); + if (!be) return NULL; /* Enter 10 state. */ priv->bitmap[idx] &= ~(genmask << off); + nft_set_elem_change_active(net, set, &be->ext); - return ext; + return be; } static void nft_bitmap_walk(const struct nft_ctx *ctx, @@ -194,47 +202,23 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, struct nft_set_iter *iter) { const struct nft_bitmap *priv = nft_set_priv(set); - struct nft_set_ext_tmpl tmpl; + struct nft_bitmap_elem *be; struct nft_set_elem elem; - struct nft_set_ext *ext; - int idx, off; - u16 key; - - nft_set_ext_prepare(&tmpl); - nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen); - - for (idx = 0; idx < priv->bitmap_size; idx++) { - for (off = 0; off < BITS_PER_BYTE; off += 2) { - if (iter->count < iter->skip) - goto cont; - - if (!nft_bitmap_active(priv->bitmap, idx, off, - iter->genmask)) - goto cont; - - ext = kzalloc(tmpl.len, GFP_KERNEL); - if (!ext) { - iter->err = -ENOMEM; - return; - } - nft_set_ext_init(ext, &tmpl); - key = ((idx * BITS_PER_BYTE) + off) >> 1; - memcpy(nft_set_ext_key(ext), &key, set->klen); - - elem.priv = ext; - iter->err = iter->fn(ctx, set, iter, &elem); - - /* On set flush, this dummy extension object is released - * from the commit/abort path. - */ - if (!iter->flush) - kfree(ext); - - if (iter->err < 0) - return; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (iter->count < iter->skip) + goto cont; + if (!nft_set_elem_active(&be->ext, iter->genmask)) + goto cont; + + elem.priv = be; + + iter->err = iter->fn(ctx, set, iter, &elem); + + if (iter->err < 0) + return; cont: - iter->count++; - } + iter->count++; } } @@ -265,6 +249,7 @@ static int nft_bitmap_init(const struct nft_set *set, { struct nft_bitmap *priv = nft_set_priv(set); + INIT_LIST_HEAD(&priv->list); priv->bitmap_size = nft_bitmap_size(set->klen); return 0; @@ -290,6 +275,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, static struct nft_set_ops nft_bitmap_ops __read_mostly = { .privsize = nft_bitmap_privsize, + .elemsize = offsetof(struct nft_bitmap_elem, ext), .estimate = nft_bitmap_estimate, .init = nft_bitmap_init, .destroy = nft_bitmap_destroy, -- cgit v1.2.3 From a1c2ff53726907aff5feb37e4cfd45c1ff626431 Mon Sep 17 00:00:00 2001 From: Hiroyuki Yokoyama Date: Wed, 1 Mar 2017 03:51:00 +0000 Subject: ASoC: rsnd: fix sound route path when using SRC6/SRC9 This patch fixes the problem that the missing value of the route path setting table and incorrect values are set in the CMD_ROUTE_SELECT register. Signed-off-by: Hiroyuki Yokoyama [Kuninori: shared data on MIX and non-MIX case] Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/cmd.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c index abb5eaac854a..7d92a24b7cfa 100644 --- a/sound/soc/sh/rcar/cmd.c +++ b/sound/soc/sh/rcar/cmd.c @@ -31,23 +31,24 @@ static int rsnd_cmd_init(struct rsnd_mod *mod, struct rsnd_mod *mix = rsnd_io_to_mod_mix(io); struct device *dev = rsnd_priv_to_dev(priv); u32 data; + u32 path[] = { + [1] = 1 << 0, + [5] = 1 << 8, + [6] = 1 << 12, + [9] = 1 << 15, + }; if (!mix && !dvc) return 0; + if (ARRAY_SIZE(path) < rsnd_mod_id(mod) + 1) + return -ENXIO; + if (mix) { struct rsnd_dai *rdai; struct rsnd_mod *src; struct rsnd_dai_stream *tio; int i; - u32 path[] = { - [0] = 0, - [1] = 1 << 0, - [2] = 0, - [3] = 0, - [4] = 0, - [5] = 1 << 8 - }; /* * it is assuming that integrater is well understanding about @@ -70,16 +71,19 @@ static int rsnd_cmd_init(struct rsnd_mod *mod, } else { struct rsnd_mod *src = rsnd_io_to_mod_src(io); - u32 path[] = { - [0] = 0x30000, - [1] = 0x30001, - [2] = 0x40000, - [3] = 0x10000, - [4] = 0x20000, - [5] = 0x40100 + u8 cmd_case[] = { + [0] = 0x3, + [1] = 0x3, + [2] = 0x4, + [3] = 0x1, + [4] = 0x2, + [5] = 0x4, + [6] = 0x1, + [9] = 0x2, }; - data = path[rsnd_mod_id(src)]; + data = path[rsnd_mod_id(src)] | + cmd_case[rsnd_mod_id(src)] << 16; } dev_dbg(dev, "ctu/mix path = 0x%08x", data); -- cgit v1.2.3 From 6f1f622019f95d79d6e2f8bb3781144ad0aff75f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 6 Mar 2017 17:49:42 +0800 Subject: nfs4: fix a typo of NFS_ATTR_FATTR_GROUP_NAME This typo cause a memory leak, and a bad client's group id. unreferenced object 0xffff96d8073998d0 (size 8): comm "kworker/0:3", pid 34224, jiffies 4295361338 (age 761.752s) hex dump (first 8 bytes): 30 00 39 07 d8 96 ff ff 0.9..... backtrace: [] kmemleak_alloc+0x4a/0xa0 [] __kmalloc+0x140/0x220 [] xdr_stream_decode_string_dup+0x7c/0x110 [sunrpc] [] decode_getfattr_attrs+0x940/0x1630 [nfsv4] [] decode_getfattr_generic.constprop.108+0x9b/0x100 [nfsv4] [] nfs4_xdr_dec_open+0xcf/0x100 [nfsv4] [] rpcauth_unwrap_resp+0xa7/0xe0 [sunrpc] [] call_decode+0x1e0/0x810 [sunrpc] [] __rpc_execute+0x8d/0x420 [sunrpc] [] rpc_async_schedule+0x12/0x20 [sunrpc] [] process_one_work+0x197/0x430 [] worker_thread+0x4e/0x4a0 [] kthread+0x101/0x140 [] ret_from_fork+0x2c/0x40 [] 0xffffffffffffffff Fixes: 686a816ab6 ("NFSv4: Clean up owner/group attribute decode") Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f0369e362753..80ce289eea05 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3942,7 +3942,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, if (len <= 0) goto out; dprintk("%s: name=%s\n", __func__, group_name->data); - return NFS_ATTR_FATTR_OWNER_NAME; + return NFS_ATTR_FATTR_GROUP_NAME; } else { len = xdr_stream_decode_opaque_inline(xdr, (void **)&p, XDR_MAX_NETOBJ); -- cgit v1.2.3 From 366a1569bff3fe14abfdf9285e31e05e091745f5 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 6 Mar 2017 22:29:14 +0800 Subject: NFSv4: fix a reference leak caused WARNING messages Because nfs4_opendata_access() has close the state when access is denied, so the state isn't leak. Rather than revert the commit a974deee47, I'd like clean the strange state close. [ 1615.094218] ------------[ cut here ]------------ [ 1615.094607] WARNING: CPU: 0 PID: 23702 at lib/list_debug.c:31 __list_add_valid+0x8e/0xa0 [ 1615.094913] list_add double add: new=ffff9d7901d9f608, prev=ffff9d7901d9f608, next=ffff9d7901ee8dd0. [ 1615.095458] Modules linked in: nfsv4(E) nfs(E) nfsd(E) tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock f2fs snd_seq_midi snd_seq_midi_event fscrypto coretemp ppdev crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_rapl_perf vmw_balloon snd_ens1371 joydev gameport snd_ac97_codec ac97_bus snd_seq snd_pcm snd_rawmidi snd_timer snd_seq_device snd soundcore nfit parport_pc parport acpi_cpufreq tpm_tis tpm_tis_core tpm i2c_piix4 vmw_vmci shpchp auth_rpcgss nfs_acl lockd(E) grace sunrpc(E) xfs libcrc32c vmwgfx drm_kms_helper ttm drm crc32c_intel mptspi e1000 serio_raw scsi_transport_spi mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: nfs] [ 1615.097663] CPU: 0 PID: 23702 Comm: fstest Tainted: G W E 4.11.0-rc1+ #517 [ 1615.098015] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 1615.098807] Call Trace: [ 1615.099183] dump_stack+0x63/0x86 [ 1615.099578] __warn+0xcb/0xf0 [ 1615.099967] warn_slowpath_fmt+0x5f/0x80 [ 1615.100370] __list_add_valid+0x8e/0xa0 [ 1615.100760] nfs4_put_state_owner+0x75/0xc0 [nfsv4] [ 1615.101136] __nfs4_close+0x109/0x140 [nfsv4] [ 1615.101524] nfs4_close_state+0x15/0x20 [nfsv4] [ 1615.101949] nfs4_close_context+0x21/0x30 [nfsv4] [ 1615.102691] __put_nfs_open_context+0xb8/0x110 [nfs] [ 1615.103155] put_nfs_open_context+0x10/0x20 [nfs] [ 1615.103586] nfs4_file_open+0x13b/0x260 [nfsv4] [ 1615.103978] do_dentry_open+0x20a/0x2f0 [ 1615.104369] ? nfs4_copy_file_range+0x30/0x30 [nfsv4] [ 1615.104739] vfs_open+0x4c/0x70 [ 1615.105106] ? may_open+0x5a/0x100 [ 1615.105469] path_openat+0x623/0x1420 [ 1615.105823] do_filp_open+0x91/0x100 [ 1615.106174] ? __alloc_fd+0x3f/0x170 [ 1615.106568] do_sys_open+0x130/0x220 [ 1615.106920] ? __put_cred+0x3d/0x50 [ 1615.107256] SyS_open+0x1e/0x20 [ 1615.107588] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 1615.107922] RIP: 0033:0x7fab599069b0 [ 1615.108247] RSP: 002b:00007ffcf0600d78 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 [ 1615.108575] RAX: ffffffffffffffda RBX: 00007fab59bcfae0 RCX: 00007fab599069b0 [ 1615.108896] RDX: 0000000000000200 RSI: 0000000000000200 RDI: 00007ffcf060255e [ 1615.109211] RBP: 0000000000040010 R08: 0000000000000000 R09: 0000000000000016 [ 1615.109515] R10: 00000000000006a1 R11: 0000000000000246 R12: 0000000000041000 [ 1615.109806] R13: 0000000000040010 R14: 0000000000001000 R15: 0000000000002710 [ 1615.110152] ---[ end trace 96ed63b1306bf2f3 ]--- Fixes: a974deee47 ("NFSv4: Fix memory and state leak in...") Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1b183686c6d4..c1f5369cd339 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2258,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred, if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) return 0; - /* even though OPEN succeeded, access is denied. Close the file */ - nfs4_close_state(state, fmode); return -EACCES; } -- cgit v1.2.3 From aac66bf5f916f645bd57029490a72c3f91f2c274 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 6 Mar 2017 23:54:01 +0000 Subject: drm/i915: use correct node for handling cache domain eviction It looks like we were incorrectly comparing vma->node against itself instead of the target node, when evicting for a node on systems where we need guard pages between regions with different cache domains. As a consequence we can end up trying to needlessly evict neighbouring nodes, even if they have the same cache domain, and if they were pinned we would fail the eviction. Fixes: 625d988acc28 ("drm/i915: Extract reserving space in the GTT to a helper") Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170306235414.23407-3-matthew.auld@intel.com Signed-off-by: Chris Wilson (cherry picked from commit fe65cbdbc97929e4a522716ed279a36783656142) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_evict.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index c181b1bb3d2c..3be2503aa042 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -293,12 +293,12 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * those as well to make room for our guard pages. */ if (check_color) { - if (vma->node.start + vma->node.size == node->start) { - if (vma->node.color == node->color) + if (node->start + node->size == target->start) { + if (node->color == target->color) continue; } - if (vma->node.start == node->start + node->size) { - if (vma->node.color == node->color) + if (node->start == target->start + target->size) { + if (node->color == target->color) continue; } } -- cgit v1.2.3 From 3a0d137de035cc8c70194d9988ded61825b5ff8a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 8 Mar 2017 13:00:07 +0100 Subject: drm/i915: Nuke skl_update_plane debug message from the pipe update critical section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit printks are slow so we should not be doing them from the vblank evade critical section. These could explain why we sometimes seem to blow past our 100 usec deadline. The problem has been there ever since commit c331879ce8ea ("drm/i915: skylake sprite plane scaling using shared scalers.") but it may not have been readily visible until commit e1edbd44e23b ("drm/i915: Complain if we take too long under vblank evasion.") increased our chances of noticing it. Signed-off-by: Maarten Lankhorst Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1488974407-25175-1-git-send-email-maarten.lankhorst@linux.intel.com Fixes: c331879ce8ea ("drm/i915: skylake sprite plane scaling using shared scalers") Cc: # v4.2+ Reviewed-by: Ville Syrjälä [mlankhorst: Add missing tags, point to the correct offending commit] (cherry picked from commit d38146b9ee16264ff9a88bf3391ab9f2f5af3646) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sprite.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9ef54688872a..9481ca9a3ae7 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -254,9 +254,6 @@ skl_update_plane(struct drm_plane *drm_plane, int scaler_id = plane_state->scaler_id; const struct intel_scaler *scaler; - DRM_DEBUG_KMS("plane = %d PS_PLANE_SEL(plane) = 0x%x\n", - plane_id, PS_PLANE_SEL(plane_id)); - scaler = &crtc_state->scaler_state.scalers[scaler_id]; I915_WRITE(SKL_PS_CTRL(pipe, scaler_id), -- cgit v1.2.3 From 6aef660370a9c246956ba6d01eebd8063c4214cb Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 10 Mar 2017 09:32:49 +0000 Subject: drm/i915: Fix forcewake active domain tracking In commit 003342a50021 ("drm/i915: Keep track of active forcewake domains in a bitmask") I forgot to adjust the newly introduce fw_domains_active state across reset. This caused the assert_forcewakes_inactive to trigger during suspend and resume if there were user held forcewakes. v2: Bitmask checks are required since vfuncs are not always present. v3: Move bitmask tracking to get/put vfunc for simplicity. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: 003342a50021 ("drm/i915: Keep track of active forcewake domains in a bitmask") Testcase: igt/drv_suspend/forcewake Cc: Tvrtko Ursulin Cc: "Paneri, Praveen" Cc: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: v4.10+ Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170310093249.4484-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit b8473050805f35add97f3ff57570d55a01808df5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_uncore.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe08885a5ba..b7ff592b14f5 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -119,6 +119,8 @@ fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma for_each_fw_domain_masked(d, fw_domains, dev_priv) fw_domain_wait_ack(d); + + dev_priv->uncore.fw_domains_active |= fw_domains; } static void @@ -130,6 +132,8 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma fw_domain_put(d); fw_domain_posting_read(d); } + + dev_priv->uncore.fw_domains_active &= ~fw_domains; } static void @@ -240,10 +244,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) if (WARN_ON(domain->wake_count == 0)) domain->wake_count++; - if (--domain->wake_count == 0) { + if (--domain->wake_count == 0) dev_priv->uncore.funcs.force_wake_put(dev_priv, domain->mask); - dev_priv->uncore.fw_domains_active &= ~domain->mask; - } spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); @@ -454,10 +456,8 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, fw_domains &= ~domain->mask; } - if (fw_domains) { + if (fw_domains) dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); - dev_priv->uncore.fw_domains_active |= fw_domains; - } } /** @@ -968,7 +968,6 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv, fw_domain_arm_timer(domain); dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); - dev_priv->uncore.fw_domains_active |= fw_domains; } static inline void __force_wake_auto(struct drm_i915_private *dev_priv, -- cgit v1.2.3 From 04166f48d9593af4513ae06c0f966c0cee300a20 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Mar 2017 13:24:03 +0100 Subject: Revert "netfilter: nf_tables: add flush field to struct nft_set_iter" This reverts commit 1f48ff6c5393aa7fe290faf5d633164f105b0aa7. This patch is not required anymore now that we keep a dummy list of set elements in the bitmap set implementation, so revert this before we forget this code has no clients. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 - net/netfilter/nf_tables_api.c | 4 ---- 2 files changed, 5 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 70c5ca0c60b1..0136028652bd 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -232,7 +232,6 @@ struct nft_set_elem { struct nft_set; struct nft_set_iter { u8 genmask; - bool flush; unsigned int count; unsigned int skip; int err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5e0ccfd5bb37..434c739dfeca 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3145,7 +3145,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.count = 0; iter.err = 0; iter.fn = nf_tables_bind_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) @@ -3399,7 +3398,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) args.iter.count = 0; args.iter.err = 0; args.iter.fn = nf_tables_dump_setelem; - args.iter.flush = false; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -3963,7 +3961,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_set_iter iter = { .genmask = genmask, .fn = nft_flush_set, - .flush = true, }; set->ops->walk(&ctx, set, &iter); @@ -5114,7 +5111,6 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, iter.count = 0; iter.err = 0; iter.fn = nf_tables_loop_check_setelem; - iter.flush = false; set->ops->walk(ctx, set, &iter); if (iter.err < 0) -- cgit v1.2.3 From c5f7c5a9a0f84c511a8a336491f9b8a3060b6517 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 6 Mar 2017 16:21:16 +0200 Subject: drivers, xen: convert grant_map.users from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index c77a0751a311..f3bf8f4e2d6c 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -86,7 +87,7 @@ struct grant_map { int index; int count; int flags; - atomic_t users; + refcount_t users; struct unmap_notify notify; struct ioctl_gntdev_grant_ref *grants; struct gnttab_map_grant_ref *map_ops; @@ -166,7 +167,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->index = 0; add->count = count; - atomic_set(&add->users, 1); + refcount_set(&add->users, 1); return add; @@ -212,7 +213,7 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map) if (!map) return; - if (!atomic_dec_and_test(&map->users)) + if (!refcount_dec_and_test(&map->users)) return; atomic_sub(map->count, &pages_mapped); @@ -400,7 +401,7 @@ static void gntdev_vma_open(struct vm_area_struct *vma) struct grant_map *map = vma->vm_private_data; pr_debug("gntdev_vma_open %p\n", vma); - atomic_inc(&map->users); + refcount_inc(&map->users); } static void gntdev_vma_close(struct vm_area_struct *vma) @@ -1004,7 +1005,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; } - atomic_inc(&map->users); + refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; -- cgit v1.2.3 From 44fee88cea43d3c2cac962e0439cb10a3cabff6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 13 Mar 2017 15:57:12 +0100 Subject: x86/tsc: Fix ART for TSC_KNOWN_FREQ Subhransu reported that convert_art_to_tsc() isn't working for him. The ART to TSC relation is only set up for systems which use the refined TSC calibration. Systems with known TSC frequency (available via CPUID 15) are not using the refined calibration and therefor the ART to TSC relation is never established. Add the setup to the known frequency init path which skips ART calibration. The init code needs to be duplicated as for systems which use refined calibration the ART setup must be delayed until calibration has been done. The problem has been there since the ART support was introdduced, but only detected now because Subhransu tested the first time on hardware which has TSC frequency enumerated via CPUID 15. Note for stable: The conditional has changed from TSC_RELIABLE to TSC_KNOWN_FREQUENCY. [ tglx: Rewrote changelog and identified the proper 'Fixes' commit ] Fixes: f9677e0f8308 ("x86/tsc: Always Running Timer (ART) correlated clocksource") Reported-by: "Prusty, Subhransu S" Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Cc: christopher.s.hall@intel.com Cc: kevin.b.stanton@intel.com Cc: john.stultz@linaro.org Cc: akataria@vmware.com Link: http://lkml.kernel.org/r/20170313145712.GI3312@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 4f7a9833d8e5..c73a7f9e881a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1333,6 +1333,8 @@ static int __init init_tsc_clocksource(void) * the refined calibration and directly register it as a clocksource. */ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { + if (boot_cpu_has(X86_FEATURE_ART)) + art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); return 0; } -- cgit v1.2.3 From 0d443b70cc92d741cbc1dcbf1079897b3d8bc3cc Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 7 Mar 2017 15:08:42 -0600 Subject: x86/platform: Remove warning message for duplicate NMI handlers Remove the WARNING message associated with multiple NMI handlers as there are at least two that are legitimate. These are the KGDB and the UV handlers and both want to be called if the NMI has not been claimed by any other NMI handler. Use of the UNKNOWN NMI call chain dramatically lowers the NMI call rate when high frequency NMI tools are in use, notably the perf tools. It is required on systems that cannot sustain a high NMI call rate without adversely affecting the system operation. Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Cc: Don Zickus Cc: Peter Zijlstra Cc: Russ Anderson Cc: Frank Ramsay Cc: Tony Ernst Link: http://lkml.kernel.org/r/20170307210841.730959611@asylum.americas.sgi.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index f088ea4c66e7..a723ae9440ab 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) spin_lock_irqsave(&desc->lock, flags); /* - * most handlers of type NMI_UNKNOWN never return because - * they just assume the NMI is theirs. Just a sanity check - * to manage expectations + * Indicate if there are multiple registrations on the + * internal NMI handler call chains (SERR and IO_CHECK). */ - WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head)); -- cgit v1.2.3 From c42f8218610aa09d7d3795e5810387673c1f84b6 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 9 Mar 2017 17:20:04 +0100 Subject: iio: sw-device: Fix config group initialization Use the IS_ENABLED() helper macro to ensure that the configfs group is initialized either when configfs is built-in or when configfs is built as a module. Otherwise software device creation will result in undefined behaviour when configfs is built as a module since the configfs group for the device not properly initialized. Similar to commit b2f0c09664b7 ("iio: sw-trigger: Fix config group initialization"). Fixes: 0f3a8c3f34f7 ("iio: Add support for creating IIO devices via configfs") Reported-by: Miguel Robles Signed-off-by: Lars-Peter Clausen Acked-by: Daniel Baluta Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/sw_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index 23ca41515527..fa7931933067 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -62,7 +62,7 @@ void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, struct config_item_type *type) { -#ifdef CONFIG_CONFIGFS_FS +#if IS_ENABLED(CONFIG_CONFIGFS_FS) config_group_init_type_name(&d->group, name, type); #endif } -- cgit v1.2.3 From c836e5cf0d0880d6668966476c4ffe727f29f69a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Mar 2017 13:24:21 +0200 Subject: x86/platform/intel-mid: Use common power off sequence Intel Medfield may use common for Intel MID devices power sequence. Remove unneded custom power off stub. While here, remove function forward declaration. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170308112422.67533-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/intel-mid/mfld.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index e793fe509971..e42978d4deaf 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c @@ -17,16 +17,6 @@ #include "intel_mid_weak_decls.h" -static void penwell_arch_setup(void); -/* penwell arch ops */ -static struct intel_mid_ops penwell_ops = { - .arch_setup = penwell_arch_setup, -}; - -static void mfld_power_off(void) -{ -} - static unsigned long __init mfld_calibrate_tsc(void) { unsigned long fast_calibrate; @@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void) static void __init penwell_arch_setup(void) { x86_platform.calibrate_tsc = mfld_calibrate_tsc; - pm_power_off = mfld_power_off; } +static struct intel_mid_ops penwell_ops = { + .arch_setup = penwell_arch_setup, +}; + void *get_penwell_ops(void) { return &penwell_ops; -- cgit v1.2.3 From 859bb6d59066b96341020e0991f191631cabe59d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Mar 2017 13:24:22 +0200 Subject: x86/platform/intel-mid: Add power button support for Merrifield Intel Merrifield platform has a Basin Cove PMIC to handle in particular power button events. Add necessary bits to enable it. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170308112422.67533-2-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/intel-mid/device_libs/Makefile | 1 + .../device_libs/platform_mrfld_power_btn.c | 82 ++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index a7dbec4dce27..3dbde04febdc 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o +obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c new file mode 100644 index 000000000000..a6c3705a28ad --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c @@ -0,0 +1,82 @@ +/* + * Intel Merrifield power button support + * + * (C) Copyright 2017 Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include + +#include +#include + +static struct resource mrfld_power_btn_resources[] = { + { + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device mrfld_power_btn_dev = { + .name = "msic_power_btn", + .id = PLATFORM_DEVID_NONE, + .num_resources = ARRAY_SIZE(mrfld_power_btn_resources), + .resource = mrfld_power_btn_resources, +}; + +static int mrfld_power_btn_scu_status_change(struct notifier_block *nb, + unsigned long code, void *data) +{ + if (code == SCU_DOWN) { + platform_device_unregister(&mrfld_power_btn_dev); + return 0; + } + + return platform_device_register(&mrfld_power_btn_dev); +} + +static struct notifier_block mrfld_power_btn_scu_notifier = { + .notifier_call = mrfld_power_btn_scu_status_change, +}; + +static int __init register_mrfld_power_btn(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + /* + * We need to be sure that the SCU IPC is ready before + * PMIC power button device can be registered: + */ + intel_scu_notifier_add(&mrfld_power_btn_scu_notifier); + + return 0; +} +arch_initcall(register_mrfld_power_btn); + +static void __init *mrfld_power_btn_platform_data(void *info) +{ + struct resource *res = mrfld_power_btn_resources; + struct sfi_device_table_entry *pentry = info; + + res->start = res->end = pentry->irq; + return NULL; +} + +static const struct devs_id mrfld_power_btn_dev_id __initconst = { + .name = "bcove_power_btn", + .type = SFI_DEV_TYPE_IPC, + .delay = 1, + .msic = 1, + .get_platform_data = &mrfld_power_btn_platform_data, +}; + +sfi_device(mrfld_power_btn_dev_id); -- cgit v1.2.3 From 9aea151f282df2b82a44fd7058a239334437c266 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 26 Feb 2017 01:02:09 +0100 Subject: ARM: dts: add the AB8500 clocks to the device tree This adds the AB8500 clocks to the device tree using the new bindings from the clk subsystem, making audio work again. Cc: Lee Jones Cc: Ulf Hansson Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/boot/dts/ste-dbx5x0.dtsi | 19 +++++++++++++++++++ arch/arm/boot/dts/ste-href.dtsi | 9 --------- arch/arm/boot/dts/ste-snowball.dts | 9 --------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index 82d8c4771293..162e1eb5373d 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -14,6 +14,7 @@ #include #include #include +#include #include "skeleton.dtsi" / { @@ -603,6 +604,11 @@ interrupt-controller; #interrupt-cells = <2>; + ab8500_clock: clock-controller { + compatible = "stericsson,ab8500-clk"; + #clock-cells = <1>; + }; + ab8500_gpio: ab8500-gpio { compatible = "stericsson,ab8500-gpio"; gpio-controller; @@ -686,6 +692,8 @@ ab8500-pwm { compatible = "stericsson,ab8500-pwm"; + clocks = <&ab8500_clock AB8500_SYSCLK_INT>; + clock-names = "intclk"; }; ab8500-debugfs { @@ -700,6 +708,9 @@ V-AMIC2-supply = <&ab8500_ldo_anamic2_reg>; V-DMIC-supply = <&ab8500_ldo_dmic_reg>; + clocks = <&ab8500_clock AB8500_SYSCLK_AUDIO>; + clock-names = "audioclk"; + stericsson,earpeice-cmv = <950>; /* Units in mV. */ }; @@ -1095,6 +1106,14 @@ status = "disabled"; }; + sound { + compatible = "stericsson,snd-soc-mop500"; + stericsson,cpu-dai = <&msp1 &msp3>; + stericsson,audio-codec = <&codec>; + clocks = <&prcmu_clk PRCMU_SYSCLK>, <&ab8500_clock AB8500_SYSCLK_ULP>, <&ab8500_clock AB8500_SYSCLK_INT>; + clock-names = "sysclk", "ulpclk", "intclk"; + }; + msp0: msp@80123000 { compatible = "stericsson,ux500-msp-i2s"; reg = <0x80123000 0x1000>; diff --git a/arch/arm/boot/dts/ste-href.dtsi b/arch/arm/boot/dts/ste-href.dtsi index f37f9e10713c..9e359e4f342e 100644 --- a/arch/arm/boot/dts/ste-href.dtsi +++ b/arch/arm/boot/dts/ste-href.dtsi @@ -186,15 +186,6 @@ status = "okay"; }; - sound { - compatible = "stericsson,snd-soc-mop500"; - - stericsson,cpu-dai = <&msp1 &msp3>; - stericsson,audio-codec = <&codec>; - clocks = <&prcmu_clk PRCMU_SYSCLK>; - clock-names = "sysclk"; - }; - msp0: msp@80123000 { pinctrl-names = "default"; pinctrl-0 = <&msp0_default_mode>; diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index dd5514def604..ade1d0d4e5f4 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -159,15 +159,6 @@ "", "", "", "", "", "", "", ""; }; - sound { - compatible = "stericsson,snd-soc-mop500"; - - stericsson,cpu-dai = <&msp1 &msp3>; - stericsson,audio-codec = <&codec>; - clocks = <&prcmu_clk PRCMU_SYSCLK>; - clock-names = "sysclk"; - }; - msp0: msp@80123000 { pinctrl-names = "default"; pinctrl-0 = <&msp0_default_mode>; -- cgit v1.2.3 From 6e7408acd04d06c04981c0c0fb5a2462b16fae4f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 12 Mar 2017 18:12:56 +0100 Subject: cpufreq: intel_pstate: Update pid_params.sample_rate_ns in pid_param_set() Fix the debugfs interface for PID tuning to actually update pid_params.sample_rate_ns on PID parameters updates, as changing pid_params.sample_rate_ms via debugfs has no effect now. Fixes: a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with utilization update callbacks) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3d37219a0dd7..f9fe910f3b83 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -989,6 +989,7 @@ static void intel_pstate_update_policies(void) static int pid_param_set(void *data, u64 val) { *(u32 *)data = val; + pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; intel_pstate_reset_all_pid(); return 0; } -- cgit v1.2.3 From be3606ff739d1c1be36389f8737c577ad87e1f57 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 13 Mar 2017 19:33:37 +0300 Subject: x86/kasan: Fix boot with KASAN=y and PROFILE_ANNOTATED_BRANCHES=y The kernel doesn't boot with both PROFILE_ANNOTATED_BRANCHES=y and KASAN=y options selected. With branch profiling enabled we end up calling ftrace_likely_update() before kasan_early_init(). ftrace_likely_update() is built with KASAN instrumentation, so calling it before kasan has been initialized leads to crash. Use DISABLE_BRANCH_PROFILING define to make sure that we don't call ftrace_likely_update() from early code before kasan_early_init(). Fixes: ef7f0d6a6ca8 ("x86_64: add KASan support") Reported-by: Fengguang Wu Signed-off-by: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Alexander Potapenko Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: lkp@01.org Cc: Dmitry Vyukov Link: http://lkml.kernel.org/r/20170313163337.1704-1-aryabinin@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 1 + arch/x86/mm/kasan_init_64.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 54a2372f5dbb..b5785c197e53 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -4,6 +4,7 @@ * Copyright (C) 2000 Andrea Arcangeli SuSE */ +#define DISABLE_BRANCH_PROFILING #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8d63d7a104c3..4c90cfdc128b 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt #include #include -- cgit v1.2.3 From 09498087f922bb623b66db9e89c6fd11a3799867 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 9 Mar 2017 07:41:16 +0100 Subject: serial: 8250_dw: Honor clk_round_rate errors in dw8250_set_termios clk_round_rate returns a signed long and may possibly return errors in it, for example if there is no possible rate. Till now dw8250_set_termios ignored any error, the signednes and would just use the value as input to clk_set_rate. This of course falls apart if there is an actual error, so check for errors and only try to set a rate if the value is actually valid. This turned up on some Rockchip platforms after commit 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") enabled set_termios callback in all cases, not only ACPI. Fixes: 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") Signed-off-by: Heiko Stuebner Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 6ee55a2d47bb..223ac234ddb2 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -257,7 +257,7 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, { unsigned int baud = tty_termios_baud_rate(termios); struct dw8250_data *d = p->private_data; - unsigned int rate; + long rate; int ret; if (IS_ERR(d->clk) || !old) @@ -265,7 +265,10 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, clk_disable_unprepare(d->clk); rate = clk_round_rate(d->clk, baud * 16); - ret = clk_set_rate(d->clk, rate); + if (rate < 0) + ret = rate; + else + ret = clk_set_rate(d->clk, rate); clk_prepare_enable(d->clk); if (!ret) -- cgit v1.2.3 From b15bfbe6427712d1b992bf806a6df9c05002a0a4 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Sat, 4 Mar 2017 13:09:58 +0000 Subject: serial: 8250_dw: Fix breakage when HAVE_CLK=n Commit 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") recently broke the 8250_dw driver on platforms which don't select HAVE_CLK, as dw8250_set_termios() gets confused by the behaviour of the fallback HAVE_CLK=n clock API in linux/clk.h which pretends everything is fine but returns (valid) NULL clocks and 0 HZ clock rates. That 0 rate is written into the uartclk resulting in a crash at boot, e.g. on Cavium Octeon III based UTM-8 we get something like this: 1180000000800.serial: ttyS0 at MMIO 0x1180000000800 (irq = 41, base_baud = 25000000) is a OCTEON ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1 at drivers/tty/serial/serial_core.c:441 uart_get_baud_rate+0xfc/0x1f0 ... Call Trace: ... [] uart_get_baud_rate+0xfc/0x1f0 [] serial8250_do_set_termios+0xb0/0x440 [] uart_set_options+0xe8/0x190 [] serial8250_console_setup+0x84/0x158 [] univ8250_console_setup+0x54/0x70 [] register_console+0x1c8/0x418 [] uart_add_one_port+0x434/0x4b0 [] serial8250_register_8250_port+0x2d8/0x440 [] dw8250_probe+0x388/0x5e8 ... The clock API is defined such that NULL is a valid clock handle so it wouldn't be right to check explicitly for NULL. Instead treat a clk_round_rate() return value of 0 as an error which prevents uartclk being overwritten. Fixes: 6a171b299379 ("serial: 8250_dw: Allow hardware flow control to be used") Signed-off-by: James Hogan Cc: Kefeng Wang Cc: David Daney Cc: Russell King Cc: linux-serial@vger.kernel.org Cc: linux-clk@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: bcm-kernel-feedback-list@broadcom.com Reviewed-by: Andy Shevchenko Reviewed-by: Jason Uy Reviewed-by: Heiko Stuebner Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index 223ac234ddb2..e65808c482f1 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -267,6 +267,8 @@ static void dw8250_set_termios(struct uart_port *p, struct ktermios *termios, rate = clk_round_rate(d->clk, baud * 16); if (rate < 0) ret = rate; + else if (rate == 0) + ret = -ENOENT; else ret = clk_set_rate(d->clk, rate); clk_prepare_enable(d->clk); -- cgit v1.2.3 From 542ed784671d4678406c77ed6dd01593a0cdbea1 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 28 Feb 2017 14:30:33 -0600 Subject: tty: acpi/spcr: QDF2400 E44 checks for wrong OEM revision For Qualcomm Technologies QDF2400 SOCs that are affected by erratum E44, the ACPI oem_revision field is actually set to 1, not 0. Fixes: d8a4995bcea1 ("tty: pl011: Work around QDF2400 E44 stuck BUSY bit") Tested-by: Manoj Iyer Signed-off-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/spcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index 01c94669a2b0..3afa8c1fa127 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -30,7 +30,7 @@ static bool qdf2400_erratum_44_present(struct acpi_table_header *h) return true; if (!memcmp(h->oem_table_id, "QDF2400 ", ACPI_OEM_TABLE_ID_SIZE) && - h->oem_revision == 0) + h->oem_revision == 1) return true; return false; -- cgit v1.2.3 From 3f8ed54aee491bbb83656592c2d0ad7b78d045ca Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 13 Mar 2017 18:30:12 -0700 Subject: cpufreq: intel_pstate: Correct frequency setting in the HWP mode In the functions intel_pstate_hwp_set(), min/max range from HWP capability MSR along with max_perf_pct and min_perf_pct, is used to set the HWP request MSR. In some cases this doesn't result in the correct HWP max/min in HWP request. For example: In the following case: HWP capabilities from MSR 0x771 0x70a1220 Here cpufreq min/max frequencies from above MSR dump are 700MHz and 3.2GHz respectively. This will result in hwp_min = 0x07 hwp_max = 0x20 To limit max frequency to 2GHz: perf_limits->max_perf_pct = 63 (2GHz as a percent of 3.2GHz rounded up) With the current calculation: adj_range = max_perf_pct * range / 100; adj_range = 63 * (32 - 7) / 100 adj_range = 15 max = hw_min + adj_range; max = 7 + 15 = 22 This will result in HWP request of 0x160f, which will result in a frequency cap of 2.2GHz not 2GHz. The problem with the above calculation is that hwp_min of 7 is treated as 0% in the range. But max_perf_pct is calculated with respect to minimum as 0 and max as 3.2GHz or hwp_max, so adding hwp_min to it will result in more than the desired. Since the min_perf_pct and max_perf_pct is already a percent of max frequency or hwp_max, this min/max HWP request value can be calculated directly applying these percentage to hwp_max. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f9fe910f3b83..ee12641ee010 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -845,7 +845,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { static void intel_pstate_hwp_set(struct cpufreq_policy *policy) { - int min, hw_min, max, hw_max, cpu, range, adj_range; + int min, hw_min, max, hw_max, cpu; struct perf_limits *perf_limits = limits; u64 value, cap; @@ -863,20 +863,17 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) hw_max = HWP_GUARANTEED_PERF(cap); else hw_max = HWP_HIGHEST_PERF(cap); - range = hw_max - hw_min; max_perf_pct = perf_limits->max_perf_pct; min_perf_pct = perf_limits->min_perf_pct; + min = hw_max * min_perf_pct / 100; rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); - adj_range = min_perf_pct * range / 100; - min = hw_min + adj_range; + value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - adj_range = max_perf_pct * range / 100; - max = hw_min + adj_range; - + max = hw_max * max_perf_pct / 100; value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); -- cgit v1.2.3 From 64ff64b90e62c860772fd0be50b7cfcef1d8a9b2 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:12 -0800 Subject: scsi: megaraid_sas: enable intx only if msix request fails Without this fix, driver will enable INTx Interrupt pin even though MSI-x vectors are enabled. See below lspci output. DisINTx is unset for MSIx setup. lspci -s 85:00.0 -vvv |grep INT |grep Control Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- After applying this fix, driver will enable INTx Interrupt pin only if Legacy interrupt method is required. See below lspci output. DisINTx is set for MSIx setup. lspci -s 85:00.0 -vvv |grep INT |grep Control Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx+ Signed-off-by: Kashyap Desai Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 7ac9a9ee9bd4..016ffcebaf66 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5034,10 +5034,12 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe) &instance->irq_context[j]); /* Retry irq register for IO_APIC*/ instance->msix_vectors = 0; - if (is_probe) + if (is_probe) { + pci_free_irq_vectors(instance->pdev); return megasas_setup_irqs_ioapic(instance); - else + } else { return -1; + } } } return 0; @@ -5277,9 +5279,11 @@ static int megasas_init_fw(struct megasas_instance *instance) MPI2_REPLY_POST_HOST_INDEX_OFFSET); } - i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); - if (i < 0) - goto fail_setup_irqs; + if (!instance->msix_vectors) { + i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); + if (i < 0) + goto fail_setup_irqs; + } dev_info(&instance->pdev->dev, "firmware supports msix\t: (%d)", fw_msix_count); -- cgit v1.2.3 From 49524b3c6e12375627ddd870613fcc6b24909898 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:13 -0800 Subject: scsi: megaraid_sas: add correct return type check for ldio hint logic for raid1 Return value check of atomic_dec_if_positive is required as it returns old value minus one. Without this fix, driver will send small ios to firmware path and that will be a performance issue. Not critical, but good to have r1_ldio_hint as default value in sdev private. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 016ffcebaf66..0016f12cc563 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1963,6 +1963,9 @@ scan_target: if (!mr_device_priv_data) return -ENOMEM; sdev->hostdata = mr_device_priv_data; + + atomic_set(&mr_device_priv_data->r1_ldio_hint, + instance->r1_ldio_hint_default); return 0; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 29650ba669da..ebd746e2d97c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2338,7 +2338,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, fp_possible = false; atomic_dec(&instance->fw_outstanding); } else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) || - atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint)) { + (atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint) > 0)) { fp_possible = false; atomic_dec(&instance->fw_outstanding); if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE) -- cgit v1.2.3 From 874d025da667d19b728141437ccbefe9dbaf9e7b Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:14 -0800 Subject: scsi: megaraid_sas: raid6 also require cpuSel check same as raid5 Without this fix, raid6 performance will not be optimal. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index ebd746e2d97c..f990ab4d45e1 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2159,7 +2159,7 @@ megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context, cpu_sel = MR_RAID_CTX_CPUSEL_1; if (is_stream_detected(rctx_g35) && - (raid->level == 5) && + ((raid->level == 5) || (raid->level == 6)) && (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) && (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS)) cpu_sel = MR_RAID_CTX_CPUSEL_0; -- cgit v1.2.3 From 22487b66594f18f2a7c211f3ab8bb02dec74d37b Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:15 -0800 Subject: scsi: megaraid_sas: Driver version upgrade Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index e7e5974e1a2c..2b209bbb4c91 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -35,8 +35,8 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "07.701.16.00-rc1" -#define MEGASAS_RELDATE "February 2, 2017" +#define MEGASAS_VERSION "07.701.17.00-rc1" +#define MEGASAS_RELDATE "March 2, 2017" /* * Device IDs -- cgit v1.2.3 From 23f963e91fd81f44f6b316b1c24db563354c6be8 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 13 Mar 2017 14:30:29 -0700 Subject: dmaengine: Fix array index out of bounds warning in __get_unmap_pool() This fixes the following warning when building with clang and CONFIG_DMA_ENGINE_RAID=n : drivers/dma/dmaengine.c:1102:11: error: array index 2 is past the end of the array (which contains 1 element) [-Werror,-Warray-bounds] return &unmap_pool[2]; ^ ~ drivers/dma/dmaengine.c:1083:1: note: array 'unmap_pool' declared here static struct dmaengine_unmap_pool unmap_pool[] = { ^ drivers/dma/dmaengine.c:1104:11: error: array index 3 is past the end of the array (which contains 1 element) [-Werror,-Warray-bounds] return &unmap_pool[3]; ^ ~ drivers/dma/dmaengine.c:1083:1: note: array 'unmap_pool' declared here static struct dmaengine_unmap_pool unmap_pool[] = { Signed-off-by: Matthias Kaehlcke Reviewed-by: Dan Williams Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 24e0221fd66d..d9118ec23025 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1108,12 +1108,14 @@ static struct dmaengine_unmap_pool *__get_unmap_pool(int nr) switch (order) { case 0 ... 1: return &unmap_pool[0]; +#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID) case 2 ... 4: return &unmap_pool[1]; case 5 ... 7: return &unmap_pool[2]; case 8: return &unmap_pool[3]; +#endif default: BUG(); return NULL; -- cgit v1.2.3 From b0addd3fa6bcd119be9428996d5d4522479ab240 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:48 +0100 Subject: USB: idmouse: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/idmouse.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index 8b9fd7534f69..502bfe30a077 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -347,6 +347,9 @@ static int idmouse_probe(struct usb_interface *interface, if (iface_desc->desc.bInterfaceClass != 0x0A) return -ENODEV; + if (iface_desc->desc.bNumEndpoints < 1) + return -ENODEV; + /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (dev == NULL) -- cgit v1.2.3 From 1dc56c52d2484be09c7398a5207d6b11a4256be9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:49 +0100 Subject: USB: lvtest: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should the probed device lack endpoints. Note that this driver does not bind to any devices by default. Fixes: ce21bfe603b3 ("USB: Add LVS Test device driver") Cc: stable # 3.17 Cc: Pratyush Anand Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/lvstest.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c index 77176511658f..d3d124753266 100644 --- a/drivers/usb/misc/lvstest.c +++ b/drivers/usb/misc/lvstest.c @@ -366,6 +366,10 @@ static int lvs_rh_probe(struct usb_interface *intf, hdev = interface_to_usbdev(intf); desc = intf->cur_altsetting; + + if (desc->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &desc->endpoint[0].desc; /* valid only for SS root hub */ -- cgit v1.2.3 From f259ca3eed6e4b79ac3d5c5c9fb259fb46e86217 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:50 +0100 Subject: USB: uss720: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. Note that the endpoint access that causes the NULL-deref is currently only used for debugging purposes during probe so the oops only happens when dynamic debugging is enabled. This means the driver could be rewritten to continue to accept device with only two endpoints, should such devices exist. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/uss720.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index e45a3a680db8..07014cad6dbe 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -709,6 +709,11 @@ static int uss720_probe(struct usb_interface *intf, interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints < 3) { + usb_put_dev(usbdev); + return -ENODEV; + } + /* * Allocate parport interface */ -- cgit v1.2.3 From 03ace948a4eb89d1cf51c06afdfc41ebca5fdb27 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:51 +0100 Subject: USB: wusbcore: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. This specifically fixes the NULL-pointer dereference when probing HWA HC devices. Fixes: df3654236e31 ("wusb: add the Wire Adapter (WA) core") Cc: stable # 2.6.28 Cc: Inaky Perez-Gonzalez Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/wusbcore/wa-hc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/wusbcore/wa-hc.c b/drivers/usb/wusbcore/wa-hc.c index 252c7bd9218a..d01496fd27fe 100644 --- a/drivers/usb/wusbcore/wa-hc.c +++ b/drivers/usb/wusbcore/wa-hc.c @@ -39,6 +39,9 @@ int wa_create(struct wahc *wa, struct usb_interface *iface, int result; struct device *dev = &iface->dev; + if (iface->cur_altsetting->desc.bNumEndpoints < 3) + return -ENODEV; + result = wa_rpipes_create(wa); if (result < 0) goto error_rpipes_create; -- cgit v1.2.3 From daf229b15907fbfdb6ee183aac8ca428cb57e361 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:52 +0100 Subject: uwb: hwa-rc: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Note that the dereference happens in the start callback which is called during probe. Fixes: de520b8bd552 ("uwb: add HWA radio controller driver") Cc: stable # 2.6.28 Cc: Inaky Perez-Gonzalez Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/hwa-rc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 0aa6c3c29d17..35a1e777b449 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -823,6 +823,9 @@ static int hwarc_probe(struct usb_interface *iface, struct hwarc *hwarc; struct device *dev = &iface->dev; + if (iface->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + result = -ENOMEM; uwb_rc = uwb_rc_alloc(); if (uwb_rc == NULL) { -- cgit v1.2.3 From 4ce362711d78a4999011add3115b8f4b0bc25e8c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:47:53 +0100 Subject: uwb: i1480-dfu: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Note that the dereference happens in the cmd and wait_init_done callbacks which are called during probe. Fixes: 1ba47da52712 ("uwb: add the i1480 DFU driver") Cc: stable # 2.6.28 Cc: Inaky Perez-Gonzalez Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/uwb/i1480/dfu/usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index 2bfc846ac071..6345e85822a4 100644 --- a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -362,6 +362,9 @@ int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id) result); } + if (iface->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + result = -ENOMEM; i1480_usb = kzalloc(sizeof(*i1480_usb), GFP_KERNEL); if (i1480_usb == NULL) { -- cgit v1.2.3 From 3243367b209faed5c320a4e5f9a565ee2a2ba958 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Mon, 13 Mar 2017 20:50:08 +0100 Subject: usb-core: Add LINEAR_FRAME_INTR_BINTERVAL USB quirk Some USB 2.0 devices erroneously report millisecond values in bInterval. The generic config code manages to catch most of them, but in some cases it's not completely enough. The case at stake here is a USB 2.0 braille device, which wants to announce 10ms and thus sets bInterval to 10, but with the USB 2.0 computation that yields to 64ms. It happens that one can type fast enough to reach this interval and get the device buffers overflown, leading to problematic latencies. The generic config code does not catch this case because the 64ms is considered a sane enough value. This change thus adds a USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL quirk to mark devices which actually report milliseconds in bInterval, and marks Vario Ultra devices as needing it. Signed-off-by: Samuel Thibault Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/config.c | 10 ++++++++++ drivers/usb/core/quirks.c | 8 ++++++++ include/linux/usb/quirks.h | 6 ++++++ 3 files changed, 24 insertions(+) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 25dbd8c7aec7..4be52c602e9b 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -280,6 +280,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, /* * Adjust bInterval for quirked devices. + */ + /* + * This quirk fixes bIntervals reported in ms. + */ + if (to_usb_device(ddev)->quirks & + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL) { + n = clamp(fls(d->bInterval) + 3, i, j); + i = j = n; + } + /* * This quirk fixes bIntervals reported in * linear microframes. */ diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 24f9f98968a5..96b21b0dac1e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -170,6 +170,14 @@ static const struct usb_device_id usb_quirk_list[] = { /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Baum Vario Ultra */ + { USB_DEVICE(0x0904, 0x6101), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6102), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + { USB_DEVICE(0x0904, 0x6103), .driver_info = + USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Keytouch QWERTY Panel keyboard */ { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index 1d0043dc34e4..de2a722fe3cf 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -50,4 +50,10 @@ /* device can't handle Link Power Management */ #define USB_QUIRK_NO_LPM BIT(10) +/* + * Device reports its bInterval as linear frames instead of the + * USB 2.0 calculation. + */ +#define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL BIT(11) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From 0090114d336a9604aa2d90bc83f20f7cd121b76c Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Fri, 10 Mar 2017 14:43:35 -0600 Subject: usb: musb: cppi41: don't check early-TX-interrupt for Isoch transfer The CPPI 4.1 driver polls register to workaround the premature TX interrupt issue, but it causes audio playback underrun when triggered in Isoch transfers. Isoch doesn't do back-to-back transfers, the TX should be done by the time the next transfer is scheduled. So skip this polling workaround for Isoch transfer. Fixes: a655f481d83d6 ("usb: musb: musb_cppi41: handle pre-mature TX complete interrupt") Cc: #4.1+ Reported-by: Alexandre Bailon Acked-by: Sebastian Andrzej Siewior Tested-by: Alexandre Bailon Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_cppi41.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c index 00e272bfee39..355655f8a3fb 100644 --- a/drivers/usb/musb/musb_cppi41.c +++ b/drivers/usb/musb/musb_cppi41.c @@ -238,8 +238,27 @@ static void cppi41_dma_callback(void *private_data, transferred < cppi41_channel->packet_sz) cppi41_channel->prog_len = 0; - if (cppi41_channel->is_tx) - empty = musb_is_tx_fifo_empty(hw_ep); + if (cppi41_channel->is_tx) { + u8 type; + + if (is_host_active(musb)) + type = hw_ep->out_qh->type; + else + type = hw_ep->ep_in.type; + + if (type == USB_ENDPOINT_XFER_ISOC) + /* + * Don't use the early-TX-interrupt workaround below + * for Isoch transfter. Since Isoch are periodic + * transfer, by the time the next transfer is + * scheduled, the current one should be done already. + * + * This avoids audio playback underrun issue. + */ + empty = true; + else + empty = musb_is_tx_fifo_empty(hw_ep); + } if (!cppi41_channel->is_tx || empty) { cppi41_trans_done(cppi41_channel); -- cgit v1.2.3 From 6b7ad496084e3d8ffa844a02e0f7f76f3eb82203 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Fri, 10 Mar 2017 14:43:36 -0600 Subject: usb: musb: dsps: fix iounmap in error and exit paths Cleanly iounmap the pointer in error and exit paths. Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_dsps.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 7c047c4a2565..9c7ee26ef388 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -933,7 +933,7 @@ static int dsps_probe(struct platform_device *pdev) if (usb_get_dr_mode(&pdev->dev) == USB_DR_MODE_PERIPHERAL) { ret = dsps_setup_optional_vbus_irq(pdev, glue); if (ret) - return ret; + goto err_iounmap; } platform_set_drvdata(pdev, glue); @@ -946,6 +946,8 @@ static int dsps_probe(struct platform_device *pdev) err: pm_runtime_disable(&pdev->dev); +err_iounmap: + iounmap(glue->usbss_base); return ret; } @@ -956,6 +958,7 @@ static int dsps_remove(struct platform_device *pdev) platform_device_unregister(glue->musb); pm_runtime_disable(&pdev->dev); + iounmap(glue->usbss_base); return 0; } -- cgit v1.2.3 From bc1e2154542071e3cfe1734b143af9b8bdacf8bd Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Fri, 10 Mar 2017 14:43:37 -0600 Subject: usb: musb: fix possible spinlock deadlock The DSPS glue calls del_timer_sync() in its musb_platform_disable() implementation, which requires the caller to not hold a lock. But musb_remove() calls musb_platform_disable() will musb->lock held. This could causes spinlock deadlock. So change musb_remove() to call musb_platform_disable() without holds musb->lock. This doesn't impact the musb_platform_disable implementation in other glue drivers. root@am335x-evm:~# modprobe -r musb-dsps [ 126.134879] musb-hdrc musb-hdrc.1: remove, state 1 [ 126.140465] usb usb2: USB disconnect, device number 1 [ 126.146178] usb 2-1: USB disconnect, device number 2 [ 126.416985] musb-hdrc musb-hdrc.1: USB bus 2 deregistered [ 126.423943] [ 126.425525] ====================================================== [ 126.431997] [ INFO: possible circular locking dependency detected ] [ 126.438564] 4.11.0-rc1-00003-g1557f13bca04-dirty #77 Not tainted [ 126.444852] ------------------------------------------------------- [ 126.451414] modprobe/778 is trying to acquire lock: [ 126.456523] (((&glue->timer))){+.-...}, at: [] del_timer_sync+0x0/0xd0 [ 126.464403] [ 126.464403] but task is already holding lock: [ 126.470511] (&(&musb->lock)->rlock){-.-...}, at: [] musb_remove+0x50/0x1 30 [musb_hdrc] [ 126.479965] [ 126.479965] which lock already depends on the new lock. [ 126.479965] [ 126.488531] [ 126.488531] the existing dependency chain (in reverse order) is: [ 126.496368] [ 126.496368] -> #1 (&(&musb->lock)->rlock){-.-...}: [ 126.502968] otg_timer+0x80/0xec [musb_dsps] [ 126.507990] call_timer_fn+0xb4/0x390 [ 126.512372] expire_timers+0xf0/0x1fc [ 126.516754] run_timer_softirq+0x80/0x178 [ 126.521511] __do_softirq+0xc4/0x554 [ 126.525802] irq_exit+0xe8/0x158 [ 126.529735] __handle_domain_irq+0x58/0xb8 [ 126.534583] __irq_usr+0x54/0x80 [ 126.538507] [ 126.538507] -> #0 (((&glue->timer))){+.-...}: [ 126.544636] del_timer_sync+0x40/0xd0 [ 126.549066] musb_remove+0x6c/0x130 [musb_hdrc] [ 126.554370] platform_drv_remove+0x24/0x3c [ 126.559206] device_release_driver_internal+0x14c/0x1e0 [ 126.565225] bus_remove_device+0xd8/0x108 [ 126.569970] device_del+0x1e4/0x308 [ 126.574170] platform_device_del+0x24/0x8c [ 126.579006] platform_device_unregister+0xc/0x20 [ 126.584394] dsps_remove+0x14/0x30 [musb_dsps] [ 126.589595] platform_drv_remove+0x24/0x3c [ 126.594432] device_release_driver_internal+0x14c/0x1e0 [ 126.600450] driver_detach+0x38/0x6c [ 126.604740] bus_remove_driver+0x4c/0xa0 [ 126.609407] SyS_delete_module+0x11c/0x1e4 [ 126.614252] __sys_trace_return+0x0/0x10 Fixes: ea2f35c01d5ea ("usb: musb: Fix sleeping function called from invalid context for hdrc glue") Cc: #4.9+ Acked-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index d8bae6ca8904..0c3664ab705e 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2490,8 +2490,8 @@ static int musb_remove(struct platform_device *pdev) musb_host_cleanup(musb); musb_gadget_cleanup(musb); - spin_lock_irqsave(&musb->lock, flags); musb_platform_disable(musb); + spin_lock_irqsave(&musb->lock, flags); musb_disable_interrupts(musb); musb_writeb(musb->mregs, MUSB_DEVCTL, 0); spin_unlock_irqrestore(&musb->lock, flags); -- cgit v1.2.3 From 0043c1dfbec7b6e2427409059b05347d6f51aa9f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 8 Feb 2017 09:24:25 +0000 Subject: serial: st-asc: Use new GPIOD API to obtain RTS pin The commits mentioned below adapt the GPIO API to allow more information to be passed directly through devm_get_gpiod_from_child() in the first instance. This facilitates the removal of subsequent calls, such as gpiod_direction_output(). This patch firstly moves to utilise the new API and secondly removes the now superfluous call do set the direction. Reported-by: Stephen Rothwell Suggested-by: Boris Brezillon Signed-off-by: Lee Jones [Also drop the header file dummies that only this driver was using] Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Walleij --- drivers/tty/serial/st-asc.c | 11 ++++++----- include/linux/gpio/consumer.h | 16 ---------------- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index bcf1d33e6ffe..c334bcc59c64 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios, pinctrl_select_state(ascport->pinctrl, ascport->states[NO_HW_FLOWCTRL]); - gpiod = devm_get_gpiod_from_child(port->dev, "rts", - &np->fwnode); - if (!IS_ERR(gpiod)) { - gpiod_direction_output(gpiod, 0); + gpiod = devm_fwnode_get_gpiod_from_child(port->dev, + "rts", + &np->fwnode, + GPIOD_OUT_LOW, + np->name); + if (!IS_ERR(gpiod)) ascport->rts = gpiod; - } } } diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 2484b2fcc6eb..933d93656605 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, struct fwnode_handle *child, enum gpiod_flags flags, const char *label); -/* FIXME: delete this helper when users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return devm_fwnode_get_index_gpiod_from_child(dev, con_id, - 0, child, - GPIOD_ASIS, - "?"); -} #else /* CONFIG_GPIOLIB */ @@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, return ERR_PTR(-ENOSYS); } -/* FIXME: delete this when all users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB */ static inline -- cgit v1.2.3 From 6e9f44eaaef0df7b846e9316fa9ca72a02025d44 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 11:32:28 -0600 Subject: USB: serial: option: add Quectel UC15, UC20, EC21, and EC25 modems Add Quectel UC15, UC20, EC21, and EC25. The EC20 is handled by qcserial due to a USB VID/PID conflict with an existing Acer device. Signed-off-by: Dan Williams Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 42cc72e54c05..af67a0de6b5d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -233,6 +233,14 @@ static void option_instat_callback(struct urb *urb); #define BANDRICH_PRODUCT_1012 0x1012 #define QUALCOMM_VENDOR_ID 0x05C6 +/* These Quectel products use Qualcomm's vendor ID */ +#define QUECTEL_PRODUCT_UC20 0x9003 +#define QUECTEL_PRODUCT_UC15 0x9090 + +#define QUECTEL_VENDOR_ID 0x2c7c +/* These Quectel products use Quectel's vendor ID */ +#define QUECTEL_PRODUCT_EC21 0x0121 +#define QUECTEL_PRODUCT_EC25 0x0125 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1161,7 +1169,14 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ - { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */ + /* Quectel products using Qualcomm vendor ID */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, + { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + /* Quectel products using Quectel vendor ID */ + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21), + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, -- cgit v1.2.3 From 60b89f1928af80b546b5c3fd8714a62f6f4b8844 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Tue, 14 Mar 2017 09:38:04 +0100 Subject: ARM: at91: pm: cpu_idle: switch DDR to power-down mode On some DDR controllers, compatible with the sama5d3 one, the sequence to enter/exit/re-enter the self-refresh mode adds more constrains than what is currently written in the at91_idle driver. An actual access to the DDR chip is needed between exit and re-enter of this mode which is somehow difficult to implement. This sequence can completely hang the SoC. It is particularly experienced on parts which embed a L2 cache if the code run between IDLE calls fits in it... Moreover, as the intention is to enter and exit pretty rapidly from IDLE, the power-down mode is a good candidate. So now we use power-down instead of self-refresh. As we can simplify the code for sama5d3 compatible DDR controllers, we instantiate a new sama5d3_ddr_standby() function. Signed-off-by: Nicolas Ferre Cc: # v4.1+ Fixes: 017b5522d5e3 ("ARM: at91: Add new binding for sama5d3-ddramc") Signed-off-by: Alexandre Belloni --- arch/arm/mach-at91/pm.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 3d89b7905bd9..a277981f414d 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -289,6 +289,22 @@ static void at91_ddr_standby(void) at91_ramc_write(1, AT91_DDRSDRC_LPR, saved_lpr1); } +static void sama5d3_ddr_standby(void) +{ + u32 lpr0; + u32 saved_lpr0; + + saved_lpr0 = at91_ramc_read(0, AT91_DDRSDRC_LPR); + lpr0 = saved_lpr0 & ~AT91_DDRSDRC_LPCB; + lpr0 |= AT91_DDRSDRC_LPCB_POWER_DOWN; + + at91_ramc_write(0, AT91_DDRSDRC_LPR, lpr0); + + cpu_do_idle(); + + at91_ramc_write(0, AT91_DDRSDRC_LPR, saved_lpr0); +} + /* We manage both DDRAM/SDRAM controllers, we need more than one value to * remember. */ @@ -323,7 +339,7 @@ static const struct of_device_id const ramc_ids[] __initconst = { { .compatible = "atmel,at91rm9200-sdramc", .data = at91rm9200_standby }, { .compatible = "atmel,at91sam9260-sdramc", .data = at91sam9_sdram_standby }, { .compatible = "atmel,at91sam9g45-ddramc", .data = at91_ddr_standby }, - { .compatible = "atmel,sama5d3-ddramc", .data = at91_ddr_standby }, + { .compatible = "atmel,sama5d3-ddramc", .data = sama5d3_ddr_standby }, { /*sentinel*/ } }; -- cgit v1.2.3 From da9a796f5475b4d3a339083af719982b7ab4a12b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Mar 2017 16:59:57 +0000 Subject: drm/i915: Split GEM resetting into 3 phases Currently we do a reset prepare/finish around the call to reset the GPU, but it looks like we need a later stage after the hw has been reinitialised to allow GEM to restart itself. Start by splitting the 2 GEM phases into 3: prepare - before the reset, check if GEM recovered, then stop GEM reset - after the reset, update GEM bookkeeping finish - after the re-initialisation following the reset, restart GEM Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-2-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/20170313165958.13970-1-chris@chris-wilson.co.uk (cherry picked from commit d80270931314a88d79d9bd5e0a5df93c12196375) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e703556eba99..2093d203665d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1788,7 +1788,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); + i915_gem_reset(dev_priv); intel_overlay_reset(dev_priv); /* Ok, now get things going again... */ @@ -1811,6 +1811,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } + i915_gem_reset_finish(dev_priv); i915_queue_hangcheck(dev_priv); wakeup: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7febe6eecf72..1d20c2d00f42 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3342,6 +3342,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) } int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); +void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 10777da73039..27fe5a9315f0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2834,7 +2834,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->reset_hw(engine, request); } -void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +void i915_gem_reset(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -2856,6 +2856,11 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } } +void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->drm.struct_mutex); +} + static void nop_submit_request(struct drm_i915_gem_request *request) { dma_fence_set_error(&request->fence, -EIO); -- cgit v1.2.3 From 4565bf58d45b8aded94e446666839955cdb5030c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Mar 2017 16:59:58 +0000 Subject: drm/i915: Disable engine->irq_tasklet around resets When we restart the engines, and we have active requests, a request on the first engine may complete and queue a request to the second engine before we try to restart the second engine. That queueing of the request may race with the engine to restart, and so may corrupt the current state. Disabling the engine->irq_tasklet prevents the two paths from writing into ELSP simultaneously (and modifyin the execlists_port[] at the same time). Include fixup 1d309634bcf4 ("drm/i915: Kill the tasklet then disable") Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Testcase: igt/gem_exec_fence/await-hang Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-3-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/20170313165958.13970-2-chris@chris-wilson.co.uk (cherry picked from commit 1f7b847d72c3583df5048d83bd945d0c2c524c28) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 27fe5a9315f0..1051fdf37d20 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2719,7 +2719,16 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its engine->irq_tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the engine->irq_tasklet until the reset is over + * prevents the race. + */ tasklet_kill(&engine->irq_tasklet); + tasklet_disable(&engine->irq_tasklet); if (engine_stalled(engine)) { request = i915_gem_find_active_request(engine); @@ -2858,7 +2867,13 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) void i915_gem_reset_finish(struct drm_i915_private *dev_priv) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + for_each_engine(engine, dev_priv, id) + tasklet_enable(&engine->irq_tasklet); } static void nop_submit_request(struct drm_i915_gem_request *request) -- cgit v1.2.3 From 35a3abfd198e6c69a6644784bb09a2d951fc6b21 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Mar 2017 17:02:31 +0000 Subject: drm/i915: Only enable hotplug interrupts if the display interrupts are enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to prevent accessing the hpd registers outside of the display power wells, we should refrain from writing to the registers before the display interrupts are enabled. [ 4.740136] WARNING: CPU: 1 PID: 221 at drivers/gpu/drm/i915/intel_uncore.c:795 __unclaimed_reg_debug+0x44/0x50 [i915] [ 4.740155] Unclaimed read from register 0x1e1110 [ 4.740168] Modules linked in: i915(+) intel_gtt drm_kms_helper prime_numbers [ 4.740190] CPU: 1 PID: 221 Comm: systemd-udevd Not tainted 4.10.0-rc6+ #384 [ 4.740203] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 4.740220] Call Trace: [ 4.740236] dump_stack+0x4d/0x6f [ 4.740251] __warn+0xc1/0xe0 [ 4.740265] warn_slowpath_fmt+0x4a/0x50 [ 4.740281] ? insert_work+0x77/0xc0 [ 4.740355] ? fwtable_write32+0x90/0x130 [i915] [ 4.740431] __unclaimed_reg_debug+0x44/0x50 [i915] [ 4.740507] fwtable_read32+0xd8/0x130 [i915] [ 4.740575] i915_hpd_irq_setup+0xa5/0x100 [i915] [ 4.740649] intel_hpd_init+0x68/0x80 [i915] [ 4.740716] i915_driver_load+0xe19/0x1380 [i915] [ 4.740784] i915_pci_probe+0x32/0x90 [i915] [ 4.740799] pci_device_probe+0x8b/0xf0 [ 4.740815] driver_probe_device+0x2b6/0x450 [ 4.740828] __driver_attach+0xda/0xe0 [ 4.740841] ? driver_probe_device+0x450/0x450 [ 4.740853] bus_for_each_dev+0x5b/0x90 [ 4.740865] driver_attach+0x19/0x20 [ 4.740878] bus_add_driver+0x166/0x260 [ 4.740892] driver_register+0x5b/0xd0 [ 4.740906] ? 0xffffffffa0166000 [ 4.740920] __pci_register_driver+0x47/0x50 [ 4.740985] i915_init+0x5c/0x5e [i915] [ 4.740999] do_one_initcall+0x3e/0x160 [ 4.741015] ? __vunmap+0x7c/0xc0 [ 4.741029] ? kmem_cache_alloc+0xcf/0x120 [ 4.741045] do_init_module+0x55/0x1c4 [ 4.741060] load_module+0x1f3f/0x25b0 [ 4.741073] ? __symbol_put+0x40/0x40 [ 4.741086] ? kernel_read_file+0x100/0x190 [ 4.741100] SYSC_finit_module+0xbc/0xf0 [ 4.741112] SyS_finit_module+0x9/0x10 [ 4.741125] entry_SYSCALL_64_fastpath+0x17/0x98 [ 4.741135] RIP: 0033:0x7f8559a140f9 [ 4.741145] RSP: 002b:00007fff7509a3e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 4.741161] RAX: ffffffffffffffda RBX: 00007f855aba02d1 RCX: 00007f8559a140f9 [ 4.741172] RDX: 0000000000000000 RSI: 000055b6db0914f0 RDI: 0000000000000011 [ 4.741183] RBP: 0000000000020000 R08: 0000000000000000 R09: 000000000000000e [ 4.741193] R10: 0000000000000011 R11: 0000000000000246 R12: 000055b6db0854d0 [ 4.741204] R13: 000055b6db091150 R14: 0000000000000000 R15: 000055b6db035924 v2: Set dev_priv->display_irqs_enabled to true for all platforms other than vlv/chv that manually control the display power domain. Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97798 Suggested-by: Ville Syrjälä Signed-off-by: Chris Wilson Cc: Lyude Cc: Daniel Vetter Cc: Ville Syrjälä Cc: Hans de Goede Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170215131547.5064-1-chris@chris-wilson.co.uk Link: http://patchwork.freedesktop.org/patch/msgid/20170313170231.18633-1-chris@chris-wilson.co.uk (cherry picked from commit 262fd485ac6b476479f41f00bb104f6a1766ae66) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_hotplug.c | 14 ++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e6ffef2f707a..4fc8973744b4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4266,6 +4266,16 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (!IS_GEN2(dev_priv)) dev->vblank_disable_immediate = true; + /* Most platforms treat the display irq block as an always-on + * power domain. vlv/chv can disable it at runtime and need + * special care to avoid writing any of the display block registers + * outside of the power domain. We defer setting up the display irqs + * in this case to the runtime pm. + */ + dev_priv->display_irqs_enabled = true; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display_irqs_enabled = false; + dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index b62e3f8ad415..54208bef7a83 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -219,7 +219,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) } } } - if (dev_priv->display.hpd_irq_setup) + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); @@ -425,7 +425,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, } } - if (storm_detected) + if (storm_detected && dev_priv->display_irqs_enabled) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock(&dev_priv->irq_lock); @@ -471,10 +471,12 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) * Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked checks happy. */ - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) { + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); + } } static void i915_hpd_poll_init_work(struct work_struct *work) -- cgit v1.2.3 From 0f5418e564ac6452b9086295646e602a9addc4bf Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 13 Mar 2017 17:04:33 +0000 Subject: drm/i915: Drop support for I915_EXEC_CONSTANTS_* execbuf parameters. This patch makes the I915_PARAM_HAS_EXEC_CONSTANTS getparam return 0 (indicating the optional feature is not supported), and makes execbuf always return -EINVAL if the flags are used. Apparently, no userspace ever shipped which used this optional feature: I checked the git history of Mesa, xf86-video-intel, libva, and Beignet, and there were zero commits showing a use of these flags. Kernel commit 72bfa19c8deb4 apparently introduced the feature prematurely. According to Chris, the intention was to use this in cairo-drm, but "the use was broken for gen6", so I don't think it ever happened. 'relative_constants_mode' has always been tracked per-device, but this has actually been wrong ever since hardware contexts were introduced, as the INSTPM register is saved (and automatically restored) as part of the render ring context. The software per-device value could therefore get out of sync with the hardware per-context value. This meant that using them is actually unsafe: a client which tried to use them could damage the state of other clients, causing the GPU to interpret their BO offsets as absolute pointers, leading to bogus memory reads. These flags were also never ported to execlist mode, making them no-ops on Gen9+ (which requires execlists), and Gen8 in the default mode. On Gen8+, userspace can write these registers directly, achieving the same effect. On Gen6-7.5, it likely makes sense to extend the command parser to support them. I don't think anyone wants this on Gen4-5. Based on a patch by Dave Gordon. v3: Return -ENODEV for the getparam, as this is what we do for other obsolete features. Suggested by Chris Wilson. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92448 Signed-off-by: Kenneth Graunke Reviewed-by: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170215093446.21291-1-kenneth@whitecape.org Acked-by: Daniel Vetter Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170313170433.26843-1-chris@chris-wilson.co.uk (cherry picked from commit ef0f411f51475f4eebf9fc1b19a85be698af19ff) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 4 +-- drivers/gpu/drm/i915/i915_drv.h | 2 -- drivers/gpu/drm/i915/i915_gem.c | 2 -- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 52 ++---------------------------- 4 files changed, 3 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2093d203665d..6cd78bb2064d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -248,6 +248,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_IRQ_ACTIVE: case I915_PARAM_ALLOW_BATCHBUFFER: case I915_PARAM_LAST_DISPATCH: + case I915_PARAM_HAS_EXEC_CONSTANTS: /* Reject all old ums/dri params. */ return -ENODEV; case I915_PARAM_CHIPSET_ID: @@ -274,9 +275,6 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_BSD2: value = !!dev_priv->engine[VCS2]; break; - case I915_PARAM_HAS_EXEC_CONSTANTS: - value = INTEL_GEN(dev_priv) >= 4; - break; case I915_PARAM_HAS_LLC: value = HAS_LLC(dev_priv); break; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1d20c2d00f42..80be09831a52 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2064,8 +2064,6 @@ struct drm_i915_private { const struct intel_device_info info; - int relative_constants_mode; - void __iomem *regs; struct intel_uncore uncore; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1051fdf37d20..67b1fc5a0331 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4694,8 +4694,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - init_waitqueue_head(&dev_priv->pending_flip_queue); dev_priv->mm.interruptible = true; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d02cfaefe1c8..30e0675fd7da 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1408,10 +1408,7 @@ execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { - struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; - int instp_mode; - u32 instp_mask; int ret; ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); @@ -1422,56 +1419,11 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && params->engine->id != RCS) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev_priv)->gen < 4) { - DRM_DEBUG("no rel constants on pre-gen4\n"); - return -EINVAL; - } - - if (INTEL_INFO(dev_priv)->gen > 5 && - instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev_priv)->gen >= 6) - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + if (args->flags & I915_EXEC_CONSTANTS_MASK) { + DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n"); return -EINVAL; } - if (params->engine->id == RCS && - instp_mode != dev_priv->relative_constants_mode) { - struct intel_ring *ring = params->request->ring; - - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, INSTPM); - intel_ring_emit(ring, instp_mask << 16 | instp_mode); - intel_ring_advance(ring); - - dev_priv->relative_constants_mode = instp_mode; - } - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { ret = i915_reset_gen7_sol_offsets(params->request); if (ret) -- cgit v1.2.3 From 8f68d591d4765b2e1ce9d916ac7bc5583285c4ad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Mar 2017 17:06:17 +0000 Subject: drm/i915: Stop using RP_DOWN_EI on Baytrail On Baytrail, we manually calculate busyness over the evaluation interval to avoid issues with miscaluations with RC6 enabled. However, it turns out that the DOWN_EI interrupt generator is completely bust - it operates in two modes, continuous or never. Neither of which are conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just compute everything from the UP_EI which does seem to correspond to the desired interval. v2: Fixup gen6_rps_pm_mask() as well v3: Inline vlv_c0_above() to combine the now identical elapsed calculation for up/down and simplify the threshold testing Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v4.1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170309211232.28878-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170313170617.31564-1-chris@chris-wilson.co.uk (cherry picked from commit e0e8c7cb6eb68e9256de2d8cbeb481d3701c05ac) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++------------------------- drivers/gpu/drm/i915/intel_pm.c | 5 +-- 3 files changed, 32 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 80be09831a52..1e53c31b6826 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1325,7 +1325,7 @@ struct intel_gen6_power_mgmt { unsigned boosts; /* manual wa residency calculations */ - struct intel_rps_ei up_ei, down_ei; + struct intel_rps_ei ei; /* * Protects RPS/RC6 register access and PCU communication. diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4fc8973744b4..b6c886ac901b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1046,68 +1046,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv, ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); } -static bool vlv_c0_above(struct drm_i915_private *dev_priv, - const struct intel_rps_ei *old, - const struct intel_rps_ei *now, - int threshold) -{ - u64 time, c0; - unsigned int mul = 100; - - if (old->cz_clock == 0) - return false; - - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) - mul <<= 8; - - time = now->cz_clock - old->cz_clock; - time *= threshold * dev_priv->czclk_freq; - - /* Workload can be split between render + media, e.g. SwapBuffers - * being blitted in X after being rendered in mesa. To account for - * this we need to combine both engines into our activity counter. - */ - c0 = now->render_c0 - old->render_c0; - c0 += now->media_c0 - old->media_c0; - c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC; - - return c0 >= time; -} - void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) { - vlv_c0_read(dev_priv, &dev_priv->rps.down_ei); - dev_priv->rps.up_ei = dev_priv->rps.down_ei; + memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); } static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) { + const struct intel_rps_ei *prev = &dev_priv->rps.ei; struct intel_rps_ei now; u32 events = 0; - if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0) + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) return 0; vlv_c0_read(dev_priv, &now); if (now.cz_clock == 0) return 0; - if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) { - if (!vlv_c0_above(dev_priv, - &dev_priv->rps.down_ei, &now, - dev_priv->rps.down_threshold)) - events |= GEN6_PM_RP_DOWN_THRESHOLD; - dev_priv->rps.down_ei = now; - } + if (prev->cz_clock) { + u64 time, c0; + unsigned int mul; - if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { - if (vlv_c0_above(dev_priv, - &dev_priv->rps.up_ei, &now, - dev_priv->rps.up_threshold)) - events |= GEN6_PM_RP_UP_THRESHOLD; - dev_priv->rps.up_ei = now; + mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to threshold% */ + if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) + mul <<= 8; + + time = now.cz_clock - prev->cz_clock; + time *= dev_priv->czclk_freq; + + /* Workload can be split between render + media, + * e.g. SwapBuffers being blitted in X after being rendered in + * mesa. To account for this we need to combine both engines + * into our activity counter. + */ + c0 = now.render_c0 - prev->render_c0; + c0 += now.media_c0 - prev->media_c0; + c0 *= mul; + + if (c0 > time * dev_priv->rps.up_threshold) + events = GEN6_PM_RP_UP_THRESHOLD; + else if (c0 < time * dev_priv->rps.down_threshold) + events = GEN6_PM_RP_DOWN_THRESHOLD; } + dev_priv->rps.ei = now; return events; } @@ -4228,7 +4211,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) /* Let's track the enabled rps events */ if (IS_VALLEYVIEW(dev_priv)) /* WaGsvRC0ResidencyMethod:vlv */ - dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED; + dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; else dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 940bab22d464..6a29784d2b41 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4928,8 +4928,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) { u32 mask = 0; + /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ if (val > dev_priv->rps.min_freq_softlimit) - mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; if (val < dev_priv->rps.max_freq_softlimit) mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; @@ -5039,7 +5040,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { - if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); -- cgit v1.2.3 From abf8315f71dc5a2ee56fb60830dcb2861982dc91 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 31 Jan 2017 16:18:42 +0200 Subject: drm/tilcdc: Fix hardcoded fail-return value in tilcdc_crtc_create() Fix badly hardcoded return return value under fail-label. All goto branches to the label set the "ret"-variable accordingly. Signed-off-by: Jyri Sarha Reviewed-by: Gabriel Krisman Bertazi --- drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index f80bf9385e41..abcbcd9f5851 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -1036,5 +1036,5 @@ int tilcdc_crtc_create(struct drm_device *dev) fail: tilcdc_crtc_destroy(crtc); - return -ENOMEM; + return ret; } -- cgit v1.2.3 From 11abbc9f39e002a2b25657e00abac8056cb39e93 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Wed, 1 Mar 2017 10:30:28 +0200 Subject: drm/tilcdc: Set framebuffer DMA address to HW only if CRTC is enabled Touching HW while clocks are off is a serious error and for instance breaks suspend functionality. After this patch tilcdc_crtc_update_fb() always updates the primary plane's framebuffer pointer, increases fb's reference count and stores vblank event. tilcdc_crtc_update_fb() only writes the fb's DMA address to HW if the crtc is enabled, as tilcdc_crtc_enable() takes care of writing the address on enable. This patch also refactors the tilcdc_crtc_update_fb() a bit. Number of subsequent small changes had made it almost unreadable. There should be no other functional changes but checking the CRTC's enable state. However, the locking goes a bit differently and some of the redundant checks have been removed in this new version. The enable_lock should be enough to protect the access to tilcdc_crtc->enabled. The irq_lock protects the access to last_vblank and next_fb. The check for vrefresh and last_vblank being valid is redundant, as the vrefresh should be always valid if the CRTC is enabled and now last_vblank should be too, because it is initialized to current time when CRTC raster is enabled. If for some reason the values are not correctly initialized the division by zero warning is quite appropriate. Signed-off-by: Jyri Sarha Reviewed-by: Tomi Valkeinen --- drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index abcbcd9f5851..d745e8b50fb8 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -464,6 +464,7 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); + unsigned long flags; WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); mutex_lock(&tilcdc_crtc->enable_lock); @@ -484,7 +485,17 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc) tilcdc_write_mask(dev, LCDC_RASTER_CTRL_REG, LCDC_PALETTE_LOAD_MODE(DATA_ONLY), LCDC_PALETTE_LOAD_MODE_MASK); + + /* There is no real chance for a race here as the time stamp + * is taken before the raster DMA is started. The spin-lock is + * taken to have a memory barrier after taking the time-stamp + * and to avoid a context switch between taking the stamp and + * enabling the raster. + */ + spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + tilcdc_crtc->last_vblank = ktime_get(); tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE); + spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); drm_crtc_vblank_on(crtc); @@ -539,7 +550,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown) } drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq); - tilcdc_crtc->last_vblank = 0; tilcdc_crtc->enabled = false; mutex_unlock(&tilcdc_crtc->enable_lock); @@ -602,7 +612,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, { struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); struct drm_device *dev = crtc->dev; - unsigned long flags; WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); @@ -614,28 +623,30 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, drm_framebuffer_reference(fb); crtc->primary->fb = fb; + tilcdc_crtc->event = event; - spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + mutex_lock(&tilcdc_crtc->enable_lock); - if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) { + if (tilcdc_crtc->enabled) { + unsigned long flags; ktime_t next_vblank; s64 tdiff; - next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, - 1000000 / crtc->hwmode.vrefresh); + spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, + 1000000 / crtc->hwmode.vrefresh); tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get())); if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US) tilcdc_crtc->next_fb = fb; - } - - if (tilcdc_crtc->next_fb != fb) - set_scanout(crtc, fb); + else + set_scanout(crtc, fb); - tilcdc_crtc->event = event; + spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); + } - spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); + mutex_unlock(&tilcdc_crtc->enable_lock); return 0; } -- cgit v1.2.3 From 1b2e5ea0b7061be3ffdcd85918c2f428edace4ba Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:19:59 +0000 Subject: drm/i915: Always call i915_gem_reset_finish() following i915_gem_reset_prepare() As i915_gem_reset_finish() undoes the steps from i915_gem_reset_prepare() to leave the system in a fully-working state, e.g. to be able to free the breadcrumb signal threads, make sure that we always call it even on the error path. Fixes: da9a796f5475 ("drm/i915: Split GEM resetting into 3 phases") Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala (cherry picked from commit 8d613c539c74fa9055f88f4116196d7c820bd98f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6cd78bb2064d..1c75402a59c1 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1809,10 +1809,10 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); i915_queue_hangcheck(dev_priv); wakeup: + i915_gem_reset_finish(dev_priv); enable_irq(dev_priv->drm.irq); wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); return; -- cgit v1.2.3 From fa23b9d1b89fdc34f296f02e496a20aeff5736be Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:54:12 +0100 Subject: irqchip/mvebu-odmi: Select GENERIC_MSI_IRQ_DOMAIN This driver uses the MSI domain but has no strict dependency on PCI_MSI, so we may run into a build failure when CONFIG_GENERIC_MSI_IRQ_DOMAIN is disabled: drivers/irqchip/irq-mvebu-odmi.c:152:15: error: variable 'odmi_msi_ops' has initializer but incomplete type static struct msi_domain_ops odmi_msi_ops = { ^~~~~~~~~~~~~~ drivers/irqchip/irq-mvebu-odmi.c:155:15: error: variable 'odmi_msi_domain_info' has initializer but incomplete type static struct msi_domain_info odmi_msi_domain_info = { ^~~~~~~~~~~~~~~ drivers/irqchip/irq-mvebu-odmi.c:156:3: error: 'struct msi_domain_info' has no member named 'flags' .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS), ^~~~~ drivers/irqchip/irq-mvebu-odmi.c:156:12: error: 'MSI_FLAG_USE_DEF_DOM_OPS' undeclared here (not in a function) .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS), ^~~~~~~~~~~~~~~~~~~~~~~~ drivers/irqchip/irq-mvebu-odmi.c:156:39: error: 'MSI_FLAG_USE_DEF_CHIP_OPS' undeclared here (not in a function); did you mean 'MSI_FLAG_USE_DEF_DOM_OPS'? Selecting the option from this driver seems to solve this nicely, though I could not find any other instance of this in irqchip drivers. Signed-off-by: Arnd Bergmann Acked-by: Thomas Petazzoni Signed-off-by: Marc Zyngier --- drivers/irqchip/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 125528f39e92..8162121bb1bc 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -262,6 +262,7 @@ config IRQ_MXS config MVEBU_ODMI bool + select GENERIC_MSI_IRQ_DOMAIN config MVEBU_PIC bool -- cgit v1.2.3 From 318465adace471387cfd5f768daa64bb4c623695 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Mon, 13 Feb 2017 19:02:39 +0800 Subject: dt-bindings: rockchip-dw-mshc: rename RK1108 to RV1108 Rockchip finally named the SOC as RV1108, so change it. Signed-off-by: Andy Yan Acked-by: Rob Herring Reviewed-by: Heiko Stuebner Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt index ea9c1c9607f6..520d61dad6dd 100644 --- a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt +++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt @@ -13,7 +13,7 @@ Required Properties: - "rockchip,rk2928-dw-mshc": for Rockchip RK2928 and following, before RK3288 - "rockchip,rk3288-dw-mshc": for Rockchip RK3288 - - "rockchip,rk1108-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK1108 + - "rockchip,rv1108-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RV1108 - "rockchip,rk3036-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3036 - "rockchip,rk3368-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3368 - "rockchip,rk3399-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3399 -- cgit v1.2.3 From 16681037e75ce08f2980ac5dbb03414429c7a55d Mon Sep 17 00:00:00 2001 From: Anssi Hannula Date: Mon, 13 Feb 2017 14:06:10 +0200 Subject: mmc: sdhci-of-arasan: fix incorrect timeout clock sdhci_arasan_get_timeout_clock() divides the frequency it has with (1 << (13 + divisor)). However, the divisor is not some Arasan-specific value, but instead is just the Data Timeout Counter Value from the SDHCI Timeout Control Register. Applying it here like this is wrong as the sdhci driver already takes that value into account when calculating timeouts, and in fact it *sets* that register value based on how long a timeout is wanted. Additionally, sdhci core interprets the .get_timeout_clock callback return value as if it were read from hardware registers, i.e. the unit should be kHz or MHz depending on SDHCI_TIMEOUT_CLK_UNIT capability bit. This bit is set at least on the tested Zynq-7000 SoC. With the tested hardware (SDHCI_TIMEOUT_CLK_UNIT set) this results in too high a timeout clock rate being reported, causing the core to use longer-than-needed timeouts. Additionally, on a partitioned MMC (therefore having erase_group_def bit set) mmc_calc_max_discard() disables discard support as it looks like controller does not support the long timeouts needed for that. Do not apply the extra divisor and return the timeout clock in the expected unit. Tested with a Zynq-7000 SoC and a partitioned Toshiba THGBMAG5A1JBAWR eMMC card. Signed-off-by: Anssi Hannula Fixes: e3ec3a3d11ad ("mmc: arasan: Add driver for Arasan SDHCI") Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index 410a55b1c25f..1cfd7f900339 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -28,13 +28,9 @@ #include "sdhci-pltfm.h" #include -#define SDHCI_ARASAN_CLK_CTRL_OFFSET 0x2c #define SDHCI_ARASAN_VENDOR_REGISTER 0x78 #define VENDOR_ENHANCED_STROBE BIT(0) -#define CLK_CTRL_TIMEOUT_SHIFT 16 -#define CLK_CTRL_TIMEOUT_MASK (0xf << CLK_CTRL_TIMEOUT_SHIFT) -#define CLK_CTRL_TIMEOUT_MIN_EXP 13 #define PHY_CLK_TOO_SLOW_HZ 400000 @@ -163,15 +159,15 @@ static int sdhci_arasan_syscon_write(struct sdhci_host *host, static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host) { - u32 div; unsigned long freq; struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - div = readl(host->ioaddr + SDHCI_ARASAN_CLK_CTRL_OFFSET); - div = (div & CLK_CTRL_TIMEOUT_MASK) >> CLK_CTRL_TIMEOUT_SHIFT; + /* SDHCI timeout clock is in kHz */ + freq = DIV_ROUND_UP(clk_get_rate(pltfm_host->clk), 1000); - freq = clk_get_rate(pltfm_host->clk); - freq /= 1 << (CLK_CTRL_TIMEOUT_MIN_EXP + div); + /* or in MHz */ + if (host->caps & SDHCI_TIMEOUT_CLK_UNIT) + freq = DIV_ROUND_UP(freq, 1000); return freq; } -- cgit v1.2.3 From 9c31b087348cb2b5e668261f2eee2f224b3780b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 13 Feb 2017 19:58:18 +0200 Subject: drm/i915: Reject HDMI 12bpc if the sink doesn't indicate support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check that the sink really declared 12bpc support before we enable it. This should not actually never happen since it's mandatory for HDMI sinks to support 12bpc if they support any deep color modes. But reality disagrees with the theory and there are actually sinks in the wild that violate the spec. v2: Fix the output_types check Update commit message to state that these things are in fact real Cc: stable@vger.kernel.org Cc: Nicholas Sielicki Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99250 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170213175818.24958-1-ville.syrjala@linux.intel.com Reviewed-by: Shashank Sharma (cherry picked from commit c750bdd3e7e204cc88b32806c3864487a03cd84b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_hdmi.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ebae2bd83918..24b2fa5b6282 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1298,16 +1298,34 @@ intel_hdmi_mode_valid(struct drm_connector *connector, static bool hdmi_12bpc_possible(struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->base.crtc->dev; + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); + struct drm_atomic_state *state = crtc_state->base.state; + struct drm_connector_state *connector_state; + struct drm_connector *connector; + int i; - if (HAS_GMCH_DISPLAY(to_i915(dev))) + if (HAS_GMCH_DISPLAY(dev_priv)) return false; /* * HDMI 12bpc affects the clocks, so it's only possible * when not cloning with other encoder types. */ - return crtc_state->output_types == 1 << INTEL_OUTPUT_HDMI; + if (crtc_state->output_types != 1 << INTEL_OUTPUT_HDMI) + return false; + + for_each_connector_in_state(state, connector, connector_state, i) { + const struct drm_display_info *info = &connector->display_info; + + if (connector_state->crtc != crtc_state->base.crtc) + continue; + + if ((info->edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_36) == 0) + return false; + } + + return true; } bool intel_hdmi_compute_config(struct intel_encoder *encoder, -- cgit v1.2.3 From 773dc118756b1f38766063e90e582016be868f09 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 1 Mar 2017 14:11:47 -0800 Subject: mmc: core: Fix access to HS400-ES devices HS400-ES devices fail to initialize with the following error messages. mmc1: power class selection to bus width 8 ddr 0 failed mmc1: error -110 whilst initialising MMC card This was seen on Samsung Chromebook Plus. Code analysis points to commit 3d4ef329757c ("mmc: core: fix multi-bit bus width without high-speed mode"), which attempts to set the bus width for all but HS200 devices unconditionally. However, for HS400-ES, the bus width is already selected. Cc: Anssi Hannula Cc: Douglas Anderson Cc: Brian Norris Fixes: 3d4ef329757c ("mmc: core: fix multi-bit bus width ...") Signed-off-by: Guenter Roeck Reviewed-by: Douglas Anderson Reviewed-by: Shawn Lin Tested-by: Heiko Stuebner Cc: Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 7fd722868875..b502601df228 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1730,7 +1730,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, err = mmc_select_hs400(card); if (err) goto free_card; - } else { + } else if (!mmc_card_hs400es(card)) { /* Select the desired bus width optionally */ err = mmc_select_bus_width(card); if (err > 0 && mmc_card_hs(card)) { -- cgit v1.2.3 From 2602b740e45cc64feb55d5a9ee8db744ab3becbb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Mar 2017 14:36:32 +0200 Subject: mmc: block: Fix is_waiting_last_req set incorrectly Commit 15520111500c ("mmc: core: Further fix thread wake-up") allowed a queue to release the host with is_waiting_last_req set to true. A queue waiting to claim the host will not reset it, which can result in the queue getting stuck in a loop. Fixes: 15520111500c ("mmc: core: Further fix thread wake-up") Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 1621fa08e206..e59107ca512a 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1817,6 +1817,7 @@ void mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) mmc_blk_issue_flush(mq, req); } else { mmc_blk_issue_rw_rq(mq, req); + card->host->context_info.is_waiting_last_req = false; } out: -- cgit v1.2.3 From 8ecc34448e24e9e8a8f3a9b37be70b43c6af5288 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Mar 2017 14:36:33 +0200 Subject: mmc: block: Fix cmd error reset failure path Commit 4e1f780032c5 ("mmc: block: break out mmc_blk_rw_cmd_abort()") assumed the request had not completed, but in one case it had. Fix that. Fixes: 4e1f780032c5 ("mmc: block: break out mmc_blk_rw_cmd_abort()") Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index e59107ca512a..05afefcfb611 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1701,7 +1701,8 @@ static void mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *new_req) case MMC_BLK_CMD_ERR: req_pending = mmc_blk_rw_cmd_err(md, card, brq, old_req, req_pending); if (mmc_blk_reset(md, card->host, type)) { - mmc_blk_rw_cmd_abort(card, old_req); + if (req_pending) + mmc_blk_rw_cmd_abort(card, old_req); mmc_blk_rw_try_restart(mq, new_req); return; } -- cgit v1.2.3 From 0977762f6d15f13caccc20d71a5dec47d098907d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 13 Mar 2017 13:44:35 -0700 Subject: md/r5cache: fix set_syndrome_sources() for data in cache Before this patch, device InJournal will be included in prexor (SYNDROME_SRC_WANT_DRAIN) but not in reconstruct (SYNDROME_SRC_WRITTEN). So it will break parity calculation. With srctype == SYNDROME_SRC_WRITTEN, we need include both dev with non-null ->written and dev with R5_InJournal. This fixes logic in 1e6d690(md/r5cache: caching phase of r5cache) Cc: stable@vger.kernel.org (v4.10+) Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6bfedfcf41c1..ed5cd705b985 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs, (test_bit(R5_Wantdrain, &dev->flags) || test_bit(R5_InJournal, &dev->flags))) || (srctype == SYNDROME_SRC_WRITTEN && - dev->written)) { + (dev->written || + test_bit(R5_InJournal, &dev->flags)))) { if (test_bit(R5_InJournal, &dev->flags)) srcs[slot] = sh->dev[i].orig_page; else -- cgit v1.2.3 From 8c53ad2139137dd4bf506a2c2b888de3816e8f75 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 13 Mar 2017 15:14:08 +0800 Subject: drm/amd/powerplay: fix copy error in smu7_clockpoweragting.c Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c index 8cf71f3c6d0e..261b828ad590 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c @@ -178,7 +178,7 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) if (bgate) { cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + AMD_PG_STATE_GATE); cgs_set_clockgating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_GATE); -- cgit v1.2.3 From 11353b9d10392e79e32603d2178e75feb25eaf0d Mon Sep 17 00:00:00 2001 From: Zhilong Liu Date: Tue, 14 Mar 2017 15:52:26 +0800 Subject: md/raid1: fix a trivial typo in comments raid1.c: fix a trivial typo in comments of freeze_array(). Cc: Jack Wang Cc: Guoqing Jiang Cc: John Stoffel Acked-by: Coly Li Signed-off-by: Zhilong Liu Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c33e96e33b8e..a34f58772022 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf) static void freeze_array(struct r1conf *conf, int extra) { /* Stop sync I/O and normal I/O and wait for everything to - * go quite. + * go quiet. * This is called in two situations: * 1) management command handlers (reshape, remove disk, quiesce). * 2) one normal I/O request failed. -- cgit v1.2.3 From dc434e056fe1dada20df7ba07f32739d3a701adf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 14 Mar 2017 16:06:45 +0100 Subject: cpu/hotplug: Serialize callback invocations proper The setup/remove_state/instance() functions in the hotplug core code are serialized against concurrent CPU hotplug, but unfortunately not serialized against themself. As a consequence a concurrent invocation of these function results in corruption of the callback machinery because two instances try to invoke callbacks on remote cpus at the same time. This results in missing callback invocations and initiator threads waiting forever on the completion. The obvious solution to replace get_cpu_online() with cpu_hotplug_begin() is not possible because at least one callsite calls into these functions from a get_online_cpu() locked region. Extend the protection scope of the cpuhp_state_mutex from solely protecting the state arrays to cover the callback invocation machinery as well. Fixes: 5b7aa87e0482 ("cpu/hotplug: Implement setup/removal interface") Reported-and-tested-by: Bart Van Assche Signed-off-by: Sebastian Andrzej Siewior Cc: hpa@zytor.com Cc: mingo@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20170314150645.g4tdyoszlcbajmna@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/cpu.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index f7c063239fa5..37b223e4fc05 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1335,26 +1335,21 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, struct cpuhp_step *sp; int ret = 0; - mutex_lock(&cpuhp_state_mutex); - if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) { ret = cpuhp_reserve_state(state); if (ret < 0) - goto out; + return ret; state = ret; } sp = cpuhp_get_step(state); - if (name && sp->name) { - ret = -EBUSY; - goto out; - } + if (name && sp->name) + return -EBUSY; + sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(&sp->list); -out: - mutex_unlock(&cpuhp_state_mutex); return ret; } @@ -1428,6 +1423,7 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) goto add_node; @@ -1447,16 +1443,14 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, if (ret) { if (sp->teardown.multi) cpuhp_rollback_install(cpu, state, node); - goto err; + goto unlock; } } add_node: ret = 0; - mutex_lock(&cpuhp_state_mutex); hlist_add_head(node, &sp->list); +unlock: mutex_unlock(&cpuhp_state_mutex); - -err: put_online_cpus(); return ret; } @@ -1491,6 +1485,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); @@ -1524,6 +1519,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, } } out: + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); /* * If the requested state is CPUHP_AP_ONLINE_DYN, return the @@ -1547,6 +1543,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); + if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* @@ -1563,7 +1561,6 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, } remove: - mutex_lock(&cpuhp_state_mutex); hlist_del(node); mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); @@ -1571,6 +1568,7 @@ remove: return 0; } EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance); + /** * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state * @state: The state to remove @@ -1589,6 +1587,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { WARN(!hlist_empty(&sp->list), "Error: Removing state %d which has instances left.\n", @@ -1613,6 +1612,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) } remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); } EXPORT_SYMBOL(__cpuhp_remove_state); -- cgit v1.2.3 From d434936e4cbb10181463622962f30b989d3e9e19 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:12:53 +0100 Subject: mm, x86: Fix native_pud_clear build error We still get a build error in random configurations, after this has been modified a few times: In file included from include/linux/mm.h:68:0, from include/linux/suspend.h:8, from arch/x86/kernel/asm-offsets.c:12: arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear' #define pud_clear(pud) native_pud_clear(pud) My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear definition. Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()") Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Signed-off-by: Arnd Bergmann Acked-by: Dave Jiang Cc: Kees Cook Cc: Dave Hansen Cc: Hugh Dickins Cc: Andrew Morton Cc: Borislav Petkov Cc: Thomas Garnier Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pgtable-3level.h | 3 --- arch/x86/include/asm/pgtable.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5..50d35e3185f5 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024..585ee0d42d18 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif -- cgit v1.2.3 From 87a6b2975f0d340c75b7488d22d61d2f98fb8abf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Mar 2017 23:27:47 -0500 Subject: x86/unwind: Fix last frame check for aligned function stacks Pavel Machek reported the following warning on x86-32: WARNING: kernel stack frame pointer at f50cdf98 in swapper/2:0 has bad value (null) The warning is caused by the unwinder not realizing that it reached the end of the stack, due to an unusual prologue which gcc sometimes generates for aligned stacks. The prologue is based on a gcc feature called the Dynamic Realign Argument Pointer (DRAP). It's almost always enabled for aligned stacks when -maccumulate-outgoing-args isn't set. This issue is similar to the one fixed by the following commit: 8023e0e2a48d ("x86/unwind: Adjust last frame check for aligned function stacks") ... but that fix was specific to x86-64. Make the fix more generic to cover x86-32 as well, and also ensure that the return address referred to by the frame pointer is a copy of the original return address. Fixes: acb4608ad186 ("x86/unwind: Create stack frames for saved syscall registers") Reported-by: Pavel Machek Signed-off-by: Josh Poimboeuf Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/50d4924db716c264b14f1633037385ec80bf89d2.1489465609.git.jpoimboe@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/unwind_frame.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 478d15dbaee4..08339262b666 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs) return sizeof(*regs); } +#ifdef CONFIG_X86_32 +#define GCC_REALIGN_WORDS 3 +#else +#define GCC_REALIGN_WORDS 1 +#endif + static bool is_last_task_frame(struct unwind_state *state) { - unsigned long bp = (unsigned long)state->bp; - unsigned long regs = (unsigned long)task_pt_regs(state->task); + unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2; + unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS; /* * We have to check for the last task frame at two different locations * because gcc can occasionally decide to realign the stack pointer and - * change the offset of the stack frame by a word in the prologue of a - * function called by head/entry code. + * change the offset of the stack frame in the prologue of a function + * called by head/entry code. Examples: + * + * : + * push %edi + * lea 0x8(%esp),%edi + * and $0xfffffff8,%esp + * pushl -0x4(%edi) + * push %ebp + * mov %esp,%ebp + * + * : + * lea 0x8(%rsp),%r10 + * and $0xfffffffffffffff0,%rsp + * pushq -0x8(%r10) + * push %rbp + * mov %rsp,%rbp + * + * Note that after aligning the stack, it pushes a duplicate copy of + * the return address before pushing the frame pointer. */ - return bp == regs - FRAME_HEADER_SIZE || - bp == regs - FRAME_HEADER_SIZE - sizeof(long); + return (state->bp == last_bp || + (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); } /* -- cgit v1.2.3 From 49ec8f5b6ae3ab60385492cad900ffc8a523c895 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 14 Mar 2017 15:20:53 +0100 Subject: x86/intel_rdt: Put group node in rdtgroup_kn_unlock The rdtgroup_kn_unlock waits for the last user to release and put its node. But it's calling kernfs_put on the node which calls the rdtgroup_kn_unlock, which might not be the group's directory node, but another group's file node. This race could be easily reproduced by running 2 instances of following script: mount -t resctrl resctrl /sys/fs/resctrl/ pushd /sys/fs/resctrl/ mkdir krava echo "krava" > krava/schemata rmdir krava popd umount /sys/fs/resctrl It triggers the slub debug error message with following command line config: slub_debug=,kernfs_node_cache. Call kernfs_put on the group's node to fix it. Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Signed-off-by: Jiri Olsa Cc: Fenghua Yu Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Shaohua Li Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1489501253-20248-1-git-send-email-jolsa@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index c05509d38b1f..9ac2a5cdd9c2 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -727,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) if (atomic_dec_and_test(&rdtgrp->waitcount) && (rdtgrp->flags & RDT_DELETED)) { kernfs_unbreak_active_protection(kn); - kernfs_put(kn); + kernfs_put(rdtgrp->kn); kfree(rdtgrp); } else { kernfs_unbreak_active_protection(kn); -- cgit v1.2.3 From 655d9ca9ac075da1ef2a45012ba48a39f6eb1f58 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 22:27:11 +0100 Subject: drm: amd: remove broken include path The AMD ACP driver adds "-I../acp -I../acp/include" to the gcc command line, which makes no sense, since these are evaluated relative to the build directory. When we build with "make W=1", they instead cause a warning: cc1: error: ../acp/: No such file or directory [-Werror=missing-include-dirs] cc1: error: ../acp/include: No such file or directory [-Werror=missing-include-dirs] cc1: all warnings being treated as errors ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.o' failed ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.o' failed ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_kms.o' failed This removes the subdir-ccflags variable that evidently did not serve any purpose here. Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/acp/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile index 8363cb57915b..8a08e81ee90d 100644 --- a/drivers/gpu/drm/amd/acp/Makefile +++ b/drivers/gpu/drm/amd/acp/Makefile @@ -3,6 +3,4 @@ # of AMDSOC/AMDGPU drm driver. # It provides the HW control for ACP related functionalities. -subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include - AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o -- cgit v1.2.3 From 630a04e79dd41ff746b545d4fc052e0abb836120 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 15 Mar 2017 00:24:25 -0700 Subject: xfs: verify inline directory data forks When we're reading or writing the data fork of an inline directory, check the contents to make sure we're not overflowing buffers or eating garbage data. xfs/348 corrupts an inline symlink into an inline directory, triggering a buffer overflow bug. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- v2: add more checks consistent with _dir2_sf_check and make the verifier usable from anywhere. --- fs/xfs/libxfs/xfs_dir2_priv.h | 2 + fs/xfs/libxfs/xfs_dir2_sf.c | 87 ++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_inode_fork.c | 26 +++++++++++-- fs/xfs/libxfs/xfs_inode_fork.h | 2 +- fs/xfs/xfs_dir2_readdir.c | 11 ------ fs/xfs/xfs_inode.c | 12 ++++-- 6 files changed, 122 insertions(+), 18 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index d04547fcf274..eb00bc133bca 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); +extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp, + int size); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index c6809ff41197..96b45cd6c63f 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -629,6 +629,93 @@ xfs_dir2_sf_check( } #endif /* DEBUG */ +/* Verify the consistency of an inline directory. */ +int +xfs_dir2_sf_verify( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, + int size) +{ + struct xfs_dir2_sf_entry *sfep; + struct xfs_dir2_sf_entry *next_sfep; + char *endp; + const struct xfs_dir_ops *dops; + xfs_ino_t ino; + int i; + int i8count; + int offset; + __uint8_t filetype; + + dops = xfs_dir_get_ops(mp, NULL); + + /* + * Give up if the directory is way too short. + */ + XFS_WANT_CORRUPTED_RETURN(mp, size > + offsetof(struct xfs_dir2_sf_hdr, parent)); + XFS_WANT_CORRUPTED_RETURN(mp, size >= + xfs_dir2_sf_hdr_size(sfp->i8count)); + + endp = (char *)sfp + size; + + /* Check .. entry */ + ino = dops->sf_get_parent_ino(sfp); + i8count = ino > XFS_DIR2_MAX_SHORT_INUM; + XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + offset = dops->data_first_offset; + + /* Check all reported entries */ + sfep = xfs_dir2_sf_firstentry(sfp); + for (i = 0; i < sfp->count; i++) { + /* + * struct xfs_dir2_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. + */ + XFS_WANT_CORRUPTED_RETURN(mp, + ((char *)sfep + sizeof(*sfep)) < endp); + + /* Don't allow names with known bad length. */ + XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0); + XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN); + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = dops->sf_nextentry(sfp, sfep); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep); + + /* Check that the offsets always increase. */ + XFS_WANT_CORRUPTED_RETURN(mp, + xfs_dir2_sf_get_offset(sfep) >= offset); + + /* Check the inode number. */ + ino = dops->sf_get_ino(sfp, sfep); + i8count += ino > XFS_DIR2_MAX_SHORT_INUM; + XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + + /* Check the file type. */ + filetype = dops->sf_get_ftype(sfep); + XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX); + + offset = xfs_dir2_sf_get_offset(sfep) + + dops->data_entsize(sfep->namelen); + + sfep = next_sfep; + } + XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count); + XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp); + + /* Make sure this whole thing ought to be in local format. */ + XFS_WANT_CORRUPTED_RETURN(mp, offset + + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize); + + return 0; +} + /* * Create a new (shortform) directory. */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 25c1e078aef6..9653e964eda4 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -33,6 +33,8 @@ #include "xfs_trace.h" #include "xfs_attr_sf.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_ifork_zone; @@ -320,6 +322,7 @@ xfs_iformat_local( int whichfork, int size) { + int error; /* * If the size is unreasonable, then something @@ -336,6 +339,14 @@ xfs_iformat_local( return -EFSCORRUPTED; } + if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { + error = xfs_dir2_sf_verify(ip->i_mount, + (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip), + size); + if (error) + return error; + } + xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size); return 0; } @@ -856,7 +867,7 @@ xfs_iextents_copy( * In these cases, the format always takes precedence, because the * format indicates the current state of the fork. */ -void +int xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, @@ -866,6 +877,7 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; + int error; static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = @@ -874,7 +886,7 @@ xfs_iflush_fork( { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; if (!iip) - return; + return 0; ifp = XFS_IFORK_PTR(ip, whichfork); /* * This can happen if we gave up in iformat in an error path, @@ -882,12 +894,19 @@ xfs_iflush_fork( */ if (!ifp) { ASSERT(whichfork == XFS_ATTR_FORK); - return; + return 0; } cp = XFS_DFORK_PTR(dip, whichfork); mp = ip->i_mount; switch (XFS_IFORK_FORMAT(ip, whichfork)) { case XFS_DINODE_FMT_LOCAL: + if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { + error = xfs_dir2_sf_verify(mp, + (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data, + ifp->if_bytes); + if (error) + return error; + } if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_u1.if_data != NULL); @@ -940,6 +959,7 @@ xfs_iflush_fork( ASSERT(0); break; } + return 0; } /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 7fb8365326d1..132dc59fdde6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -140,7 +140,7 @@ typedef struct xfs_ifork { struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); -void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, +int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 003a99b83bd8..ad9396e516f6 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -71,22 +71,11 @@ xfs_dir2_sf_getdents( struct xfs_da_geometry *geo = args->geo; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Give up if the directory is way too short. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return -EIO; - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count)) - return -EFSCORRUPTED; - /* * If the block number in the offset is out of range, we're done. */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7eaf1ef74e3c..c7fe2c2123ab 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3475,6 +3475,7 @@ xfs_iflush_int( struct xfs_inode_log_item *iip = ip->i_itemp; struct xfs_dinode *dip; struct xfs_mount *mp = ip->i_mount; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3557,9 +3558,14 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); - if (XFS_IFORK_Q(ip)) - xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); + error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); + if (error) + return error; + if (XFS_IFORK_Q(ip)) { + error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); + if (error) + return error; + } xfs_inobp_check(mp, bp); /* -- cgit v1.2.3 From e67351d56a709853046fbe652b981fb7ca4c3dcc Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 15 Mar 2017 11:57:47 +0200 Subject: drm/i915/glk: Remove MODULE_FIRMWARE() tag from Geminilake's DMC Geminilake's DMC is not yet available in the linux-firmware repository. To prevent userspace tools such as mkinitramfs to complain about missing firmware, remove the MODULE_FIRMWARE() tag for now. Fixes: dbb28b5c3d3c ("drm/i915/DMC/GLK: Load DMC on GLK") Cc: Rodrigo Vivi Cc: Anusha Srivatsa Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170306085651.14008-1-ander.conselvan.de.oliveira@intel.com Link: http://patchwork.freedesktop.org/patch/msgid/20170315095747.21845-1-ander.conselvan.de.oliveira@intel.com (cherry picked from commit d9321a03efcda867b3a8c6327e01808516f0acd7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_csr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 0085bc745f6a..de219b71fb76 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -35,7 +35,6 @@ */ #define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin" -MODULE_FIRMWARE(I915_CSR_GLK); #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" -- cgit v1.2.3 From e609ccef5222c73b46b322be7d3796d60bff353d Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 24 Feb 2017 16:04:15 +0200 Subject: intel_th: Don't leak module refcount on failure to activate Output 'activation' may fail for the reasons of the output driver, for example, if msc's buffer is not allocated. We forget, however, to drop the module reference in this case. So each attempt at activation in this case leaks a reference, preventing the module from ever unloading. This patch adds the missing module_put() in the activation error path. Signed-off-by: Alexander Shishkin Cc: stable@vger.kernel.org # v4.8+ --- drivers/hwtracing/intel_th/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index cdd9b3b26195..7563eceeaaea 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -221,8 +221,10 @@ static int intel_th_output_activate(struct intel_th_device *thdev) else intel_th_trace_enable(thdev); - if (ret) + if (ret) { pm_runtime_put(&thdev->dev); + module_put(thdrv->driver.owner); + } return ret; } -- cgit v1.2.3 From 5118ccd34780f4637a9360be580f41f4c1feab48 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 8 Sep 2015 14:03:55 +0300 Subject: intel_th: pci: Add Denverton SOC support This adds Intel(R) Trace Hub PCI ID for Denverton SOC. Signed-off-by: Alexander Shishkin --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 0bba3842336e..04bd57b0100a 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -85,6 +85,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa2a6), .driver_data = (kernel_ulong_t)0, }, + { + /* Denverton */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1), + .driver_data = (kernel_ulong_t)0, + }, { 0 }, }; -- cgit v1.2.3 From 340837f985c2cb87ca0868d4aa9ce42b0fab3a21 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 30 Jun 2016 16:10:51 +0300 Subject: intel_th: pci: Add Gemini Lake support This adds Intel(R) Trace Hub PCI ID for Gemini Lake SOC. Signed-off-by: Alexander Shishkin --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 04bd57b0100a..590cf90dd21a 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -90,6 +90,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x19e1), .driver_data = (kernel_ulong_t)0, }, + { + /* Gemini Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x318e), + .driver_data = (kernel_ulong_t)0, + }, { 0 }, }; -- cgit v1.2.3 From 8af42949d1681379c1a97d230de9242c1f4f326a Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 07:41:55 +0900 Subject: openrisc: xchg: fix `computed is not used` warning When building allmodconfig this warning shows. fs/ocfs2/file.c: In function 'ocfs2_file_write_iter': ./arch/openrisc/include/asm/cmpxchg.h:81:3: warning: value computed is not used [-Wunused-value] ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) ^ Applying the same patch logic that was done to the cmpxchg macro. Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/cmpxchg.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h index 5fcb9ac72693..f0a5d8b844d6 100644 --- a/arch/openrisc/include/asm/cmpxchg.h +++ b/arch/openrisc/include/asm/cmpxchg.h @@ -77,7 +77,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, return val; } -#define xchg(ptr, with) \ - ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, with) \ + ({ \ + (__typeof__(*(ptr))) __xchg((unsigned long)(with), \ + (ptr), \ + sizeof(*(ptr))); \ + }) #endif /* __ASM_OPENRISC_CMPXCHG_H */ -- cgit v1.2.3 From 154e67cd8e8f964809d0e75e44bb121b169c75b3 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 07:44:45 +0900 Subject: openrisc: fix issue handling 8 byte get_user calls Was getting the following error with allmodconfig: ERROR: "__get_user_bad" [lib/test_user_copy.ko] undefined! This was simply a missing break statement, causing an unwanted fall through. Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 140faa16685a..1311e6b13991 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -211,7 +211,7 @@ do { \ case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ - case 8: __get_user_asm2(x, ptr, retval); \ + case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ } while (0) -- cgit v1.2.3 From 363dad58e4a0f72dce0bf12d361d617239a80317 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 14 Mar 2017 22:52:49 +0900 Subject: openrisc: Export symbols needed by modules This was detected by allmodconfig, errors reported: ERROR: "empty_zero_page" [net/ceph/libceph.ko] undefined! ERROR: "__ucmpdi2" [lib/842/842_decompress.ko] undefined! ERROR: "empty_zero_page" [fs/nfs/objlayout/objlayoutdriver.ko] undefined! ERROR: "empty_zero_page" [fs/exofs/exofs.ko] undefined! ERROR: "empty_zero_page" [fs/crypto/fscrypto.ko] undefined! ERROR: "__ucmpdi2" [fs/btrfs/btrfs.ko] undefined! ERROR: "pm_power_off" [drivers/regulator/act8865-regulator.ko] undefined! ERROR: "__ucmpdi2" [drivers/media/i2c/adv7842.ko] undefined! ERROR: "__clear_user" [drivers/md/dm-mod.ko] undefined! ERROR: "__clear_user" [net/netfilter/x_tables.ko] undefined! Signed-off-by: Stafford Horne --- arch/openrisc/kernel/or32_ksyms.c | 4 ++++ arch/openrisc/kernel/process.c | 1 + 2 files changed, 5 insertions(+) diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c index 5c4695d13542..ee3e604959e1 100644 --- a/arch/openrisc/kernel/or32_ksyms.c +++ b/arch/openrisc/kernel/or32_ksyms.c @@ -30,6 +30,7 @@ #include #include #include +#include #define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name) @@ -42,6 +43,9 @@ DECLARE_EXPORT(__muldi3); DECLARE_EXPORT(__ashrdi3); DECLARE_EXPORT(__ashldi3); DECLARE_EXPORT(__lshrdi3); +DECLARE_EXPORT(__ucmpdi2); +EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(memset); diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 828a29110459..f8da545854f9 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -90,6 +90,7 @@ void arch_cpu_idle(void) } void (*pm_power_off) (void) = machine_power_off; +EXPORT_SYMBOL(pm_power_off); /* * When a process does an "exec", machine state like FPU and debug -- cgit v1.2.3 From e4c204ced0ac25e02e58679f07096c5bac0b0d96 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 14 Mar 2017 16:18:34 +0100 Subject: cpufreq: intel_pstate: Avoid percentages in limits-related computations Currently, intel_pstate_update_perf_limits() first converts the policy minimum and maximum limits into percentages of the maximum turbo frequency (rounding up to an integer) and then converts these percentages to fractions (by using fixed-point arithmetic to divide them by 100). That introduces a rounding error unnecessarily, because the fractions can be obtained by carrying out fixed-point divisions directly on the input numbers. Rework the computations in intel_pstate_hwp_set() to use fractions instead of percentages (and drop redundant local variables from there) and modify intel_pstate_update_perf_limits() to compute the fractions directly and percentages out of them. While at it, introduce percent_ext_fp() for converting percentages to fractions (with extended number of fraction bits) and use it in the computations. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 56 +++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ee12641ee010..08e134ffba68 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -84,6 +84,11 @@ static inline u64 div_ext_fp(u64 x, u64 y) return div64_u64(x << EXT_FRAC_BITS, y); } +static inline int32_t percent_ext_fp(int percent) +{ + return div_ext_fp(percent, 100); +} + /** * struct sample - Store performance sample * @core_avg_perf: Ratio of APERF/MPERF which is the actual average @@ -850,7 +855,6 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) u64 value, cap; for_each_cpu(cpu, policy->cpus) { - int max_perf_pct, min_perf_pct; struct cpudata *cpu_data = all_cpu_data[cpu]; s16 epp; @@ -864,16 +868,14 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) else hw_max = HWP_HIGHEST_PERF(cap); - max_perf_pct = perf_limits->max_perf_pct; - min_perf_pct = perf_limits->min_perf_pct; - min = hw_max * min_perf_pct / 100; + min = fp_ext_toint(hw_max * perf_limits->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - max = hw_max * max_perf_pct / 100; + max = fp_ext_toint(hw_max * perf_limits->max_perf); value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); @@ -1223,7 +1225,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf_pct = max(limits->min_perf_pct, limits->max_perf_pct); - limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); + limits->max_perf = percent_ext_fp(limits->max_perf_pct); intel_pstate_update_policies(); @@ -1260,7 +1262,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); + limits->min_perf = percent_ext_fp(limits->min_perf_pct); intel_pstate_update_policies(); @@ -2078,36 +2080,34 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, struct perf_limits *limits) { + int32_t max_policy_perf, min_policy_perf; - limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, - policy->cpuinfo.max_freq); - limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100); + max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq); + max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1)); if (policy->max == policy->min) { - limits->min_policy_pct = limits->max_policy_pct; + min_policy_perf = max_policy_perf; } else { - limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100, - policy->cpuinfo.max_freq); - limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, - 0, 100); + min_policy_perf = div_ext_fp(policy->min, + policy->cpuinfo.max_freq); + min_policy_perf = clamp_t(int32_t, min_policy_perf, + 0, max_policy_perf); } - /* Normalize user input to [min_policy_pct, max_policy_pct] */ - limits->min_perf_pct = max(limits->min_policy_pct, - limits->min_sysfs_pct); - limits->min_perf_pct = min(limits->max_policy_pct, - limits->min_perf_pct); - limits->max_perf_pct = min(limits->max_policy_pct, - limits->max_sysfs_pct); - limits->max_perf_pct = max(limits->min_policy_pct, - limits->max_perf_pct); + /* Normalize user input to [min_perf, max_perf] */ + limits->min_perf = max(min_policy_perf, + percent_ext_fp(limits->min_sysfs_pct)); + limits->min_perf = min(limits->min_perf, max_policy_perf); + limits->max_perf = min(max_policy_perf, + percent_ext_fp(limits->max_sysfs_pct)); + limits->max_perf = max(min_policy_perf, limits->max_perf); - /* Make sure min_perf_pct <= max_perf_pct */ - limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); + /* Make sure min_perf <= max_perf */ + limits->min_perf = min(limits->min_perf, limits->max_perf); - limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); - limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS); limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS); + limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100); + limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100); pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, limits->max_perf_pct, limits->min_perf_pct); -- cgit v1.2.3 From 4494dbc6dec37817f2cc2aa7604039a9e87ada18 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 15 Mar 2017 22:22:08 +0800 Subject: netfilter: nft_ct: do cleanup work when NFTA_CT_DIRECTION is invalid We should jump to invoke __nft_ct_set_destroy() instead of just return error. Fixes: edee4f1e9245 ("netfilter: nft_ct: add zone id set support") Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 91585b5e5307..0264258c46fe 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -544,7 +544,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, case IP_CT_DIR_REPLY: break; default: - return -EINVAL; + err = -EINVAL; + goto err1; } } -- cgit v1.2.3 From 1b53cf9815bb4744958d41f3795d5d5a1d365e2d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Feb 2017 15:07:11 -0800 Subject: fscrypt: remove broken support for detecting keyring key revocation Filesystem encryption ostensibly supported revoking a keyring key that had been used to "unlock" encrypted files, causing those files to become "locked" again. This was, however, buggy for several reasons, the most severe of which was that when key revocation happened to be detected for an inode, its fscrypt_info was immediately freed, even while other threads could be using it for encryption or decryption concurrently. This could be exploited to crash the kernel or worse. This patch fixes the use-after-free by removing the code which detects the keyring key having been revoked, invalidated, or expired. Instead, an encrypted inode that is "unlocked" now simply remains unlocked until it is evicted from memory. Note that this is no worse than the case for block device-level encryption, e.g. dm-crypt, and it still remains possible for a privileged user to evict unused pages, inodes, and dentries by running 'sync; echo 3 > /proc/sys/vm/drop_caches', or by simply unmounting the filesystem. In fact, one of those actions was already needed anyway for key revocation to work even somewhat sanely. This change is not expected to break any applications. In the future I'd like to implement a real API for fscrypt key revocation that interacts sanely with ongoing filesystem operations --- waiting for existing operations to complete and blocking new operations, and invalidating and sanitizing key material and plaintext from the VFS caches. But this is a hard problem, and for now this bug must be fixed. This bug affected almost all versions of ext4, f2fs, and ubifs encryption, and it was potentially reachable in any kernel configured with encryption support (CONFIG_EXT4_ENCRYPTION=y, CONFIG_EXT4_FS_ENCRYPTION=y, CONFIG_F2FS_FS_ENCRYPTION=y, or CONFIG_UBIFS_FS_ENCRYPTION=y). Note that older kernels did not use the shared fs/crypto/ code, but due to the potential security implications of this bug, it may still be worthwhile to backport this fix to them. Fixes: b7236e21d55f ("ext4 crypto: reorganize how we store keys in the inode") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Acked-by: Michael Halcrow --- fs/crypto/crypto.c | 10 +-------- fs/crypto/fname.c | 2 +- fs/crypto/fscrypt_private.h | 4 ---- fs/crypto/keyinfo.c | 52 ++++++++------------------------------------- 4 files changed, 11 insertions(+), 57 deletions(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 02a7a9286449..6d6eca394d4d 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -327,7 +327,6 @@ EXPORT_SYMBOL(fscrypt_decrypt_page); static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *dir; - struct fscrypt_info *ci; int dir_has_key, cached_with_key; if (flags & LOOKUP_RCU) @@ -339,18 +338,11 @@ static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - ci = d_inode(dir)->i_crypt_info; - if (ci && ci->ci_keyring_key && - (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_DEAD)))) - ci = NULL; - /* this should eventually be an flag in d_flags */ spin_lock(&dentry->d_lock); cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY; spin_unlock(&dentry->d_lock); - dir_has_key = (ci != NULL); + dir_has_key = (d_inode(dir)->i_crypt_info != NULL); dput(dir); /* diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 13052b85c393..37b49894c762 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -350,7 +350,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, fname->disk_name.len = iname->len; return 0; } - ret = fscrypt_get_crypt_info(dir); + ret = fscrypt_get_encryption_info(dir); if (ret && ret != -EOPNOTSUPP) return ret; diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index fdbb8af32eaf..e39696e64494 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -67,7 +67,6 @@ struct fscrypt_info { u8 ci_filename_mode; u8 ci_flags; struct crypto_skcipher *ci_ctfm; - struct key *ci_keyring_key; u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE]; }; @@ -101,7 +100,4 @@ extern int fscrypt_do_page_crypto(const struct inode *inode, extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags); -/* keyinfo.c */ -extern int fscrypt_get_crypt_info(struct inode *); - #endif /* _FSCRYPT_PRIVATE_H */ diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 02eb6b9e4438..cb3e82abf034 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -95,6 +95,7 @@ static int validate_user_key(struct fscrypt_info *crypt_info, kfree(description); if (IS_ERR(keyring_key)) return PTR_ERR(keyring_key); + down_read(&keyring_key->sem); if (keyring_key->type != &key_type_logon) { printk_once(KERN_WARNING @@ -102,11 +103,9 @@ static int validate_user_key(struct fscrypt_info *crypt_info, res = -ENOKEY; goto out; } - down_read(&keyring_key->sem); ukp = user_key_payload(keyring_key); if (ukp->datalen != sizeof(struct fscrypt_key)) { res = -EINVAL; - up_read(&keyring_key->sem); goto out; } master_key = (struct fscrypt_key *)ukp->data; @@ -117,17 +116,11 @@ static int validate_user_key(struct fscrypt_info *crypt_info, "%s: key size incorrect: %d\n", __func__, master_key->size); res = -ENOKEY; - up_read(&keyring_key->sem); goto out; } res = derive_key_aes(ctx->nonce, master_key->raw, raw_key); - up_read(&keyring_key->sem); - if (res) - goto out; - - crypt_info->ci_keyring_key = keyring_key; - return 0; out: + up_read(&keyring_key->sem); key_put(keyring_key); return res; } @@ -169,12 +162,11 @@ static void put_crypt_info(struct fscrypt_info *ci) if (!ci) return; - key_put(ci->ci_keyring_key); crypto_free_skcipher(ci->ci_ctfm); kmem_cache_free(fscrypt_info_cachep, ci); } -int fscrypt_get_crypt_info(struct inode *inode) +int fscrypt_get_encryption_info(struct inode *inode) { struct fscrypt_info *crypt_info; struct fscrypt_context ctx; @@ -184,21 +176,15 @@ int fscrypt_get_crypt_info(struct inode *inode) u8 *raw_key = NULL; int res; + if (inode->i_crypt_info) + return 0; + res = fscrypt_initialize(inode->i_sb->s_cop->flags); if (res) return res; if (!inode->i_sb->s_cop->get_context) return -EOPNOTSUPP; -retry: - crypt_info = ACCESS_ONCE(inode->i_crypt_info); - if (crypt_info) { - if (!crypt_info->ci_keyring_key || - key_validate(crypt_info->ci_keyring_key) == 0) - return 0; - fscrypt_put_encryption_info(inode, crypt_info); - goto retry; - } res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (res < 0) { @@ -229,7 +215,6 @@ retry: crypt_info->ci_data_mode = ctx.contents_encryption_mode; crypt_info->ci_filename_mode = ctx.filenames_encryption_mode; crypt_info->ci_ctfm = NULL; - crypt_info->ci_keyring_key = NULL; memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor, sizeof(crypt_info->ci_master_key)); @@ -273,14 +258,8 @@ retry: if (res) goto out; - kzfree(raw_key); - raw_key = NULL; - if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) { - put_crypt_info(crypt_info); - goto retry; - } - return 0; - + if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) == NULL) + crypt_info = NULL; out: if (res == -ENOKEY) res = 0; @@ -288,6 +267,7 @@ out: kzfree(raw_key); return res; } +EXPORT_SYMBOL(fscrypt_get_encryption_info); void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) { @@ -305,17 +285,3 @@ void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci) put_crypt_info(ci); } EXPORT_SYMBOL(fscrypt_put_encryption_info); - -int fscrypt_get_encryption_info(struct inode *inode) -{ - struct fscrypt_info *ci = inode->i_crypt_info; - - if (!ci || - (ci->ci_keyring_key && - (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_DEAD))))) - return fscrypt_get_crypt_info(inode); - return 0; -} -EXPORT_SYMBOL(fscrypt_get_encryption_info); -- cgit v1.2.3 From 62a10498afb27370ec6018e9d802b74850fd8d9a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 14 Mar 2017 09:34:49 +0900 Subject: ASoC: rcar: clear DE bit only in PDMACHCR when it stops R-Car datasheet indicates "Clear DE in PDMACHCR" for transfer stop, but current code clears all bits in PDMACHCR. Because of this, DE bit might never been cleared, and it causes CMD overflow. This patch fixes this issue. Signed-off-by: Kuninori Morimoto Tested-by: Hiroyuki Yokoyama Signed-off-by: Mark Brown --- sound/soc/sh/rcar/dma.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index 1f405c833867..c2e199b4fcf4 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -454,6 +454,20 @@ static u32 rsnd_dmapp_read(struct rsnd_dma *dma, u32 reg) return ioread32(rsnd_dmapp_addr(dmac, dma, reg)); } +static void rsnd_dmapp_bset(struct rsnd_dma *dma, u32 data, u32 mask, u32 reg) +{ + struct rsnd_mod *mod = rsnd_mod_get(dma); + struct rsnd_priv *priv = rsnd_mod_to_priv(mod); + struct rsnd_dma_ctrl *dmac = rsnd_priv_to_dmac(priv); + volatile void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg); + u32 val = ioread32(addr); + + val &= ~mask; + val |= (data & mask); + + iowrite32(val, addr); +} + static int rsnd_dmapp_stop(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) @@ -461,10 +475,10 @@ static int rsnd_dmapp_stop(struct rsnd_mod *mod, struct rsnd_dma *dma = rsnd_mod_to_dma(mod); int i; - rsnd_dmapp_write(dma, 0, PDMACHCR); + rsnd_dmapp_bset(dma, 0, PDMACHCR_DE, PDMACHCR); for (i = 0; i < 1024; i++) { - if (0 == rsnd_dmapp_read(dma, PDMACHCR)) + if (0 == (rsnd_dmapp_read(dma, PDMACHCR) & PDMACHCR_DE)) return 0; udelay(1); } -- cgit v1.2.3 From 85b29008d8af6d94a0723aaa8d93cfb6e041158b Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:11 -0600 Subject: scsi: hpsa: update check for logical volume status - Add in a new case for volume offline. Resolves internal testing bug for multilun array management. - Return correct status for failed TURs. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 35 ++++++++++++++++------------------- drivers/scsi/hpsa_cmd.h | 2 ++ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 524a0c755ed7..90b76c4c6d36 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3714,7 +3714,7 @@ exit_failed: * # (integer code indicating one of several NOT READY states * describing why a volume is to be kept offline) */ -static int hpsa_volume_offline(struct ctlr_info *h, +static unsigned char hpsa_volume_offline(struct ctlr_info *h, unsigned char scsi3addr[]) { struct CommandList *c; @@ -3735,7 +3735,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, DEFAULT_TIMEOUT); if (rc) { cmd_free(h, c); - return 0; + return HPSA_VPD_LV_STATUS_UNSUPPORTED; } sense = c->err_info->SenseInfo; if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo)) @@ -3746,19 +3746,13 @@ static int hpsa_volume_offline(struct ctlr_info *h, cmd_status = c->err_info->CommandStatus; scsi_status = c->err_info->ScsiStatus; cmd_free(h, c); - /* Is the volume 'not ready'? */ - if (cmd_status != CMD_TARGET_STATUS || - scsi_status != SAM_STAT_CHECK_CONDITION || - sense_key != NOT_READY || - asc != ASC_LUN_NOT_READY) { - return 0; - } /* Determine the reason for not ready state */ ldstat = hpsa_get_volume_status(h, scsi3addr); /* Keep volume offline in certain cases: */ switch (ldstat) { + case HPSA_LV_FAILED: case HPSA_LV_UNDERGOING_ERASE: case HPSA_LV_NOT_AVAILABLE: case HPSA_LV_UNDERGOING_RPI: @@ -3780,7 +3774,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, default: break; } - return 0; + return HPSA_LV_OK; } /* @@ -3853,10 +3847,10 @@ static int hpsa_update_device_info(struct ctlr_info *h, /* Do an inquiry to the device to see what it is. */ if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff, (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { - /* Inquiry failed (msg printed already) */ dev_err(&h->pdev->dev, - "hpsa_update_device_info: inquiry failed\n"); - rc = -EIO; + "%s: inquiry failed, device will be skipped.\n", + __func__); + rc = HPSA_INQUIRY_FAILED; goto bail_out; } @@ -3885,15 +3879,19 @@ static int hpsa_update_device_info(struct ctlr_info *h, if ((this_device->devtype == TYPE_DISK || this_device->devtype == TYPE_ZBC) && is_logical_dev_addr_mode(scsi3addr)) { - int volume_offline; + unsigned char volume_offline; hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); volume_offline = hpsa_volume_offline(h, scsi3addr); - if (volume_offline < 0 || volume_offline > 0xff) - volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED; - this_device->volume_offline = volume_offline & 0xff; + if (volume_offline == HPSA_LV_FAILED) { + rc = HPSA_LV_FAILED; + dev_err(&h->pdev->dev, + "%s: LV failed, device will be skipped.\n", + __func__); + goto bail_out; + } } else { this_device->raid_level = RAID_UNKNOWN; this_device->offload_config = 0; @@ -4379,8 +4377,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h) goto out; } if (rc) { - dev_warn(&h->pdev->dev, - "Inquiry failed, skipping device.\n"); + h->drv_req_rescan = 1; continue; } diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index a584cdf07058..5961705eef76 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -156,6 +156,7 @@ #define CFGTBL_BusType_Fibre2G 0x00000200l /* VPD Inquiry types */ +#define HPSA_INQUIRY_FAILED 0x02 #define HPSA_VPD_SUPPORTED_PAGES 0x00 #define HPSA_VPD_LV_DEVICE_ID 0x83 #define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 @@ -166,6 +167,7 @@ /* Logical volume states */ #define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff #define HPSA_LV_OK 0x0 +#define HPSA_LV_FAILED 0x01 #define HPSA_LV_NOT_AVAILABLE 0x0b #define HPSA_LV_UNDERGOING_ERASE 0x0F #define HPSA_LV_UNDERGOING_RPI 0x12 -- cgit v1.2.3 From 87b9e6aa87d9411f1059aa245c0c79976bc557ac Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:17 -0600 Subject: scsi: hpsa: limit outstanding rescans Avoid rescan storms. No need to queue another if one is pending. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Tomas Henzl Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 16 +++++++++++++++- drivers/scsi/hpsa.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 90b76c4c6d36..0a8ac68f4ca1 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5555,7 +5555,7 @@ static void hpsa_scan_complete(struct ctlr_info *h) spin_lock_irqsave(&h->scan_lock, flags); h->scan_finished = 1; - wake_up_all(&h->scan_wait_queue); + wake_up(&h->scan_wait_queue); spin_unlock_irqrestore(&h->scan_lock, flags); } @@ -5573,11 +5573,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh) if (unlikely(lockup_detected(h))) return hpsa_scan_complete(h); + /* + * If a scan is already waiting to run, no need to add another + */ + spin_lock_irqsave(&h->scan_lock, flags); + if (h->scan_waiting) { + spin_unlock_irqrestore(&h->scan_lock, flags); + return; + } + + spin_unlock_irqrestore(&h->scan_lock, flags); + /* wait until any scan already in progress is finished. */ while (1) { spin_lock_irqsave(&h->scan_lock, flags); if (h->scan_finished) break; + h->scan_waiting = 1; spin_unlock_irqrestore(&h->scan_lock, flags); wait_event(h->scan_wait_queue, h->scan_finished); /* Note: We don't need to worry about a race between this @@ -5587,6 +5599,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh) */ } h->scan_finished = 0; /* mark scan as in progress */ + h->scan_waiting = 0; spin_unlock_irqrestore(&h->scan_lock, flags); if (unlikely(lockup_detected(h))) @@ -8789,6 +8802,7 @@ reinit_after_soft_reset: init_waitqueue_head(&h->event_sync_wait_queue); mutex_init(&h->reset_mutex); h->scan_finished = 1; /* no scan currently in progress */ + h->scan_waiting = 0; pci_set_drvdata(pdev, h); h->ndevices = 0; diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index bf6cdc106654..6f04f2ad4125 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -201,6 +201,7 @@ struct ctlr_info { dma_addr_t errinfo_pool_dhandle; unsigned long *cmd_pool_bits; int scan_finished; + u8 scan_waiting : 1; spinlock_t scan_lock; wait_queue_head_t scan_wait_queue; -- cgit v1.2.3 From 2ef2884980873081a4edae92f9d88dd580c85f6e Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:23 -0600 Subject: scsi: hpsa: do not timeout reset operations Resets can take longer than DEFAULT_TIMEOUT. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Tomas Henzl Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 0a8ac68f4ca1..0d0be7754a65 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -2956,7 +2956,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr, /* fill_cmd can't fail here, no data buffer to map. */ (void) fill_cmd(c, reset_type, h, NULL, 0, 0, scsi3addr, TYPE_MSG); - rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT); + rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT); if (rc) { dev_warn(&h->pdev->dev, "Failed to send reset command\n"); goto out; -- cgit v1.2.3 From 949d7fa158b2b1af533bdb1af0dda8ab103ac58d Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 12 Mar 2017 12:22:02 +0200 Subject: scsi: ufs: don't check unsigned type for a negative value Fix compilation warning: drivers/scsi/ufs/ufshcd.c:7645:13: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX)) Signed-off-by: Tomas Winkler Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 1359913bf840..e8c26e6e6237 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7642,7 +7642,7 @@ static inline ssize_t ufshcd_pm_lvl_store(struct device *dev, if (kstrtoul(buf, 0, &value)) return -EINVAL; - if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX)) + if (value >= UFS_PM_LVL_MAX) return -EINVAL; spin_lock_irqsave(hba->host->host_lock, flags); -- cgit v1.2.3 From 7d7080335f8d93a51e8238b6e85be8af4ba452b6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 8 Mar 2017 14:36:01 -0800 Subject: scsi: lpfc: Finalize Kconfig options for nvme Reviewing the result of what was just added for Kconfig, we made a poor choice. It worked well for full kernel builds, but not so much for how it would be deployed on a distro. Here's the final result: - lpfc will compile in NVME initiator and/or NVME target support based on whether the kernel has the corresponding subsystem support. Kconfig is not used to drive this specifically for lpfc. - There is a module parameter, lpfc_enable_fc4_type, that indicates whether the ports will do FCP-only or FCP & NVME (NVME-only not yet possible due to dependency on fc transport). As FCP & NVME divvys up exchange resources, and given NVME will not be often initially, the default is changed to FCP only. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 14 -------------- drivers/scsi/lpfc/lpfc_attr.c | 4 ++-- drivers/scsi/lpfc/lpfc_init.c | 7 +++++++ drivers/scsi/lpfc/lpfc_nvme.c | 8 ++++---- drivers/scsi/lpfc/lpfc_nvmet.c | 8 ++++---- 5 files changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 4bf55b5d78be..3c52867dfe28 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1253,20 +1253,6 @@ config SCSI_LPFC_DEBUG_FS This makes debugging information from the lpfc driver available via the debugfs filesystem. -config LPFC_NVME_INITIATOR - bool "Emulex LightPulse Fibre Channel NVME Initiator Support" - depends on SCSI_LPFC && NVME_FC - ---help--- - This enables NVME Initiator support in the Emulex lpfc driver. - -config LPFC_NVME_TARGET - bool "Emulex LightPulse Fibre Channel NVME Initiator Support" - depends on SCSI_LPFC && NVME_TARGET_FC - ---help--- - This enables NVME Target support in the Emulex lpfc driver. - Target enablement must still be enabled on a per adapter - basis by module parameters. - config SCSI_SIM710 tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" depends on (EISA || MCA) && SCSI diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index fbd3a563be53..84aa62f1a4de 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3315,9 +3315,9 @@ LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST, * lpfc_enable_fc4_type: Defines what FC4 types are supported. * Supported Values: 1 - register just FCP * 3 - register both FCP and NVME - * Supported values are [1,3]. Default value is 3 + * Supported values are [1,3]. Default value is 1 */ -LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, +LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP, LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, "Define fc4 type to register with fabric."); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 2697d49da4d7..6cc561b04211 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5891,10 +5891,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Check to see if it matches any module parameter */ for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { if (wwn == lpfc_enable_nvmet[i]) { +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6017 NVME Target %016llx\n", wwn); phba->nvmet_support = 1; /* a match */ +#else + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6021 Can't enable NVME Target." + " NVME_TARGET_FC infrastructure" + " is not in kernel\n"); +#endif } } } diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 0a4c19081409..0024de1c6c1f 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2149,7 +2149,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) /* localport is allocated from the stack, but the registration * call allocates heap memory as well as the private area. */ -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, &vport->phba->pcidev->dev, &localport); #else @@ -2190,7 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) void lpfc_nvme_destroy_localport(struct lpfc_vport *vport) { -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; @@ -2274,7 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) int lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) int ret = 0; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; @@ -2403,7 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) void lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) int ret; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index b7739a554fe0..7ca868f394da 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -671,7 +671,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, &phba->pcidev->dev, &phba->targetport); @@ -756,7 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) { -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_tgtport *tgtp; if (phba->nvmet_support == 0) @@ -788,7 +788,7 @@ static void lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct hbq_dmabuf *nvmebuf) { -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct lpfc_nvmet_rcv_ctx *ctxp; @@ -891,7 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct rqb_dmabuf *nvmebuf, uint64_t isr_timestamp) { -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; -- cgit v1.2.3 From 94840e3c802daa1a62985957f36ac48faf8ceedd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 22 Feb 2017 13:25:14 -0800 Subject: fscrypt: eliminate ->prepare_context() operation The only use of the ->prepare_context() fscrypt operation was to allow ext4 to evict inline data from the inode before ->set_context(). However, there is no reason why this cannot be done as simply the first step in ->set_context(), and in fact it makes more sense to do it that way because then the policy modes and flags get validated before any real work is done. Therefore, merge ext4_prepare_context() into ext4_set_context(), and remove ->prepare_context(). Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/policy.c | 7 ------- fs/ext4/super.c | 10 ++++------ include/linux/fscrypt_common.h | 1 - 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 14b76da71269..4908906d54d5 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -33,17 +33,10 @@ static int create_encryption_context_from_policy(struct inode *inode, const struct fscrypt_policy *policy) { struct fscrypt_context ctx; - int res; if (!inode->i_sb->s_cop->set_context) return -EOPNOTSUPP; - if (inode->i_sb->s_cop->prepare_context) { - res = inode->i_sb->s_cop->prepare_context(inode); - if (res) - return res; - } - ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1; memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, FS_KEY_DESCRIPTOR_SIZE); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2e03a0a88d92..a9448db1cf7e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1120,17 +1120,16 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len) EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); } -static int ext4_prepare_context(struct inode *inode) -{ - return ext4_convert_inline_data(inode); -} - static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, void *fs_data) { handle_t *handle = fs_data; int res, res2, retries = 0; + res = ext4_convert_inline_data(inode); + if (res) + return res; + /* * If a journal handle was specified, then the encryption context is * being set on a new inode via inheritance and is part of a larger @@ -1196,7 +1195,6 @@ static unsigned ext4_max_namelen(struct inode *inode) static const struct fscrypt_operations ext4_cryptops = { .key_prefix = "ext4:", .get_context = ext4_get_context, - .prepare_context = ext4_prepare_context, .set_context = ext4_set_context, .dummy_context = ext4_dummy_context, .is_encrypted = ext4_encrypted_inode, diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h index 547f81592ba1..10c1abfbac6c 100644 --- a/include/linux/fscrypt_common.h +++ b/include/linux/fscrypt_common.h @@ -87,7 +87,6 @@ struct fscrypt_operations { unsigned int flags; const char *key_prefix; int (*get_context)(struct inode *, void *, size_t); - int (*prepare_context)(struct inode *); int (*set_context)(struct inode *, const void *, size_t, void *); int (*dummy_context)(struct inode *); bool (*is_encrypted)(struct inode *); -- cgit v1.2.3 From b9cf625d6ecde0d372e23ae022feead72b4228a6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 15 Mar 2017 14:52:02 -0400 Subject: ext4: mark inode dirty after converting inline directory If ext4_convert_inline_data() was called on a directory with inline data, the filesystem was left in an inconsistent state (as considered by e2fsck) because the file size was not increased to cover the new block. This happened because the inode was not marked dirty after i_disksize was updated. Fix this by marking the inode dirty at the end of ext4_finish_convert_inline_dir(). This bug was probably not noticed before because most users mark the inode dirty afterwards for other reasons. But if userspace executed FS_IOC_SET_ENCRYPTION_POLICY with invalid parameters, as exercised by 'kvm-xfstests -c adv generic/396', then the inode was never marked dirty after updating i_disksize. Cc: stable@vger.kernel.org # 3.10+ Fixes: 3c47d54170b6a678875566b1b8d6dcf57904e49b Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/ext4/inline.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 30a9f210d1e3..375fb1c05d49 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1169,10 +1169,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, set_buffer_uptodate(dir_block); err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); if (err) - goto out; + return err; set_buffer_verified(dir_block); -out: - return err; + return ext4_mark_inode_dirty(handle, inode); } static int ext4_convert_inline_data_nolock(handle_t *handle, -- cgit v1.2.3 From cd9cb405e0b948363811dc74dbb2890f56f2cb87 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 15 Mar 2017 15:08:48 -0400 Subject: jbd2: don't leak memory if setting up journal fails In journal_init_common(), if we failed to allocate the j_wbuf array, or if we failed to create the buffer_head for the journal superblock, we leaked the memory allocated for the revocation tables. Fix this. Cc: stable@vger.kernel.org # 4.9 Fixes: f0c9fd5458bacf7b12a9a579a727dc740cbe047e Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/jbd2/journal.c | 22 +++++++++++----------- fs/jbd2/revoke.c | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index a1a359bfcc9c..5adc2fb62b0f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1125,10 +1125,8 @@ static journal_t *journal_init_common(struct block_device *bdev, /* Set up a default-sized revoke table for the new mount. */ err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); - if (err) { - kfree(journal); - return NULL; - } + if (err) + goto err_cleanup; spin_lock_init(&journal->j_history_lock); @@ -1145,23 +1143,25 @@ static journal_t *journal_init_common(struct block_device *bdev, journal->j_wbufsize = n; journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *), GFP_KERNEL); - if (!journal->j_wbuf) { - kfree(journal); - return NULL; - } + if (!journal->j_wbuf) + goto err_cleanup; bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize); if (!bh) { pr_err("%s: Cannot get buffer for journal superblock\n", __func__); - kfree(journal->j_wbuf); - kfree(journal); - return NULL; + goto err_cleanup; } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; return journal; + +err_cleanup: + kfree(journal->j_wbuf); + jbd2_journal_destroy_revoke(journal); + kfree(journal); + return NULL; } /* jbd2_journal_init_dev and jbd2_journal_init_inode: diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index cfc38b552118..f9aefcda5854 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -280,6 +280,7 @@ int jbd2_journal_init_revoke(journal_t *journal, int hash_size) fail1: jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]); + journal->j_revoke_table[0] = NULL; fail0: return -ENOMEM; } -- cgit v1.2.3 From fe8daf5fa715f7214952f06a387e4b7de818c5be Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Wed, 15 Mar 2017 13:47:50 +0900 Subject: fjes: Fix wrong netdevice feature flags This patch fixes netdev->features for Extended Socket network device. Currently Extended Socket network device's netdev->feature claims NETIF_F_HW_CSUM, however this is completely wrong. There's no feature of checksum offloading. That causes invalid TCP/UDP checksum and packet rejection when IP forwarding from Extended Socket network device to other network device. NETIF_F_HW_CSUM should be omitted. Signed-off-by: Taku Izumi Signed-off-by: David S. Miller --- drivers/net/fjes/fjes_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b75d9cdcfb0c..c4b3c4b77a9c 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1316,7 +1316,7 @@ static void fjes_netdev_setup(struct net_device *netdev) netdev->min_mtu = fjes_support_mtu[0]; netdev->max_mtu = fjes_support_mtu[3]; netdev->flags |= IFF_BROADCAST; - netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } static void fjes_irq_watch_task(struct work_struct *work) -- cgit v1.2.3 From e83bb3e6f3efa21f4a9d883a25d0ecd9dfb431e1 Mon Sep 17 00:00:00 2001 From: Michael Engl Date: Tue, 3 Oct 2017 13:57:00 +0100 Subject: iio: adc: ti_am335x_adc: fix fifo overrun recovery The tiadc_irq_h(int irq, void *private) function is handling FIFO overruns by clearing flags, disabling and enabling the ADC to recover. If the ADC is running in continuous mode a FIFO overrun happens regularly. If the disabling of the ADC happens concurrently with a new conversion. It might happen that the enabling of the ADC is ignored by the hardware. This stops the ADC permanently. No more interrupts are triggered. According to the AM335x Reference Manual (SPRUH73H October 2011 - Revised April 2013 - Chapter 12.4 and 12.5) it is necessary to check the ADC FSM bits in REG_ADCFSM before enabling the ADC again. Because the disabling of the ADC is done right after the current conversion has been finished. To trigger this bug it is necessary to run the ADC in continuous mode. The ADC values of all channels need to be read in an endless loop. The bug appears within the first 6 hours (~5.4 million handled FIFO overruns). The user space application will hang on reading new values from the character device. Fixes: ca9a563805f7a ("iio: ti_am335x_adc: Add continuous sampling support") Signed-off-by: Michael Engl Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti_am335x_adc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index ad9dec30bb30..4282ceca3d8f 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -169,7 +169,9 @@ static irqreturn_t tiadc_irq_h(int irq, void *private) { struct iio_dev *indio_dev = private; struct tiadc_device *adc_dev = iio_priv(indio_dev); - unsigned int status, config; + unsigned int status, config, adc_fsm; + unsigned short count = 0; + status = tiadc_readl(adc_dev, REG_IRQSTATUS); /* @@ -183,6 +185,15 @@ static irqreturn_t tiadc_irq_h(int irq, void *private) tiadc_writel(adc_dev, REG_CTRL, config); tiadc_writel(adc_dev, REG_IRQSTATUS, IRQENB_FIFO1OVRRUN | IRQENB_FIFO1UNDRFLW | IRQENB_FIFO1THRES); + + /* wait for idle state. + * ADC needs to finish the current conversion + * before disabling the module + */ + do { + adc_fsm = tiadc_readl(adc_dev, REG_ADCFSM); + } while (adc_fsm != 0x10 && count++ < 100); + tiadc_writel(adc_dev, REG_CTRL, (config | CNTRLREG_TSCSSENB)); return IRQ_HANDLED; } else if (status & IRQENB_FIFO1THRES) { -- cgit v1.2.3 From 5f655322b1ba4bd46e26e307d04098f9c84df764 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 14 Mar 2017 11:47:29 -0400 Subject: parisc: support R_PARISC_SECREL32 relocation in modules The parisc kernel doesn't work with CONFIG_MODVERSIONS since the commit 71810db27c1c853b335675bee335d893bc3d324b. It can't load modules with the error: "module unix: Unknown relocation: 41". The commit changes __kcrctab from 64-bit valus to 32-bit values. The assembler generates R_PARISC_SECREL32 secrel relocation for them and the module loader doesn't support this relocation. This patch adds the R_PARISC_SECREL32 relocation to the module loader. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Helge Deller --- arch/parisc/kernel/module.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index a0ecdb4abcc8..c66c943d9322 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -620,6 +620,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, */ *loc = fsel(val, addend); break; + case R_PARISC_SECREL32: + /* 32-bit section relative address. */ + *loc = fsel(val, addend); + break; case R_PARISC_DPREL21L: /* left 21 bit of relative address */ val = lrsel(val - dp, addend); @@ -807,6 +811,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, */ *loc = fsel(val, addend); break; + case R_PARISC_SECREL32: + /* 32-bit section relative address. */ + *loc = fsel(val, addend); + break; case R_PARISC_FPTR64: /* 64-bit function address */ if(in_local(me, (void *)(val + addend))) { -- cgit v1.2.3 From 316ec0624f951166daedbe446988ef92ae72b59f Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 11 Mar 2017 18:03:34 -0500 Subject: parisc: Optimize flush_kernel_vmap_range and invalidate_kernel_vmap_range The previously submitted patch did not resolve the random segmentation faults observed on the phantom buildd system. There are still unresolved problems with the Debian 4.8 and 4.9 kernels on C8000. The attached patch removes the flush of the offset map pages and does a whole data cache flush for large ranges. No other arch flushes the offset map in these routines as far as I can tell. I have not observed any random segmentation faults on rp3440 in two weeks of testing with 4.10.0 and 4.10.1. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/cacheflush.h | 23 ++--------------------- arch/parisc/kernel/cache.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 19c9c3c5f267..c7e15cc5c668 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -43,28 +43,9 @@ static inline void flush_kernel_dcache_page(struct page *page) #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); -/* vmap range flushes and invalidates. Architecturally, we don't need - * the invalidate, because the CPU should refuse to speculate once an - * area has been flushed, so invalidate is left empty */ -static inline void flush_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - - flush_kernel_dcache_range_asm(start, start + size); -} -static inline void invalidate_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - void *cursor = vaddr; - for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) { - struct page *page = vmalloc_to_page(cursor); - - if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) - flush_kernel_dcache_page(page); - } - flush_kernel_dcache_range_asm(start, start + size); -} +void flush_kernel_vmap_range(void *vaddr, int size); +void invalidate_kernel_vmap_range(void *vaddr, int size); #define flush_cache_vmap(start, end) flush_cache_all() #define flush_cache_vunmap(start, end) flush_cache_all() diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 0dc72d5de861..c32a09095216 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -616,3 +616,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } + +void flush_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(flush_kernel_vmap_range); + +void invalidate_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(invalidate_kernel_vmap_range); -- cgit v1.2.3 From 63d32d1e09cb2fc65b084b261976c06b40d19115 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 15 Mar 2017 21:10:17 +0100 Subject: parisc: Wire up statx system call Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/unistd.h | 3 ++- arch/parisc/kernel/syscall_table.S | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 6b0741e7a7ed..667c99421003 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -362,8 +362,9 @@ #define __NR_copy_file_range (__NR_Linux + 346) #define __NR_preadv2 (__NR_Linux + 347) #define __NR_pwritev2 (__NR_Linux + 348) +#define __NR_statx (__NR_Linux + 349) -#define __NR_Linux_syscalls (__NR_pwritev2 + 1) +#define __NR_Linux_syscalls (__NR_statx + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 3cfef1de8061..44aeaa9c039f 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -444,6 +444,7 @@ ENTRY_SAME(copy_file_range) ENTRY_COMP(preadv2) ENTRY_COMP(pwritev2) + ENTRY_SAME(statx) .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) -- cgit v1.2.3 From 0f424de1fd9bc4ab24bd1fe5430ab5618e803e31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Mar 2017 14:42:03 -0400 Subject: drm/radeon/si: add dpm quirk for Oland MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems with higher sclks. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index d12b8978142f..72e1588580a1 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2984,6 +2984,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6667)) { max_sclk = 75000; } + } else if (rdev->family == CHIP_OLAND) { + if ((rdev->pdev->device == 0x6604) && + (rdev->pdev->subsystem_vendor == 0x1028) && + (rdev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } if (rps->vce_active) { -- cgit v1.2.3 From 18a8de1bc37e97dff1c96ee6cf49adbd02a0f775 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Mar 2017 19:24:19 -0400 Subject: drm/amdgpu/si: add dpm quirk for Oland MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems with higher sclks. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index f55e45b52fbc..33b504bafb88 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3464,6 +3464,12 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6667)) { max_sclk = 75000; } + } else if (adev->asic_type == CHIP_OLAND) { + if ((adev->pdev->device == 0x6604) && + (adev->pdev->subsystem_vendor == 0x1028) && + (adev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } if (rps->vce_active) { -- cgit v1.2.3 From 801a6aa9a63c90724e8899982ad8c7f16be1e2cd Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 15 Mar 2017 05:34:25 -0400 Subject: drm/amd/amdgpu: Fix debugfs reg read/write address width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MMIO space is wider now so we mask the lower 22 bits instead of 18. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4120b351a8e5..a3a105ec99e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2590,7 +2590,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, use_bank = 0; } - *pos &= 0x3FFFF; + *pos &= (1UL << 22) - 1; if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || @@ -2666,7 +2666,7 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, use_bank = 0; } - *pos &= 0x3FFFF; + *pos &= (1UL << 22) - 1; if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || -- cgit v1.2.3 From 186ecf14e58befba434f0774eea89e35f64d3c6a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 15 Mar 2017 21:48:42 +0100 Subject: parisc: Avoid compiler warnings with access_ok() Commit 09b871ffd4d8 (parisc: Define access_ok() as macro) missed to mark uaddr as used, which then gives compiler warnings about unused variables. Fix it by comparing uaddr to uaddr which then gets optimized away by the compiler. Signed-off-by: Helge Deller Fixes: 09b871ffd4d8 ("parisc: Define access_ok() as macro") --- arch/parisc/include/asm/uaccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index fb4382c28259..edfbf9d6a6dd 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -32,7 +32,8 @@ * that put_user is the same as __put_user, etc. */ -#define access_ok(type, uaddr, size) (1) +#define access_ok(type, uaddr, size) \ + ( (uaddr) == (uaddr) ) #define put_user __put_user #define get_user __get_user -- cgit v1.2.3 From 3d20f1f7bd575d147ffa75621fa560eea0aec690 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 15 Mar 2017 18:10:47 +0200 Subject: net/openvswitch: Set the ipv6 source tunnel key address attribute correctly When dealing with ipv6 source tunnel key address attribute (OVS_TUNNEL_KEY_ATTR_IPV6_SRC) we are wrongly setting the tunnel dst ip, fix that. Fixes: 6b26ba3a7d95 ('openvswitch: netlink attributes for IPv6 tunneling') Signed-off-by: Or Gerlitz Reported-by: Paul Blakey Acked-by: Jiri Benc Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 6f5fa50f716d..a08ff834676b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -604,7 +604,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, ipv4 = true; break; case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: - SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, + SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src, nla_get_in6_addr(a), is_mask); ipv6 = true; break; -- cgit v1.2.3 From 88d339e2d3be955848a034970970931a7ed33956 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 15 Mar 2017 18:39:46 +0100 Subject: MAINTAINERS: remove MACVLAN and VLAN entries macvlan.c file seems to be both in VLAN and MACVLAN DRIVER, so remove the MACVLAN DRIVER since this is redundant. I propose with this patch to remove the VLAN (802.1Q) entry so this just falls into the NETWORKING [GENERAL]. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- MAINTAINERS | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c776906f67a9..33875e07482b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7774,13 +7774,6 @@ F: include/net/mac80211.h F: net/mac80211/ F: drivers/net/wireless/mac80211_hwsim.[ch] -MACVLAN DRIVER -M: Patrick McHardy -L: netdev@vger.kernel.org -S: Maintained -F: drivers/net/macvlan.c -F: include/linux/if_macvlan.h - MAILBOX API M: Jassi Brar L: linux-kernel@vger.kernel.org @@ -13383,14 +13376,6 @@ W: https://linuxtv.org S: Maintained F: drivers/media/platform/vivid/* -VLAN (802.1Q) -M: Patrick McHardy -L: netdev@vger.kernel.org -S: Maintained -F: drivers/net/macvlan.c -F: include/linux/if_*vlan.h -F: net/8021q/ - VLYNQ BUS M: Florian Fainelli L: openwrt-devel@lists.openwrt.org (subscribers-only) -- cgit v1.2.3 From 5371bbf4b295eea334ed453efa286afa2c3ccff3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 15 Mar 2017 12:57:21 -0700 Subject: net: bcmgenet: Do not suspend PHY if Wake-on-LAN is enabled Suspending the PHY would be putting it in a low power state where it may no longer allow us to do Wake-on-LAN. Fixes: cc013fb48898 ("net: bcmgenet: correctly suspend and resume PHY device") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 69015fa50f20..365895ed3c3e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3481,7 +3481,8 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); - phy_suspend(priv->phydev); + if (!device_may_wakeup(d)) + phy_suspend(priv->phydev); netif_device_detach(dev); @@ -3578,7 +3579,8 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); - phy_resume(priv->phydev); + if (!device_may_wakeup(d)) + phy_resume(priv->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); -- cgit v1.2.3 From 622c36f143fc9566ba49d7cec994c2da1182d9e2 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Wed, 15 Mar 2017 15:11:23 -0500 Subject: amd-xgbe: Fix jumbo MTU processing on newer hardware Newer hardware does not provide a cumulative payload length when multiple descriptors are needed to handle the data. Once the MTU increases beyond the size that can be handled by a single descriptor, the SKB does not get built properly by the driver. The driver will now calculate the size of the data buffers used by the hardware. The first buffer of the first descriptor is for packet headers or packet headers and data when the headers can't be split. Subsequent descriptors in a multi-descriptor chain will not use the first buffer. The second buffer is used by all the descriptors in the chain for payload data. Based on whether the driver is processing the first, intermediate, or last descriptor it can calculate the buffer usage and build the SKB properly. Tested and verified on both old and new hardware. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 6 +- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 20 +++--- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 102 +++++++++++++++++----------- 3 files changed, 78 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 8a280e7d66bd..86f1626816ff 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1148,8 +1148,8 @@ #define RX_PACKET_ATTRIBUTES_CSUM_DONE_WIDTH 1 #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_INDEX 1 #define RX_PACKET_ATTRIBUTES_VLAN_CTAG_WIDTH 1 -#define RX_PACKET_ATTRIBUTES_INCOMPLETE_INDEX 2 -#define RX_PACKET_ATTRIBUTES_INCOMPLETE_WIDTH 1 +#define RX_PACKET_ATTRIBUTES_LAST_INDEX 2 +#define RX_PACKET_ATTRIBUTES_LAST_WIDTH 1 #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_INDEX 3 #define RX_PACKET_ATTRIBUTES_CONTEXT_NEXT_WIDTH 1 #define RX_PACKET_ATTRIBUTES_CONTEXT_INDEX 4 @@ -1158,6 +1158,8 @@ #define RX_PACKET_ATTRIBUTES_RX_TSTAMP_WIDTH 1 #define RX_PACKET_ATTRIBUTES_RSS_HASH_INDEX 6 #define RX_PACKET_ATTRIBUTES_RSS_HASH_WIDTH 1 +#define RX_PACKET_ATTRIBUTES_FIRST_INDEX 7 +#define RX_PACKET_ATTRIBUTES_FIRST_WIDTH 1 #define RX_NORMAL_DESC0_OVT_INDEX 0 #define RX_NORMAL_DESC0_OVT_WIDTH 16 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 937f37a5dcb2..24a687ce4388 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -1896,10 +1896,15 @@ static int xgbe_dev_read(struct xgbe_channel *channel) /* Get the header length */ if (XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, FD)) { + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + FIRST, 1); rdata->rx.hdr_len = XGMAC_GET_BITS_LE(rdesc->desc2, RX_NORMAL_DESC2, HL); if (rdata->rx.hdr_len) pdata->ext_stats.rx_split_header_packets++; + } else { + XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + FIRST, 0); } /* Get the RSS hash */ @@ -1922,19 +1927,16 @@ static int xgbe_dev_read(struct xgbe_channel *channel) } } - /* Get the packet length */ - rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL); - - if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) { - /* Not all the data has been transferred for this packet */ - XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, - INCOMPLETE, 1); + /* Not all the data has been transferred for this packet */ + if (!XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, LD)) return 0; - } /* This is the last of the data for this packet */ XGMAC_SET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, - INCOMPLETE, 0); + LAST, 1); + + /* Get the packet length */ + rdata->rx.len = XGMAC_GET_BITS_LE(rdesc->desc3, RX_NORMAL_DESC3, PL); /* Set checksum done indicator as appropriate */ if (netdev->features & NETIF_F_RXCSUM) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index ffea9859f5a7..a713abd9d03e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1971,13 +1971,12 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, { struct sk_buff *skb; u8 *packet; - unsigned int copy_len; skb = napi_alloc_skb(napi, rdata->rx.hdr.dma_len); if (!skb) return NULL; - /* Start with the header buffer which may contain just the header + /* Pull in the header buffer which may contain just the header * or the header plus data */ dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.hdr.dma_base, @@ -1986,30 +1985,49 @@ static struct sk_buff *xgbe_create_skb(struct xgbe_prv_data *pdata, packet = page_address(rdata->rx.hdr.pa.pages) + rdata->rx.hdr.pa.pages_offset; - copy_len = (rdata->rx.hdr_len) ? rdata->rx.hdr_len : len; - copy_len = min(rdata->rx.hdr.dma_len, copy_len); - skb_copy_to_linear_data(skb, packet, copy_len); - skb_put(skb, copy_len); - - len -= copy_len; - if (len) { - /* Add the remaining data as a frag */ - dma_sync_single_range_for_cpu(pdata->dev, - rdata->rx.buf.dma_base, - rdata->rx.buf.dma_off, - rdata->rx.buf.dma_len, - DMA_FROM_DEVICE); - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - rdata->rx.buf.pa.pages, - rdata->rx.buf.pa.pages_offset, - len, rdata->rx.buf.dma_len); - rdata->rx.buf.pa.pages = NULL; - } + skb_copy_to_linear_data(skb, packet, len); + skb_put(skb, len); return skb; } +static unsigned int xgbe_rx_buf1_len(struct xgbe_ring_data *rdata, + struct xgbe_packet_data *packet) +{ + /* Always zero if not the first descriptor */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, FIRST)) + return 0; + + /* First descriptor with split header, return header length */ + if (rdata->rx.hdr_len) + return rdata->rx.hdr_len; + + /* First descriptor but not the last descriptor and no split header, + * so the full buffer was used + */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST)) + return rdata->rx.hdr.dma_len; + + /* First descriptor and last descriptor and no split header, so + * calculate how much of the buffer was used + */ + return min_t(unsigned int, rdata->rx.hdr.dma_len, rdata->rx.len); +} + +static unsigned int xgbe_rx_buf2_len(struct xgbe_ring_data *rdata, + struct xgbe_packet_data *packet, + unsigned int len) +{ + /* Always the full buffer if not the last descriptor */ + if (!XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, LAST)) + return rdata->rx.buf.dma_len; + + /* Last descriptor so calculate how much of the buffer was used + * for the last bit of data + */ + return rdata->rx.len - len; +} + static int xgbe_tx_poll(struct xgbe_channel *channel) { struct xgbe_prv_data *pdata = channel->pdata; @@ -2092,8 +2110,8 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) struct napi_struct *napi; struct sk_buff *skb; struct skb_shared_hwtstamps *hwtstamps; - unsigned int incomplete, error, context_next, context; - unsigned int len, rdesc_len, max_len; + unsigned int last, error, context_next, context; + unsigned int len, buf1_len, buf2_len, max_len; unsigned int received = 0; int packet_count = 0; @@ -2103,7 +2121,7 @@ static int xgbe_rx_poll(struct xgbe_channel *channel, int budget) if (!ring) return 0; - incomplete = 0; + last = 0; context_next = 0; napi = (pdata->per_channel_irq) ? &channel->napi : &pdata->napi; @@ -2137,9 +2155,8 @@ read_again: received++; ring->cur++; - incomplete = XGMAC_GET_BITS(packet->attributes, - RX_PACKET_ATTRIBUTES, - INCOMPLETE); + last = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, + LAST); context_next = XGMAC_GET_BITS(packet->attributes, RX_PACKET_ATTRIBUTES, CONTEXT_NEXT); @@ -2148,7 +2165,7 @@ read_again: CONTEXT); /* Earlier error, just drain the remaining data */ - if ((incomplete || context_next) && error) + if ((!last || context_next) && error) goto read_again; if (error || packet->errors) { @@ -2160,16 +2177,22 @@ read_again: } if (!context) { - /* Length is cumulative, get this descriptor's length */ - rdesc_len = rdata->rx.len - len; - len += rdesc_len; + /* Get the data length in the descriptor buffers */ + buf1_len = xgbe_rx_buf1_len(rdata, packet); + len += buf1_len; + buf2_len = xgbe_rx_buf2_len(rdata, packet, len); + len += buf2_len; - if (rdesc_len && !skb) { + if (!skb) { skb = xgbe_create_skb(pdata, napi, rdata, - rdesc_len); - if (!skb) + buf1_len); + if (!skb) { error = 1; - } else if (rdesc_len) { + goto skip_data; + } + } + + if (buf2_len) { dma_sync_single_range_for_cpu(pdata->dev, rdata->rx.buf.dma_base, rdata->rx.buf.dma_off, @@ -2179,13 +2202,14 @@ read_again: skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rdata->rx.buf.pa.pages, rdata->rx.buf.pa.pages_offset, - rdesc_len, + buf2_len, rdata->rx.buf.dma_len); rdata->rx.buf.pa.pages = NULL; } } - if (incomplete || context_next) +skip_data: + if (!last || context_next) goto read_again; if (!skb) @@ -2243,7 +2267,7 @@ next_packet: } /* Check if we need to save state before leaving */ - if (received && (incomplete || context_next)) { + if (received && (!last || context_next)) { rdata = XGBE_GET_DESC_DATA(ring, ring->cur); rdata->state_saved = 1; rdata->state.skb = skb; -- cgit v1.2.3 From 22a0e18eac7a9e986fec76c60fa4a2926d1291e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Mar 2017 13:21:28 -0700 Subject: net: properly release sk_frag.page I mistakenly added the code to release sk->sk_frag in sk_common_release() instead of sk_destruct() TCP sockets using sk->sk_allocation == GFP_ATOMIC do no call sk_common_release() at close time, thus leaking one (order-3) page. iSCSI is using such sockets. Fixes: 5640f7685831 ("net: use a per task frag allocator") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index a96d5f7a5734..acb0d4137499 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1442,6 +1442,11 @@ static void __sk_destruct(struct rcu_head *head) pr_debug("%s: optmem leakage (%d bytes) detected\n", __func__, atomic_read(&sk->sk_omem_alloc)); + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + } + if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); @@ -2787,11 +2792,6 @@ void sk_common_release(struct sock *sk) sk_refcnt_debug_release(sk); - if (sk->sk_frag.page) { - put_page(sk->sk_frag.page); - sk->sk_frag.page = NULL; - } - sock_put(sk); } EXPORT_SYMBOL(sk_common_release); -- cgit v1.2.3 From 5f7c2beef819d9ea2d1b814edf6f5981420e9cf8 Mon Sep 17 00:00:00 2001 From: Bill Kuzeja Date: Tue, 14 Mar 2017 13:28:44 -0400 Subject: scsi: qla2xxx: Fix crash in qla2xxx_eh_abort on bad ptr After a Qlogic card breaks when initializing (test case), the system can crash in qla2xxx_eh_abort if processing anything but a scsi command type srb. Fixes: 1535aa75a3d8 ("scsi: qla2xxx: fix invalid DMA access after command aborts in PCI device remove") Signed-off-by: Bill Kuzeja Acked-By: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 71b6b20ae82b..579363a6f44f 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1617,7 +1617,8 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) /* Don't abort commands in adapter during EEH * recovery as it's not accessible/responding. */ - if (GET_CMD_SP(sp) && !ha->flags.eeh_busy) { + if (GET_CMD_SP(sp) && !ha->flags.eeh_busy && + (sp->type == SRB_SCSI_CMD)) { /* Get a reference to the sp and drop the lock. * The reference ensures this sp->done() call * - and not the call in qla2xxx_eh_abort() - -- cgit v1.2.3 From e498520edec6655e93ac5e768b04f4fd2299fe4d Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Tue, 14 Mar 2017 09:20:19 -0700 Subject: scsi: aacraid: Fix potential null access Currently, command threads fails to return ioctls commands for older controller versions, since it returns when all the fibs have been allocated. Another issue is even all the fibs have not been allocated, the correct allocated fibs is not updated nor freed. Fixes: 113156bcea9ef1e6 (scsi: aacraid: Reworked aac_command_thread) Reported-by: Tomas Henzl Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index a3ad04293487..c8172f16cf33 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -2056,7 +2056,6 @@ static int fillup_pools(struct aac_dev *dev, struct hw_fib **hw_fib_pool, { struct hw_fib **hw_fib_p; struct fib **fib_p; - int rcode = 1; hw_fib_p = hw_fib_pool; fib_p = fib_pool; @@ -2074,11 +2073,11 @@ static int fillup_pools(struct aac_dev *dev, struct hw_fib **hw_fib_pool, } } + /* + * Get the actual number of allocated fibs + */ num = hw_fib_p - hw_fib_pool; - if (!num) - rcode = 0; - - return rcode; + return num; } static void wakeup_fibctx_threads(struct aac_dev *dev, @@ -2186,7 +2185,6 @@ static void aac_process_events(struct aac_dev *dev) struct fib *fib; unsigned long flags; spinlock_t *t_lock; - unsigned int rcode; t_lock = dev->queues->queue[HostNormCmdQueue].lock; spin_lock_irqsave(t_lock, flags); @@ -2269,8 +2267,8 @@ static void aac_process_events(struct aac_dev *dev) * Fill up fib pointer pools with actual fibs * and hw_fibs */ - rcode = fillup_pools(dev, hw_fib_pool, fib_pool, num); - if (!rcode) + num = fillup_pools(dev, hw_fib_pool, fib_pool, num); + if (!num) goto free_mem; /* -- cgit v1.2.3 From 046885251ae24b0fa1adbc5e3ca4a4bc9930c1f3 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Wed, 15 Mar 2017 01:32:53 -0700 Subject: scsi: qedi: Add PCI device-ID for QL41xxx adapters. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 8e3d92807cb8..92775a8b74b1 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2007,6 +2007,7 @@ static void qedi_remove(struct pci_dev *pdev) static struct pci_device_id qedi_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x165E) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, 0x8084) }, { 0 }, }; MODULE_DEVICE_TABLE(pci, qedi_pci_tbl); -- cgit v1.2.3 From 645b8ef5943f95b74240568105ce2be21c6640b4 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Tue, 14 Mar 2017 14:19:36 +0200 Subject: scsi: ufshcd-platform: remove the useless cast in ERR_PTR/IS_ERR IS_ERR and ERR_PTR already forcefully cast their argument, hence there is no need for additional (complex) casting. Signed-off-by: Tomas Winkler Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pltfrm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index a72a4ba78125..8e5e6c04c035 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -309,8 +309,8 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); mmio_base = devm_ioremap_resource(dev, mem_res); - if (IS_ERR(*(void **)&mmio_base)) { - err = PTR_ERR(*(void **)&mmio_base); + if (IS_ERR(mmio_base)) { + err = PTR_ERR(mmio_base); goto out; } -- cgit v1.2.3 From 9b4f603e7a9f4282aec451063ffbbb8bb410dcd9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Mar 2017 00:12:16 +0100 Subject: cpufreq: Fix and clean up show_cpuinfo_cur_freq() There is a missing newline in show_cpuinfo_cur_freq(), so add it, but while at it clean that function up somewhat too. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Cc: All applicable --- drivers/cpufreq/cpufreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 38b9fdf854a4..b8ff617d449d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -680,9 +680,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, char *buf) { unsigned int cur_freq = __cpufreq_get(policy); - if (!cur_freq) - return sprintf(buf, ""); - return sprintf(buf, "%u\n", cur_freq); + + if (cur_freq) + return sprintf(buf, "%u\n", cur_freq); + + return sprintf(buf, "\n"); } /** -- cgit v1.2.3 From a733ded50b6ea846200073e7381a302df71e13b3 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 5 Mar 2017 21:40:41 +0200 Subject: mei: fix deadlock on mei reset This patch fixes 'mei: synchronize irq before initiating a reset' The patch had introduced a deadlock between irq thread and mei_reset() as they are both holding the same device lock. ---> device_lock: mei_reset() <---- interrupt thread device_lock ---> synchornize_irq() wait on interrupt thread == (dead lock) The fix is to call synchronize_irq prior to call locked mei_reset function. Cc: #4.10+ Fixes: f302bb0de6ac (mei: synchronize irq before initiating a reset) Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index cfb1cdf176fa..13c55b8f9261 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -124,8 +124,6 @@ int mei_reset(struct mei_device *dev) mei_clear_interrupts(dev); - mei_synchronize_irq(dev); - /* we're already in reset, cancel the init timer * if the reset was called due the hbm protocol error * we need to call it before hw start @@ -304,6 +302,9 @@ static void mei_reset_work(struct work_struct *work) container_of(work, struct mei_device, reset_work); int ret; + mei_clear_interrupts(dev); + mei_synchronize_irq(dev); + mutex_lock(&dev->device_lock); ret = mei_reset(dev); @@ -328,6 +329,9 @@ void mei_stop(struct mei_device *dev) mei_cancel_work(dev); + mei_clear_interrupts(dev); + mei_synchronize_irq(dev); + mutex_lock(&dev->device_lock); dev->dev_state = MEI_DEV_POWER_DOWN; -- cgit v1.2.3 From c6240cacdb2c3cb56a21fb3ea0c105154ab87a2a Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 5 Mar 2017 21:40:42 +0200 Subject: mei: don't wait for os version message reply The driver still struggles with firmwares that do not replay to the OS version request. It is safe not waiting for the replay. First, the driver doesn't do anything with the replay second the connection is closed immediately, hence the packet will be just safely discarded in case it is received and last the driver won't get stuck if the firmware won't reply. Cc: #4.10+ Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 3600c9993a98..29f2daed37e0 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -112,11 +112,9 @@ struct mkhi_msg { static int mei_osver(struct mei_cl_device *cldev) { - int ret; const size_t size = sizeof(struct mkhi_msg_hdr) + sizeof(struct mkhi_fwcaps) + sizeof(struct mei_os_ver); - size_t length = 8; char buf[size]; struct mkhi_msg *req; struct mkhi_fwcaps *fwcaps; @@ -137,15 +135,7 @@ static int mei_osver(struct mei_cl_device *cldev) os_ver = (struct mei_os_ver *)fwcaps->data; os_ver->os_type = OSTYPE_LINUX; - ret = __mei_cl_send(cldev->cl, buf, size, mode); - if (ret < 0) - return ret; - - ret = __mei_cl_recv(cldev->cl, buf, length, 0); - if (ret < 0) - return ret; - - return 0; + return __mei_cl_send(cldev->cl, buf, size, mode); } static void mei_mkhi_fix(struct mei_cl_device *cldev) @@ -160,7 +150,7 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev) return; ret = mei_osver(cldev); - if (ret) + if (ret < 0) dev_err(&cldev->dev, "OS version command failed %d\n", ret); mei_cldev_disable(cldev); -- cgit v1.2.3 From 8200f2085abe7f29a016381f3122000cc7b2a760 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 4 Mar 2017 18:13:57 -0700 Subject: vmbus: use rcu for per-cpu channel list The per-cpu channel list is now referred to in the interrupt routine. This is mostly safe since the host will not normally generate an interrupt when channel is being deleted but if it did then there would be a use after free problem. To solve, this use RCU protection on ther per-cpu list. Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet") Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 7 ++++--- drivers/hv/vmbus_drv.c | 6 +++++- include/linux/hyperv.h | 7 +++++++ 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index f33465d78a02..d2cfa3eb71a2 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -350,7 +350,8 @@ static struct vmbus_channel *alloc_channel(void) static void free_channel(struct vmbus_channel *channel) { tasklet_kill(&channel->callback_event); - kfree(channel); + + kfree_rcu(channel, rcu); } static void percpu_channel_enq(void *arg) @@ -359,14 +360,14 @@ static void percpu_channel_enq(void *arg) struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context); - list_add_tail(&channel->percpu_list, &hv_cpu->chan_list); + list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list); } static void percpu_channel_deq(void *arg) { struct vmbus_channel *channel = arg; - list_del(&channel->percpu_list); + list_del_rcu(&channel->percpu_list); } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index da6b59ba5940..8370b9dc6037 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -939,8 +939,10 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) if (relid == 0) continue; + rcu_read_lock(); + /* Find channel based on relid */ - list_for_each_entry(channel, &hv_cpu->chan_list, percpu_list) { + list_for_each_entry_rcu(channel, &hv_cpu->chan_list, percpu_list) { if (channel->offermsg.child_relid != relid) continue; @@ -956,6 +958,8 @@ static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) tasklet_schedule(&channel->callback_event); } } + + rcu_read_unlock(); } } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 62bbf3c1aa4a..c4c7ae91f9d1 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -845,6 +845,13 @@ struct vmbus_channel { * link up channels based on their CPU affinity. */ struct list_head percpu_list; + + /* + * Defer freeing channel until after all cpu's have + * gone through grace period. + */ + struct rcu_head rcu; + /* * For performance critical channels (storage, networking * etc,), Hyper-V has a mechanism to enhance the throughput -- cgit v1.2.3 From dad72a1d28442b03aac86836a42de2d00a1014ab Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 4 Mar 2017 18:13:58 -0700 Subject: vmbus: remove hv_event_tasklet_disable/enable With the recent introduction of per-channel tasklet, we need to update the way we handle the 3 concurrency issues: 1. hv_process_channel_removal -> percpu_channel_deq vs. vmbus_chan_sched -> list_for_each_entry(..., percpu_list); 2. vmbus_process_offer -> percpu_channel_enq/deq vs. vmbus_chan_sched. 3. vmbus_close_internal vs. the per-channel tasklet vmbus_on_event; The first 2 issues can be handled by Stephen's recent patch "vmbus: use rcu for per-cpu channel list", and the third issue can be handled by calling tasklet_disable in vmbus_close_internal here. We don't need the original hv_event_tasklet_disable/enable since we now use per-channel tasklet instead of the previous per-CPU tasklet, and actually we must remove them due to the side effect now: vmbus_process_offer -> hv_event_tasklet_enable -> tasklet_schedule will start the per-channel callback prematurely, cauing NULL dereferencing (the channel may haven't been properly configured to run the callback yet). Fixes: 631e63a9f346 ("vmbus: change to per channel tasklet") Signed-off-by: Dexuan Cui Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Tested-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 12 ++++-------- drivers/hv/channel_mgmt.c | 19 ------------------- include/linux/hyperv.h | 3 --- 3 files changed, 4 insertions(+), 30 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index bd0d1988feb2..57b2958205c7 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -530,15 +530,13 @@ static int vmbus_close_internal(struct vmbus_channel *channel) int ret; /* - * vmbus_on_event(), running in the tasklet, can race + * vmbus_on_event(), running in the per-channel tasklet, can race * with vmbus_close_internal() in the case of SMP guest, e.g., when * the former is accessing channel->inbound.ring_buffer, the latter - * could be freeing the ring_buffer pages. - * - * To resolve the race, we can serialize them by disabling the - * tasklet when the latter is running here. + * could be freeing the ring_buffer pages, so here we must stop it + * first. */ - hv_event_tasklet_disable(channel); + tasklet_disable(&channel->callback_event); /* * In case a device driver's probe() fails (e.g., @@ -605,8 +603,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel) get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); out: - hv_event_tasklet_enable(channel); - return ret; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index d2cfa3eb71a2..bf846d078d85 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -382,19 +382,6 @@ static void vmbus_release_relid(u32 relid) true); } -void hv_event_tasklet_disable(struct vmbus_channel *channel) -{ - tasklet_disable(&channel->callback_event); -} - -void hv_event_tasklet_enable(struct vmbus_channel *channel) -{ - tasklet_enable(&channel->callback_event); - - /* In case there is any pending event */ - tasklet_schedule(&channel->callback_event); -} - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) { unsigned long flags; @@ -403,7 +390,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) BUG_ON(!channel->rescind); BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); - hv_event_tasklet_disable(channel); if (channel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(channel->target_cpu, @@ -412,7 +398,6 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) percpu_channel_deq(channel); put_cpu(); } - hv_event_tasklet_enable(channel); if (channel->primary_channel == NULL) { list_del(&channel->listentry); @@ -506,7 +491,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) init_vp_index(newchannel, dev_type); - hv_event_tasklet_disable(newchannel); if (newchannel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(newchannel->target_cpu, @@ -516,7 +500,6 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) percpu_channel_enq(newchannel); put_cpu(); } - hv_event_tasklet_enable(newchannel); /* * This state is used to indicate a successful open @@ -566,7 +549,6 @@ err_deq_chan: list_del(&newchannel->listentry); mutex_unlock(&vmbus_connection.channel_mutex); - hv_event_tasklet_disable(newchannel); if (newchannel->target_cpu != get_cpu()) { put_cpu(); smp_call_function_single(newchannel->target_cpu, @@ -575,7 +557,6 @@ err_deq_chan: percpu_channel_deq(newchannel); put_cpu(); } - hv_event_tasklet_enable(newchannel); vmbus_release_relid(newchannel->offermsg.child_relid); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c4c7ae91f9d1..970771a5f739 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1437,9 +1437,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_event_tasklet_disable(struct vmbus_channel *channel); -void hv_event_tasklet_enable(struct vmbus_channel *channel); - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); void vmbus_setevent(struct vmbus_channel *channel); -- cgit v1.2.3 From e9c18ae6eb2b312f16c63e34b43ea23926daa398 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 4 Mar 2017 18:13:59 -0700 Subject: Drivers: hv: util: move waiting for release to hv_utils_transport itself Waiting for release_event in all three drivers introduced issues on release as on_reset() hook is not always called. E.g. if the device was never opened we will never get the completion. Move the waiting code to hvutil_transport_destroy() and make sure it is only called when the device is open. hvt->lock serialization should guarantee the absence of races. Fixes: 5a66fecbf6aa ("Drivers: hv: util: kvp: Fix a rescind processing issue") Fixes: 20951c7535b5 ("Drivers: hv: util: Fcopy: Fix a rescind processing issue") Fixes: d77044d142e9 ("Drivers: hv: util: Backup: Fix a rescind processing issue") Reported-by: Dexuan Cui Tested-by: Dexuan Cui Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 4 ---- drivers/hv/hv_kvp.c | 4 ---- drivers/hv/hv_snapshot.c | 4 ---- drivers/hv/hv_utils_transport.c | 12 ++++++++---- drivers/hv/hv_utils_transport.h | 1 + 5 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 9aee6014339d..a5596a642ed0 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -71,7 +71,6 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data); static const char fcopy_devname[] = "vmbus/hv_fcopy"; static u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; /* * This state maintains the version number registered by the daemon. */ @@ -331,7 +330,6 @@ static void fcopy_on_reset(void) if (cancel_delayed_work_sync(&fcopy_timeout_work)) fcopy_respond_to_host(HV_E_FAIL); - complete(&release_event); } int hv_fcopy_init(struct hv_util_service *srv) @@ -339,7 +337,6 @@ int hv_fcopy_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; fcopy_transaction.recv_channel = srv->channel; - init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -361,5 +358,4 @@ void hv_fcopy_deinit(void) fcopy_transaction.state = HVUTIL_DEVICE_DYING; cancel_delayed_work_sync(&fcopy_timeout_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index de263712e247..a1adfe2cfb34 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -101,7 +101,6 @@ static DECLARE_WORK(kvp_sendkey_work, kvp_send_key); static const char kvp_devname[] = "vmbus/hv_kvp"; static u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; /* * Register the kernel component with the user-level daemon. * As part of this registration, pass the LIC version number. @@ -714,7 +713,6 @@ static void kvp_on_reset(void) if (cancel_delayed_work_sync(&kvp_timeout_work)) kvp_respond_to_host(NULL, HV_E_FAIL); kvp_transaction.state = HVUTIL_DEVICE_INIT; - complete(&release_event); } int @@ -723,7 +721,6 @@ hv_kvp_init(struct hv_util_service *srv) recv_buffer = srv->recv_buffer; kvp_transaction.recv_channel = srv->channel; - init_completion(&release_event); /* * When this driver loads, the user level daemon that * processes the host requests may not yet be running. @@ -747,5 +744,4 @@ void hv_kvp_deinit(void) cancel_delayed_work_sync(&kvp_timeout_work); cancel_work_sync(&kvp_sendkey_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index bcc03f0748d6..e659d1b94a57 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -79,7 +79,6 @@ static int dm_reg_value; static const char vss_devname[] = "vmbus/hv_vss"; static __u8 *recv_buffer; static struct hvutil_transport *hvt; -static struct completion release_event; static void vss_timeout_func(struct work_struct *dummy); static void vss_handle_request(struct work_struct *dummy); @@ -361,13 +360,11 @@ static void vss_on_reset(void) if (cancel_delayed_work_sync(&vss_timeout_work)) vss_respond_to_host(HV_E_FAIL); vss_transaction.state = HVUTIL_DEVICE_INIT; - complete(&release_event); } int hv_vss_init(struct hv_util_service *srv) { - init_completion(&release_event); if (vmbus_proto_version < VERSION_WIN8_1) { pr_warn("Integration service 'Backup (volume snapshot)'" " not supported on this host version.\n"); @@ -400,5 +397,4 @@ void hv_vss_deinit(void) cancel_delayed_work_sync(&vss_timeout_work); cancel_work_sync(&vss_handle_request_work); hvutil_transport_destroy(hvt); - wait_for_completion(&release_event); } diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index c235a9515267..4402a71e23f7 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -182,10 +182,11 @@ static int hvt_op_release(struct inode *inode, struct file *file) * connects back. */ hvt_reset(hvt); - mutex_unlock(&hvt->lock); if (mode_old == HVUTIL_TRANSPORT_DESTROY) - hvt_transport_free(hvt); + complete(&hvt->release); + + mutex_unlock(&hvt->lock); return 0; } @@ -304,6 +305,7 @@ struct hvutil_transport *hvutil_transport_init(const char *name, init_waitqueue_head(&hvt->outmsg_q); mutex_init(&hvt->lock); + init_completion(&hvt->release); spin_lock(&hvt_list_lock); list_add(&hvt->list, &hvt_list); @@ -351,6 +353,8 @@ void hvutil_transport_destroy(struct hvutil_transport *hvt) if (hvt->cn_id.idx > 0 && hvt->cn_id.val > 0) cn_del_callback(&hvt->cn_id); - if (mode_old != HVUTIL_TRANSPORT_CHARDEV) - hvt_transport_free(hvt); + if (mode_old == HVUTIL_TRANSPORT_CHARDEV) + wait_for_completion(&hvt->release); + + hvt_transport_free(hvt); } diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h index d98f5225c3e6..79afb626e166 100644 --- a/drivers/hv/hv_utils_transport.h +++ b/drivers/hv/hv_utils_transport.h @@ -41,6 +41,7 @@ struct hvutil_transport { int outmsg_len; /* its length */ wait_queue_head_t outmsg_q; /* poll/read wait queue */ struct mutex lock; /* protects struct members */ + struct completion release; /* synchronize with fd release */ }; struct hvutil_transport *hvutil_transport_init(const char *name, -- cgit v1.2.3 From 5a16dfc855127906fcd2935fb039bc8989313915 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 4 Mar 2017 18:14:00 -0700 Subject: Drivers: hv: util: don't forget to init host_ts.lock Without the patch, I always get a "BUG: spinlock bad magic" warning. Fixes: 3716a49a81ba ("hv_utils: implement Hyper-V PTP source") Signed-off-by: Dexuan Cui Cc: Vitaly Kuznetsov Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 3042eaa13062..186b10083c55 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -590,6 +590,8 @@ static int hv_timesync_init(struct hv_util_service *srv) if (!hyperv_cs) return -ENODEV; + spin_lock_init(&host_ts.lock); + INIT_WORK(&wrk.work, hv_set_host_time); /* -- cgit v1.2.3 From 9a5476020a5f06a0fc6f17097efc80275d2f03cd Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 13 Mar 2017 15:57:09 -0700 Subject: Drivers: hv: vmbus: Don't leak channel ids If we cannot allocate memory for the channel, free the relid associated with the channel. Signed-off-by: K. Y. Srinivasan Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index bf846d078d85..fbcb06352308 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -796,6 +796,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr) /* Allocate the channel object and save this offer. */ newchannel = alloc_channel(); if (!newchannel) { + vmbus_release_relid(offer->child_relid); pr_err("Unable to allocate channel object\n"); return; } -- cgit v1.2.3 From 5e030d5ce9d99a899b648413139ff65bab12b038 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 12 Mar 2017 20:00:30 -0700 Subject: Drivers: hv: vmbus: Don't leak memory when a channel is rescinded When we close a channel that has been rescinded, we will leak memory since vmbus_teardown_gpadl() returns an error. Fix this so that we can properly cleanup the memory allocated to the ring buffers. Fixes: ccb61f8a99e6 ("Drivers: hv: vmbus: Fix a rescind handling bug") Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 57b2958205c7..321b8833fa6f 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -502,12 +502,15 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle) wait_for_completion(&info->waitevent); - if (channel->rescind) { - ret = -ENODEV; - goto post_msg_err; - } - post_msg_err: + /* + * If the channel has been rescinded; + * we will be awakened by the rescind + * handler; set the error code to zero so we don't leak memory. + */ + if (channel->rescind) + ret = 0; + spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); list_del(&info->msglistentry); spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags); -- cgit v1.2.3 From 9a3fcf912ef7f5c6e18f9af6875dd13f7311f7aa Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Tue, 14 Mar 2017 09:50:35 +0200 Subject: iwlwifi: mvm: cleanup pending frames in DQA mode When a station is asleep, the fw will set it as "asleep". All queues that are used only by one station will be stopped by the fw. In pre-DQA mode this was relevant for aggregation queues. However, in DQA mode a queue is owned by one station only, so all queues will be stopped. As a result, we don't expect to get filtered frames back to mac80211 and don't have to maintain the entire pending_frames state logic, the same way as we do in aggregations. The correct behavior is to align DQA behavior with the aggregation queue behaviour pre-DQA: - Don't count pending frames. - Let mac80211 know we have frames in these queues so that it can properly handle trigger frames. When a trigger frame is received, mac80211 tells the driver to send frames from the queues using release_buffered_frames. The driver will tell the fw to let frames out even if the station is asleep. This is done by iwl_mvm_sta_modify_sleep_tx_count. Reported-and-tested-by: Jens Axboe Reported-by: Linus Torvalds Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 5 +-- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 11 +++--- drivers/net/wireless/intel/iwlwifi/mvm/sta.h | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 41 ++++++++++------------- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index d37b1695c64e..6927caecd48e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2319,7 +2319,7 @@ iwl_mvm_mac_release_buffered_frames(struct ieee80211_hw *hw, { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); - /* Called when we need to transmit (a) frame(s) from agg queue */ + /* Called when we need to transmit (a) frame(s) from agg or dqa queue */ iwl_mvm_sta_modify_sleep_tx_count(mvm, sta, reason, num_frames, tids, more_data, true); @@ -2338,7 +2338,8 @@ static void __iwl_mvm_mac_sta_notify(struct ieee80211_hw *hw, for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) { struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid]; - if (tid_data->state != IWL_AGG_ON && + if (!iwl_mvm_is_dqa_supported(mvm) && + tid_data->state != IWL_AGG_ON && tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA) continue; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index bd1dcc863d8f..b51a2853cc80 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -3135,7 +3135,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, struct ieee80211_sta *sta, enum ieee80211_frame_release_type reason, u16 cnt, u16 tids, bool more_data, - bool agg) + bool single_sta_queue) { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct iwl_mvm_add_sta_cmd cmd = { @@ -3155,14 +3155,14 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, for_each_set_bit(tid, &_tids, IWL_MAX_TID_COUNT) cmd.awake_acs |= BIT(tid_to_ucode_ac[tid]); - /* If we're releasing frames from aggregation queues then check if the - * all queues combined that we're releasing frames from have + /* If we're releasing frames from aggregation or dqa queues then check + * if all the queues that we're releasing frames from, combined, have: * - more frames than the service period, in which case more_data * needs to be set * - fewer than 'cnt' frames, in which case we need to adjust the * firmware command (but do that unconditionally) */ - if (agg) { + if (single_sta_queue) { int remaining = cnt; int sleep_tx_count; @@ -3172,7 +3172,8 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, u16 n_queued; tid_data = &mvmsta->tid_data[tid]; - if (WARN(tid_data->state != IWL_AGG_ON && + if (WARN(!iwl_mvm_is_dqa_supported(mvm) && + tid_data->state != IWL_AGG_ON && tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA, "TID %d state is %d\n", tid, tid_data->state)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index 4be34f902278..1927ce607798 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -547,7 +547,7 @@ void iwl_mvm_sta_modify_sleep_tx_count(struct iwl_mvm *mvm, struct ieee80211_sta *sta, enum ieee80211_frame_release_type reason, u16 cnt, u16 tids, bool more_data, - bool agg); + bool single_sta_queue); int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, bool drain); void iwl_mvm_sta_modify_disable_tx(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index dd2b4a300819..3f37075f4cde 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -7,7 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH - * Copyright(c) 2016 Intel Deutschland GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -34,6 +34,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 - 2017 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -628,8 +629,10 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) * values. * Note that we don't need to make sure it isn't agg'd, since we're * TXing non-sta + * For DQA mode - we shouldn't increase it though */ - atomic_inc(&mvm->pending_frames[sta_id]); + if (!iwl_mvm_is_dqa_supported(mvm)) + atomic_inc(&mvm->pending_frames[sta_id]); return 0; } @@ -1005,11 +1008,8 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, spin_unlock(&mvmsta->lock); - /* Increase pending frames count if this isn't AMPDU */ - if ((iwl_mvm_is_dqa_supported(mvm) && - mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_ON && - mvmsta->tid_data[tx_cmd->tid_tspec].state != IWL_AGG_STARTING) || - (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu)) + /* Increase pending frames count if this isn't AMPDU or DQA queue */ + if (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu) atomic_inc(&mvm->pending_frames[mvmsta->sta_id]); return 0; @@ -1079,12 +1079,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, lockdep_assert_held(&mvmsta->lock); if ((tid_data->state == IWL_AGG_ON || - tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) && + tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA || + iwl_mvm_is_dqa_supported(mvm)) && iwl_mvm_tid_queued(tid_data) == 0) { /* - * Now that this aggregation queue is empty tell mac80211 so it - * knows we no longer have frames buffered for the station on - * this TID (for the TIM bitmap calculation.) + * Now that this aggregation or DQA queue is empty tell + * mac80211 so it knows we no longer have frames buffered for + * the station on this TID (for the TIM bitmap calculation.) */ ieee80211_sta_set_buffered(sta, tid, false); } @@ -1257,7 +1258,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, u8 skb_freed = 0; u16 next_reclaimed, seq_ctl; bool is_ndp = false; - bool txq_agg = false; /* Is this TXQ aggregated */ __skb_queue_head_init(&skbs); @@ -1283,6 +1283,10 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, info->flags |= IEEE80211_TX_STAT_ACK; break; case TX_STATUS_FAIL_DEST_PS: + /* In DQA, the FW should have stopped the queue and not + * return this status + */ + WARN_ON(iwl_mvm_is_dqa_supported(mvm)); info->flags |= IEEE80211_TX_STAT_TX_FILTERED; break; default: @@ -1387,15 +1391,6 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, bool send_eosp_ndp = false; spin_lock_bh(&mvmsta->lock); - if (iwl_mvm_is_dqa_supported(mvm)) { - enum iwl_mvm_agg_state state; - - state = mvmsta->tid_data[tid].state; - txq_agg = (state == IWL_AGG_ON || - state == IWL_EMPTYING_HW_QUEUE_DELBA); - } else { - txq_agg = txq_id >= mvm->first_agg_queue; - } if (!is_ndp) { tid_data->next_reclaimed = next_reclaimed; @@ -1452,11 +1447,11 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, * If the txq is not an AMPDU queue, there is no chance we freed * several skbs. Check that out... */ - if (txq_agg) + if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue) goto out; /* We can't free more than one frame at once on a shared queue */ - WARN_ON(!iwl_mvm_is_dqa_supported(mvm) && (skb_freed > 1)); + WARN_ON(skb_freed > 1); /* If we have still frames for this STA nothing to do here */ if (!atomic_sub_and_test(skb_freed, &mvm->pending_frames[sta_id])) -- cgit v1.2.3 From b7048ea12fbb2724ee0cd30752d4fac43cab0651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 15 Mar 2017 16:31:58 +0200 Subject: drm/i915: Do .init_clock_gating() earlier to avoid it clobbering watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently ILK-BDW explicitly disable LP1+ watermarks from their .init_clock_gating() hooks. Unfortunately that hook gets called way too late since by that time we've already initialized all the watermark state tracking which then gets out of sync with the hardware state. We may eventually want to consider killing off the explicit LP1+ disable from .init_clock_gating(). In the meantime however, we can avoid the problem by reordering the init sequence such that intel_modeset_init_hw()->intel_init_clock_gating() gets called prior to the hardware state takeover. I suppose prior to the two stage watermark programming we were magically saved by something that forced the watermarks to be reprogrammed fully after .init_clock_gating() got called. But now that no longer happens. Note that the diff might look a bit odd as it kills off one call of intel_update_cdclk(), but that's fine because intel_modeset_init_hw() does the exact same thing. Previously we just did it twice. Actually even this new init sequence is pretty bogus as .init_clock_gating() really should be called before any gem hardware init since it can configure various clock gating workarounds and whatnot that affect the GT side as well. Also intel_modeset_init() really should get split up into better defined init stages. Another "fun" detail is that intel_modeset_gem_init() is where RPS/RC6 gets configured. Why that is done from the display code is beyond me. I've decided to leave all this be for now, and just try to fix the init sequence enough for watermarks to work. Cc: stable@vger.kernel.org Cc: Gabriele Mazzotta Cc: David Purton Cc: Matt Roper Cc: Maarten Lankhorst Reported-by: Gabriele Mazzotta Reported-by: David Purton Tested-by: Gabriele Mazzotta Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96645 Fixes: ed4a6a7ca853 ("drm/i915: Add two-stage ILK-style watermark programming (v11)") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170220140443.30891-1-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170315143158.31780-1-ville.syrjala@linux.intel.com (cherry picked from commit 5be6e33400992d3450e1c8234a5af353e1560580) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3282b0f4b134..ed1f4f272b4f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -16696,12 +16696,11 @@ int intel_modeset_init(struct drm_device *dev) } } - intel_update_czclk(dev_priv); - intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; - intel_shared_dpll_init(dev); + intel_update_czclk(dev_priv); + intel_modeset_init_hw(dev); + if (dev_priv->max_cdclk_freq == 0) intel_update_max_cdclk(dev_priv); @@ -17258,8 +17257,6 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev_priv); - intel_modeset_init_hw(dev); - intel_setup_overlay(dev_priv); } -- cgit v1.2.3 From abda288bb207e5c681306299126af8c022709c18 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 19 Feb 2017 16:33:35 -0800 Subject: auxdisplay: img-ascii-lcd: add missing sentinel entry in img_ascii_lcd_matches The OF device table must be terminated, otherwise we'll be walking past it and into areas unknown. Fixes: 0cad855fbd08 ("auxdisplay: img-ascii-lcd: driver for simple ASCII...") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Tested-by: Fengguang Wu Signed-off-by: Greg Kroah-Hartman --- drivers/auxdisplay/img-ascii-lcd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index bf43b5d2aafc..83f1439e57fd 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -218,6 +218,7 @@ static const struct of_device_id img_ascii_lcd_matches[] = { { .compatible = "img,boston-lcd", .data = &boston_config }, { .compatible = "mti,malta-lcd", .data = &malta_config }, { .compatible = "mti,sead3-lcd", .data = &sead3_config }, + { /* sentinel */ } }; /** -- cgit v1.2.3 From 4e841d3eb9294ce4137fdb5d0a88f1bceab9c212 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 10 Mar 2017 17:39:21 -0800 Subject: mwifiex: pcie: don't leak DMA buffers when removing When PCIe FLR support was added, much of the remove/release code for PCIe was migrated to ->down_dev(), but ->down_dev() is never called for device removal. Let's refactor the cleanup to be done in both cases. Also, drop the comments above mwifiex_cleanup_pcie(), because they were clearly wrong, and it's better to have clear and obvious code than to detail the code steps in comments anyway. Fixes: 4c5dae59d2e9 ("mwifiex: add PCIe function level reset support") Cc: Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/pcie.c | 38 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index a0d918094889..b8c990d10d6e 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -2739,6 +2739,21 @@ static void mwifiex_pcie_device_dump(struct mwifiex_adapter *adapter) schedule_work(&card->work); } +static void mwifiex_pcie_free_buffers(struct mwifiex_adapter *adapter) +{ + struct pcie_service_card *card = adapter->card; + const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + + if (reg->sleep_cookie) + mwifiex_pcie_delete_sleep_cookie_buf(adapter); + + mwifiex_pcie_delete_cmdrsp_buf(adapter); + mwifiex_pcie_delete_evtbd_ring(adapter); + mwifiex_pcie_delete_rxbd_ring(adapter); + mwifiex_pcie_delete_txbd_ring(adapter); + card->cmdrsp_buf = NULL; +} + /* * This function initializes the PCI-E host memory space, WCB rings, etc. * @@ -2850,13 +2865,6 @@ err_enable_dev: /* * This function cleans up the allocated card buffers. - * - * The following are freed by this function - - * - TXBD ring buffers - * - RXBD ring buffers - * - Event BD ring buffers - * - Command response ring buffer - * - Sleep cookie buffer */ static void mwifiex_cleanup_pcie(struct mwifiex_adapter *adapter) { @@ -2875,6 +2883,8 @@ static void mwifiex_cleanup_pcie(struct mwifiex_adapter *adapter) "Failed to write driver not-ready signature\n"); } + mwifiex_pcie_free_buffers(adapter); + if (pdev) { pci_iounmap(pdev, card->pci_mmap); pci_iounmap(pdev, card->pci_mmap1); @@ -3126,10 +3136,7 @@ err_cre_txbd: pci_iounmap(pdev, card->pci_mmap1); } -/* This function cleans up the PCI-E host memory space. - * Some code is extracted from mwifiex_unregister_dev() - * - */ +/* This function cleans up the PCI-E host memory space. */ static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter) { struct pcie_service_card *card = adapter->card; @@ -3140,14 +3147,7 @@ static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter) adapter->seq_num = 0; - if (reg->sleep_cookie) - mwifiex_pcie_delete_sleep_cookie_buf(adapter); - - mwifiex_pcie_delete_cmdrsp_buf(adapter); - mwifiex_pcie_delete_evtbd_ring(adapter); - mwifiex_pcie_delete_rxbd_ring(adapter); - mwifiex_pcie_delete_txbd_ring(adapter); - card->cmdrsp_buf = NULL; + mwifiex_pcie_free_buffers(adapter); } static struct mwifiex_if_ops pcie_ops = { -- cgit v1.2.3 From ba1c7e45ec224cc8d2df33ecaee1946d48e79231 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 10 Mar 2017 17:39:22 -0800 Subject: mwifiex: set adapter->dev before starting to use mwifiex_dbg() The mwifiex_dbg() log handler utilizes the struct device in adapter->dev. Without it, it decides not to print anything. As of commit 2e02b5814217 ("mwifiex: Allow mwifiex early access to device structure"), we started assigning that pointer only after we finished mwifiex_register() -- this effectively neuters any mwifiex_dbg() logging done before this point. Let's move the device assignment into mwifiex_register(). Fixes: 2e02b5814217 ("mwifiex: Allow mwifiex early access to device structure") Cc: Rajat Jain Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 5ebca1d0cfc7..43d040e02e4d 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -57,8 +57,8 @@ MODULE_PARM_DESC(mfg_mode, "manufacturing mode enable:1, disable:0"); * In case of any errors during inittialization, this function also ensures * proper cleanup before exiting. */ -static int mwifiex_register(void *card, struct mwifiex_if_ops *if_ops, - void **padapter) +static int mwifiex_register(void *card, struct device *dev, + struct mwifiex_if_ops *if_ops, void **padapter) { struct mwifiex_adapter *adapter; int i; @@ -68,6 +68,7 @@ static int mwifiex_register(void *card, struct mwifiex_if_ops *if_ops, return -ENOMEM; *padapter = adapter; + adapter->dev = dev; adapter->card = card; /* Save interface specific operations in adapter */ @@ -1568,12 +1569,11 @@ mwifiex_add_card(void *card, struct completion *fw_done, { struct mwifiex_adapter *adapter; - if (mwifiex_register(card, if_ops, (void **)&adapter)) { + if (mwifiex_register(card, dev, if_ops, (void **)&adapter)) { pr_err("%s: software init failed\n", __func__); goto err_init_sw; } - adapter->dev = dev; mwifiex_probe_of(adapter); adapter->iface_type = iface_type; -- cgit v1.2.3 From 36908c4e5b1063eff3e11336fab544a76c625b69 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 10 Mar 2017 17:39:23 -0800 Subject: mwifiex: uninit wakeup info when removing device We manually init wakeup info, but we don't detach it on device removal. This means that if we (for example) rmmod + modprobe the driver, the device framework might return -EEXIST the second time, and we'll complain in the logs: [ 839.311881] mwifiex_pcie 0000:01:00.0: fail to init wakeup for mwifiex AFAICT, there's no other negative effect. But we can fix this by disabling wakeup on remove, similar to what a few other drivers do (e.g., the power supply framework). This code (and bug) has existed on SDIO for a while, but it got moved around and enabled for PCIe with commit 853402a00823 ("mwifiex: Enable WoWLAN for both sdio and pcie"). Signed-off-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index 43d040e02e4d..b62e03d11c2e 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -1718,6 +1718,9 @@ int mwifiex_remove_card(struct mwifiex_adapter *adapter) wiphy_unregister(adapter->wiphy); wiphy_free(adapter->wiphy); + if (adapter->irq_wakeup >= 0) + device_init_wakeup(adapter->dev, false); + /* Unregister device */ mwifiex_dbg(adapter, INFO, "info: unregister device\n"); -- cgit v1.2.3 From cf8c44d42c4f4f38468a53e9ce2a0314e7ebeaa1 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 28 Feb 2017 18:54:31 +0530 Subject: MAINTAINERS: update for mwifiex driver maintainers Ganapathi & Xinming are starting to take a more active role in the mwifiex driver maintainership here onwards on account of organizational changes. CC: Xinming Hu CC: Ganapathi Bhat Signed-off-by: Amitkumar Karwar Signed-off-by: Nishant Sarmukadam Signed-off-by: Cathy Luo Signed-off-by: Kalle Valo --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 33875e07482b..078c38217daa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7846,6 +7846,8 @@ F: drivers/net/ethernet/marvell/mvneta.* MARVELL MWIFIEX WIRELESS DRIVER M: Amitkumar Karwar M: Nishant Sarmukadam +M: Ganapathi Bhat +M: Xinming Hu L: linux-wireless@vger.kernel.org S: Maintained F: drivers/net/wireless/marvell/mwifiex/ -- cgit v1.2.3 From 6bce725a78de1b171928ce66dec2bae4b569e5d1 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 8 Mar 2017 14:30:34 +0100 Subject: x86/mpx: Make unnecessarily global function static Make the function get_user_bd_entry() static as it is not used outside of arch/x86/mm/mpx.c This fixes a sparse warning. Signed-off-by: Tobias Klauser Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/mpx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 5126dfd52b18..cd44ae727df7 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, * we might run off the end of the bounds table if we are on * a 64-bit kernel and try to get 8 bytes. */ -int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, +static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, long __user *bd_entry_ptr) { u32 bd_entry_32; -- cgit v1.2.3 From 03270c6ac6207fc55bbf9d20d195029dca210c79 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 6 Mar 2017 23:23:42 +0000 Subject: parport: fix attempt to write duplicate procfiles Usually every parallel port will have a single pardev registered with it. But ppdev driver is an exception. This userspace parallel port driver allows to create multiple parrallel port devices for a single parallel port. And as a result we were having a nice warning like: "sysctl table check failed: /dev/parport/parport0/devices/ppdev0/timeslice Sysctl already exists" Use the same logic as used in parport_register_device() and register the proc files only once for each parallel port. Fixes: 6fa45a226897 ("parport: add device-model to parport subsystem") Cc: stable # v4.4+ Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1414656 Bugzilla: https://bugs.archlinux.org/task/52322 Tested-by: James Feeney Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/parport/share.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/parport/share.c b/drivers/parport/share.c index bc090daa850a..5dc53d420ca8 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -939,8 +939,10 @@ parport_register_dev_model(struct parport *port, const char *name, * pardevice fields. -arca */ port->ops->init_state(par_dev, par_dev->state); - port->proc_device = par_dev; - parport_device_proc_register(par_dev); + if (!test_and_set_bit(PARPORT_DEVPROC_REGISTERED, &port->devflags)) { + port->proc_device = par_dev; + parport_device_proc_register(par_dev); + } return par_dev; -- cgit v1.2.3 From 9a69645dde1188723d80745c1bc6ee9af2cbe2a7 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 6 Mar 2017 23:23:43 +0000 Subject: ppdev: fix registering same device name Usually every parallel port will have a single pardev registered with it. But ppdev driver is an exception. This userspace parallel port driver allows to create multiple parrallel port devices for a single parallel port. And as a result we were having a big warning like: "sysfs: cannot create duplicate filename '/devices/parport0/ppdev0.0'". And with that many parallel port printers stopped working. We have been using the minor number as the id field while registering a parralel port device with a parralel port. But when there are multiple parrallel port device for one single parallel port, they all tried to register with the same name like 'pardev0.0' and everything started failing. Use an incremented index as the id instead of the minor number. Fixes: 8b7d3a9d903e ("ppdev: use new parport device model") Cc: stable # v4.9+ Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1414656 Bugzilla: https://bugs.archlinux.org/task/52322 Tested-by: James Feeney Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/char/ppdev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 2a558c706581..3e73bcdf9e65 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -84,11 +84,14 @@ struct pp_struct { struct ieee1284_info state; struct ieee1284_info saved_state; long default_inactivity; + int index; }; /* should we use PARDEVICE_MAX here? */ static struct device *devices[PARPORT_MAX]; +static DEFINE_IDA(ida_index); + /* pp_struct.flags bitfields */ #define PP_CLAIMED (1<<0) #define PP_EXCL (1<<1) @@ -290,7 +293,7 @@ static int register_device(int minor, struct pp_struct *pp) struct pardevice *pdev = NULL; char *name; struct pardev_cb ppdev_cb; - int rc = 0; + int rc = 0, index; name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor); if (name == NULL) @@ -303,20 +306,23 @@ static int register_device(int minor, struct pp_struct *pp) goto err; } + index = ida_simple_get(&ida_index, 0, 0, GFP_KERNEL); memset(&ppdev_cb, 0, sizeof(ppdev_cb)); ppdev_cb.irq_func = pp_irq; ppdev_cb.flags = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0; ppdev_cb.private = pp; - pdev = parport_register_dev_model(port, name, &ppdev_cb, minor); + pdev = parport_register_dev_model(port, name, &ppdev_cb, index); parport_put_port(port); if (!pdev) { pr_warn("%s: failed to register device!\n", name); rc = -ENXIO; + ida_simple_remove(&ida_index, index); goto err; } pp->pdev = pdev; + pp->index = index; dev_dbg(&pdev->dev, "registered pardevice\n"); err: kfree(name); @@ -755,6 +761,7 @@ static int pp_release(struct inode *inode, struct file *file) if (pp->pdev) { parport_unregister_device(pp->pdev); + ida_simple_remove(&ida_index, pp->index); pp->pdev = NULL; pr_debug(CHRDEV "%x: unregistered pardevice\n", minor); } -- cgit v1.2.3 From c3423563c68fc454b805b46cb69fd4816db933e2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Mar 2017 07:58:06 -0700 Subject: vmw_vmci: handle the return value from pci_alloc_irq_vectors correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It returns the number of vectors allocated when successful, so check for a negative error only. Fixes: 3bb434cd ("vmw_vmci: switch to pci_irq_alloc_vectors") Signed-off-by: Christoph Hellwig Reported-by: Loïc Yhuel Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_guest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index 9d659542a335..dad5abee656e 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -566,10 +566,10 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, */ error = pci_alloc_irq_vectors(pdev, VMCI_MAX_INTRS, VMCI_MAX_INTRS, PCI_IRQ_MSIX); - if (error) { + if (error < 0) { error = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY); - if (error) + if (error < 0) goto err_remove_bitmap; } else { vmci_dev->exclusive_vectors = true; -- cgit v1.2.3 From ea90e0dc8cecba6359b481e24d9c37160f6f524f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Mar 2017 14:26:04 +0100 Subject: nl80211: fix dumpit error path RTNL deadlocks Sowmini pointed out Dmitry's RTNL deadlock report to me, and it turns out to be perfectly accurate - there are various error paths that miss unlock of the RTNL. To fix those, change the locking a bit to not be conditional in all those nl80211_prepare_*_dump() functions, but make those require the RTNL to start with, and fix the buggy error paths. This also let me use sparse (by appropriately overriding the rtnl_lock/rtnl_unlock functions) to validate the changes. Cc: stable@vger.kernel.org Reported-by: Sowmini Varadhan Reported-by: Dmitry Vyukov Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 127 ++++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 71 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d7f8be4e321a..2312dc2ffdb9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -545,22 +545,18 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, { int err; - rtnl_lock(); - if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, genl_family_attrbuf(&nl80211_fam), nl80211_fam.maxattr, nl80211_policy); if (err) - goto out_unlock; + return err; *wdev = __cfg80211_wdev_from_attrs( sock_net(skb->sk), genl_family_attrbuf(&nl80211_fam)); - if (IS_ERR(*wdev)) { - err = PTR_ERR(*wdev); - goto out_unlock; - } + if (IS_ERR(*wdev)) + return PTR_ERR(*wdev); *rdev = wiphy_to_rdev((*wdev)->wiphy); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; @@ -570,10 +566,8 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -584,21 +578,11 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, } } - if (!*wdev) { - err = -ENODEV; - goto out_unlock; - } + if (!*wdev) + return -ENODEV; } return 0; - out_unlock: - rtnl_unlock(); - return err; -} - -static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) -{ - rtnl_unlock(); } /* IE validation */ @@ -2608,17 +2592,17 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * int filter_wiphy = -1; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; + int ret; rtnl_lock(); if (!cb->args[2]) { struct nl80211_dump_wiphy_state state = { .filter_wiphy = -1, }; - int ret; ret = nl80211_dump_wiphy_parse(skb, cb, &state); if (ret) - return ret; + goto out_unlock; filter_wiphy = state.filter_wiphy; @@ -2663,12 +2647,14 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * wp_idx++; } out: - rtnl_unlock(); - cb->args[0] = wp_idx; cb->args[1] = if_idx; - return skb->len; + ret = skb->len; + out_unlock: + rtnl_unlock(); + + return ret; } static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) @@ -4452,9 +4438,10 @@ static int nl80211_dump_station(struct sk_buff *skb, int sta_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!wdev->netdev) { err = -EINVAL; @@ -4489,7 +4476,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5275,9 +5262,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; @@ -5310,7 +5298,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -5470,9 +5458,10 @@ static int nl80211_dump_mpp(struct sk_buff *skb, int path_idx = cb->args[2]; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out_err; if (!rdev->ops->dump_mpp) { err = -EOPNOTSUPP; @@ -5505,7 +5494,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return err; } @@ -7674,9 +7663,12 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) int start = cb->args[2], idx = 0; int err; + rtnl_lock(); err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); - if (err) + if (err) { + rtnl_unlock(); return err; + } wdev_lock(wdev); spin_lock_bh(&rdev->bss_lock); @@ -7699,7 +7691,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) wdev_unlock(wdev); cb->args[2] = idx; - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return skb->len; } @@ -7784,9 +7776,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) int res; bool radio_stats; + rtnl_lock(); res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) - return res; + goto out_err; /* prepare_wdev_dump parsed the attributes */ radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; @@ -7827,7 +7820,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) cb->args[2] = survey_idx; res = skb->len; out_err: - nl80211_finish_wdev_dump(rdev); + rtnl_unlock(); return res; } @@ -11508,17 +11501,13 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, void *data = NULL; unsigned int data_len = 0; - rtnl_lock(); - if (cb->args[0]) { /* subtract the 1 again here */ struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; - if (!wiphy) { - err = -ENODEV; - goto out_unlock; - } + if (!wiphy) + return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -11538,23 +11527,19 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - goto out_unlock; + return err; if (!attrbuf[NL80211_ATTR_VENDOR_ID] || - !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { - err = -EINVAL; - goto out_unlock; - } + !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) + return -EINVAL; *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); - if (IS_ERR(*rdev)) { - err = PTR_ERR(*rdev); - goto out_unlock; - } + if (IS_ERR(*rdev)) + return PTR_ERR(*rdev); vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]); subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); @@ -11567,19 +11552,15 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) continue; - if (!vcmd->dumpit) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (!vcmd->dumpit) + return -EOPNOTSUPP; vcmd_idx = i; break; } - if (vcmd_idx < 0) { - err = -EOPNOTSUPP; - goto out_unlock; - } + if (vcmd_idx < 0) + return -EOPNOTSUPP; if (attrbuf[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]); @@ -11596,9 +11577,6 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, /* keep rtnl locked in successful case */ return 0; - out_unlock: - rtnl_unlock(); - return err; } static int nl80211_vendor_cmd_dump(struct sk_buff *skb, @@ -11613,9 +11591,10 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, int err; struct nlattr *vendor_data; + rtnl_lock(); err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev); if (err) - return err; + goto out; vcmd_idx = cb->args[2]; data = (void *)cb->args[3]; @@ -11624,15 +11603,21 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb, if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_NETDEV)) { - if (!wdev) - return -EINVAL; + if (!wdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && - !wdev->netdev) - return -EINVAL; + !wdev->netdev) { + err = -EINVAL; + goto out; + } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { - if (!wdev_running(wdev)) - return -ENETDOWN; + if (!wdev_running(wdev)) { + err = -ENETDOWN; + goto out; + } } } -- cgit v1.2.3 From 7c468447f40645fbf2a033dfdaa92b1957130d50 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Fri, 10 Mar 2017 12:28:18 -0600 Subject: crypto: ccp - Assign DMA commands to the channel's CCP The CCP driver generally uses a round-robin approach when assigning operations to available CCPs. For the DMA engine, however, the DMA mappings of the SGs are associated with a specific CCP. When an IOMMU is enabled, the IOMMU is programmed based on this specific device. If the DMA operations are not performed by that specific CCP then addressing errors and I/O page faults will occur. Update the CCP driver to allow a specific CCP device to be requested for an operation and use this in the DMA engine support. Cc: # 4.9.x- Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev.c | 5 ++++- drivers/crypto/ccp/ccp-dmaengine.c | 1 + include/linux/ccp.h | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index 511ab042b5e7..92d1c6959f08 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -283,11 +283,14 @@ EXPORT_SYMBOL_GPL(ccp_version); */ int ccp_enqueue_cmd(struct ccp_cmd *cmd) { - struct ccp_device *ccp = ccp_get_device(); + struct ccp_device *ccp; unsigned long flags; unsigned int i; int ret; + /* Some commands might need to be sent to a specific device */ + ccp = cmd->ccp ? cmd->ccp : ccp_get_device(); + if (!ccp) return -ENODEV; diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index e5d9278f4019..8d0eeb46d4a2 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -390,6 +390,7 @@ static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan, goto err; ccp_cmd = &cmd->ccp_cmd; + ccp_cmd->ccp = chan->ccp; ccp_pt = &ccp_cmd->u.passthru_nomap; ccp_cmd->flags = CCP_CMD_MAY_BACKLOG; ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP; diff --git a/include/linux/ccp.h b/include/linux/ccp.h index c71dd8fa5764..c41b8d99dd0e 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -556,7 +556,7 @@ enum ccp_engine { * struct ccp_cmd - CCP operation request * @entry: list element (ccp driver use only) * @work: work element used for callbacks (ccp driver use only) - * @ccp: CCP device to be run on (ccp driver use only) + * @ccp: CCP device to be run on * @ret: operation return code (ccp driver use only) * @flags: cmd processing flags * @engine: CCP operation to perform -- cgit v1.2.3 From 69db7009318758769d625b023402161c750f7876 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 14 Mar 2017 07:36:01 -0400 Subject: hwrng: amd - Revert managed API changes After commit 31b2a73c9c5f ("hwrng: amd - Migrate to managed API"), the amd-rng driver uses devres with pci_dev->dev to keep track of resources, but does not actually register a PCI driver. This results in the following issues: 1. The message WARNING: CPU: 2 PID: 621 at drivers/base/dd.c:349 driver_probe_device+0x38c is output when the i2c_amd756 driver loads and attempts to register a PCI driver. The PCI & device subsystems assume that no resources have been registered for the device, and the WARN_ON() triggers since amd-rng has already do so. 2. The driver leaks memory because the driver does not attach to a device. The driver only uses the PCI device as a reference. devm_*() functions will release resources on driver detach, which the amd-rng driver will never do. As a result, 3. The driver cannot be reloaded because there is always a use of the ioport and region after the first load of the driver. Revert the changes made by 31b2a73c9c5f ("hwrng: amd - Migrate to managed API"). Cc: Signed-off-by: Prarit Bhargava Fixes: 31b2a73c9c5f ("hwrng: amd - Migrate to managed API"). Cc: Matt Mackall Cc: Corentin LABBE Cc: PrasannaKumar Muralidharan Cc: Wei Yongjun Cc: linux-crypto@vger.kernel.org Cc: linux-geode@lists.infradead.org Signed-off-by: Herbert Xu --- drivers/char/hw_random/amd-rng.c | 42 ++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c index 4a99ac756f08..9959c762da2f 100644 --- a/drivers/char/hw_random/amd-rng.c +++ b/drivers/char/hw_random/amd-rng.c @@ -55,6 +55,7 @@ MODULE_DEVICE_TABLE(pci, pci_tbl); struct amd768_priv { void __iomem *iobase; struct pci_dev *pcidev; + u32 pmbase; }; static int amd_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) @@ -148,33 +149,58 @@ found: if (pmbase == 0) return -EIO; - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - if (!devm_request_region(&pdev->dev, pmbase + PMBASE_OFFSET, - PMBASE_SIZE, DRV_NAME)) { + if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) { dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n", pmbase + 0xF0); - return -EBUSY; + err = -EBUSY; + goto out; } - priv->iobase = devm_ioport_map(&pdev->dev, pmbase + PMBASE_OFFSET, - PMBASE_SIZE); + priv->iobase = ioport_map(pmbase + PMBASE_OFFSET, PMBASE_SIZE); if (!priv->iobase) { pr_err(DRV_NAME "Cannot map ioport\n"); - return -ENOMEM; + err = -EINVAL; + goto err_iomap; } amd_rng.priv = (unsigned long)priv; + priv->pmbase = pmbase; priv->pcidev = pdev; pr_info(DRV_NAME " detected\n"); - return devm_hwrng_register(&pdev->dev, &amd_rng); + err = hwrng_register(&amd_rng); + if (err) { + pr_err(DRV_NAME " registering failed (%d)\n", err); + goto err_hwrng; + } + return 0; + +err_hwrng: + ioport_unmap(priv->iobase); +err_iomap: + release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE); +out: + kfree(priv); + return err; } static void __exit mod_exit(void) { + struct amd768_priv *priv; + + priv = (struct amd768_priv *)amd_rng.priv; + + hwrng_unregister(&amd_rng); + + ioport_unmap(priv->iobase); + + release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE); + + kfree(priv); } module_init(mod_init); -- cgit v1.2.3 From 8c75704ebcac2ffa31ee7bcc359baf701b52bf00 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Tue, 14 Mar 2017 07:36:02 -0400 Subject: hwrng: geode - Revert managed API changes After commit e9afc746299d ("hwrng: geode - Use linux/io.h instead of asm/io.h") the geode-rng driver uses devres with pci_dev->dev to keep track of resources, but does not actually register a PCI driver. This results in the following issues: 1. The driver leaks memory because the driver does not attach to a device. The driver only uses the PCI device as a reference. devm_*() functions will release resources on driver detach, which the geode-rng driver will never do. As a result, 2. The driver cannot be reloaded because there is always a use of the ioport and region after the first load of the driver. Revert the changes made by e9afc746299d ("hwrng: geode - Use linux/io.h instead of asm/io.h"). Cc: Signed-off-by: Prarit Bhargava Fixes: 6e9b5e76882c ("hwrng: geode - Migrate to managed API") Cc: Matt Mackall Cc: Corentin LABBE Cc: PrasannaKumar Muralidharan Cc: Wei Yongjun Cc: linux-crypto@vger.kernel.org Cc: linux-geode@lists.infradead.org Signed-off-by: Herbert Xu --- drivers/char/hw_random/geode-rng.c | 50 ++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c index e7a245942029..e1d421a36a13 100644 --- a/drivers/char/hw_random/geode-rng.c +++ b/drivers/char/hw_random/geode-rng.c @@ -31,6 +31,9 @@ #include #include + +#define PFX KBUILD_MODNAME ": " + #define GEODE_RNG_DATA_REG 0x50 #define GEODE_RNG_STATUS_REG 0x54 @@ -82,6 +85,7 @@ static struct hwrng geode_rng = { static int __init mod_init(void) { + int err = -ENODEV; struct pci_dev *pdev = NULL; const struct pci_device_id *ent; void __iomem *mem; @@ -89,27 +93,43 @@ static int __init mod_init(void) for_each_pci_dev(pdev) { ent = pci_match_id(pci_tbl, pdev); - if (ent) { - rng_base = pci_resource_start(pdev, 0); - if (rng_base == 0) - return -ENODEV; - - mem = devm_ioremap(&pdev->dev, rng_base, 0x58); - if (!mem) - return -ENOMEM; - geode_rng.priv = (unsigned long)mem; - - pr_info("AMD Geode RNG detected\n"); - return devm_hwrng_register(&pdev->dev, &geode_rng); - } + if (ent) + goto found; } - /* Device not found. */ - return -ENODEV; + goto out; + +found: + rng_base = pci_resource_start(pdev, 0); + if (rng_base == 0) + goto out; + err = -ENOMEM; + mem = ioremap(rng_base, 0x58); + if (!mem) + goto out; + geode_rng.priv = (unsigned long)mem; + + pr_info("AMD Geode RNG detected\n"); + err = hwrng_register(&geode_rng); + if (err) { + pr_err(PFX "RNG registering failed (%d)\n", + err); + goto err_unmap; + } +out: + return err; + +err_unmap: + iounmap(mem); + goto out; } static void __exit mod_exit(void) { + void __iomem *mem = (void __iomem *)geode_rng.priv; + + hwrng_unregister(&geode_rng); + iounmap(mem); } module_init(mod_init); -- cgit v1.2.3 From f717629c7f834ab2efa05c7dbf0826f1d7c32ade Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 16 Mar 2017 14:37:11 +0530 Subject: powerpc: Wire up statx() syscall Test runs on a ppc64 BE guest succeeded. linux/samples/statx/test-statx program was executed on the following file types, 1. Regular file 2. Directory 3. device file 4. symlink 5. Named pipe The test run also included invoking test-statx with the runtime options provided in the main() function of test-statx.c Signed-off-by: Chandan Rajendra Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 4b369d83fe9c..1c9470881c4a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -387,3 +387,4 @@ SYSCALL(copy_file_range) COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) SYSCALL(kexec_file_load) +SYSCALL(statx) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index eb1acee91a20..9ba11dbcaca9 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define NR_syscalls 383 +#define NR_syscalls 384 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 2f26335a3c42..b85f14228857 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -393,5 +393,6 @@ #define __NR_preadv2 380 #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 +#define __NR_statx 383 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v1.2.3 From 5c71ad17f97e84d6d7e11a8e193d5d96890ed2ed Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 9 Mar 2017 01:45:39 +0800 Subject: EDAC, pnd2_edac: Add new EDAC driver for Intel SoC platforms Initial target for this driver is the Intel Apollo Lake platform and Denverton micro-server, they use the same internal memory controller IP called Pondicherry2. Memory controller registers are not in PCI config space like earlier Intel memory controllers. For Apollo Lake platform they are accessed via a "side-band" interface, for Denverton micro-server they are access via PCI config space and memory map I/O. This driver is for Apollo Lake and Denverton, but only the Denverton is fully enabled while we wait for the sideband driver. Apollo lake driver and initial cut at Denverton driver by Tony Luck. Extensive cleanup, refactoring and basic verification by Qiuxu Zhuo. Signed-off-by: Tony Luck Signed-off-by: Qiuxu Zhuo Cc: linux-edac Link: http://lkml.kernel.org/r/20170308174539.14432-1-qiuxu.zhuo@intel.com Signed-off-by: Borislav Petkov --- MAINTAINERS | 6 + drivers/edac/Kconfig | 9 + drivers/edac/Makefile | 1 + drivers/edac/pnd2_edac.c | 1542 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/edac/pnd2_edac.h | 301 +++++++++ 5 files changed, 1859 insertions(+) create mode 100644 drivers/edac/pnd2_edac.c create mode 100644 drivers/edac/pnd2_edac.h diff --git a/MAINTAINERS b/MAINTAINERS index c265a5fe4848..572fe4252ac4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4776,6 +4776,12 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/mpc85xx_edac.[ch] +EDAC-PND2 +M: Tony Luck +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/pnd2_edac.[ch] + EDAC-PASEMI M: Egor Martovetsky L: linux-edac@vger.kernel.org diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 82d85cce81f8..be3eac6ad54d 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -259,6 +259,15 @@ config EDAC_SKX Support for error detection and correction the Intel Skylake server Integrated Memory Controllers. +config EDAC_PND2 + tristate "Intel Pondicherry2" + depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL + help + Support for error detection and correction on the Intel + Pondicherry2 Integrated Memory Controller. This SoC IP is + first used on the Apollo Lake platform and Denverton + micro-server but may appear on others in the future. + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 88e472e8b9a9..587107e90996 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o obj-$(CONFIG_EDAC_SKX) += skx_edac.o +obj-$(CONFIG_EDAC_PND2) += pnd2_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c new file mode 100644 index 000000000000..14d39f05226e --- /dev/null +++ b/drivers/edac/pnd2_edac.c @@ -0,0 +1,1542 @@ +/* + * Driver for Pondicherry2 memory controller. + * + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * [Derived from sb_edac.c] + * + * Translation of system physical addresses to DIMM addresses + * is a two stage process: + * + * First the Pondicherry 2 memory controller handles slice and channel interleaving + * in "sys2pmi()". This is (almost) completley common between platforms. + * + * Then a platform specific dunit (DIMM unit) completes the process to provide DIMM, + * rank, bank, row and column using the appropriate "dunit_ops" functions/parameters. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "edac_mc.h" +#include "edac_module.h" +#include "pnd2_edac.h" + +#define APL_NUM_CHANNELS 4 +#define DNV_NUM_CHANNELS 2 +#define DNV_MAX_DIMMS 2 /* Max DIMMs per channel */ + +enum type { + APL, + DNV, /* All requests go to PMI CH0 on each slice (CH1 disabled) */ +}; + +struct dram_addr { + int chan; + int dimm; + int rank; + int bank; + int row; + int col; +}; + +struct pnd2_pvt { + int dimm_geom[APL_NUM_CHANNELS]; + u64 tolm, tohm; +}; + +/* + * System address space is divided into multiple regions with + * different interleave rules in each. The as0/as1 regions + * have no interleaving at all. The as2 region is interleaved + * between two channels. The mot region is magic and may overlap + * other regions, with its interleave rules taking precedence. + * Addresses not in any of these regions are interleaved across + * all four channels. + */ +static struct region { + u64 base; + u64 limit; + u8 enabled; +} mot, as0, as1, as2; + +static struct dunit_ops { + char *name; + enum type type; + int pmiaddr_shift; + int pmiidx_shift; + int channels; + int dimms_per_channel; + int (*rd_reg)(int port, int off, int op, void *data, size_t sz, char *name); + int (*get_registers)(void); + int (*check_ecc)(void); + void (*mk_region)(char *name, struct region *rp, void *asym); + void (*get_dimm_config)(struct mem_ctl_info *mci); + int (*pmi2mem)(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg); +} *ops; + +static struct mem_ctl_info *pnd2_mci; + +#define PND2_MSG_SIZE 256 + +/* Debug macros */ +#define pnd2_printk(level, fmt, arg...) \ + edac_printk(level, "pnd2", fmt, ##arg) + +#define pnd2_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "pnd2", fmt, ##arg) + +#define MOT_CHAN_INTLV_BIT_1SLC_2CH 12 +#define MOT_CHAN_INTLV_BIT_2SLC_2CH 13 +#define SELECTOR_DISABLED (-1) +#define _4GB (1ul << 32) + +#define PMI_ADDRESS_WIDTH 31 +#define PND_MAX_PHYS_BIT 39 + +#define APL_ASYMSHIFT 28 +#define DNV_ASYMSHIFT 31 +#define CH_HASH_MASK_LSB 6 +#define SLICE_HASH_MASK_LSB 6 +#define MOT_SLC_INTLV_BIT 12 +#define LOG2_PMI_ADDR_GRANULARITY 5 +#define MOT_SHIFT 24 + +#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo)) +#define U64_LSHIFT(val, s) ((u64)(val) << (s)) + +#ifdef CONFIG_X86_INTEL_SBI_APL +#include "linux/platform_data/sbi_apl.h" +int sbi_send(int port, int off, int op, u32 *data) +{ + struct sbi_apl_message sbi_arg; + int ret, read = 0; + + memset(&sbi_arg, 0, sizeof(sbi_arg)); + + if (op == 0 || op == 4 || op == 6) + read = 1; + else + sbi_arg.data = *data; + + sbi_arg.opcode = op; + sbi_arg.port_address = port; + sbi_arg.register_offset = off; + ret = sbi_apl_commit(&sbi_arg); + if (ret || sbi_arg.status) + edac_dbg(2, "sbi_send status=%d ret=%d data=%x\n", + sbi_arg.status, ret, sbi_arg.data); + + if (ret == 0) + ret = sbi_arg.status; + + if (ret == 0 && read) + *data = sbi_arg.data; + + return ret; +} +#else +int sbi_send(int port, int off, int op, u32 *data) +{ + return -EUNATCH; +} +#endif + +static int apl_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) +{ + int ret = 0; + + edac_dbg(2, "Read %s port=%x off=%x op=%x\n", name, port, off, op); + switch (sz) { + case 8: + ret = sbi_send(port, off + 4, op, (u32 *)(data + 4)); + case 4: + ret = sbi_send(port, off, op, (u32 *)data); + pnd2_printk(KERN_DEBUG, "%s=%x%08x ret=%d\n", name, + sz == 8 ? *((u32 *)(data + 4)) : 0, *((u32 *)data), ret); + break; + } + + return ret; +} + +static u64 get_mem_ctrl_hub_base_addr(void) +{ + struct b_cr_mchbar_lo_pci lo; + struct b_cr_mchbar_hi_pci hi; + struct pci_dev *pdev; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); + if (pdev) { + pci_read_config_dword(pdev, 0x48, (u32 *)&lo); + pci_read_config_dword(pdev, 0x4c, (u32 *)&hi); + pci_dev_put(pdev); + } else { + return 0; + } + + if (!lo.enable) { + edac_dbg(2, "MMIO via memory controller hub base address is disabled!\n"); + return 0; + } + + return U64_LSHIFT(hi.base, 32) | U64_LSHIFT(lo.base, 15); +} + +static u64 get_sideband_reg_base_addr(void) +{ + struct pci_dev *pdev; + u32 hi, lo; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x19dd, NULL); + if (pdev) { + pci_read_config_dword(pdev, 0x10, &lo); + pci_read_config_dword(pdev, 0x14, &hi); + pci_dev_put(pdev); + return (U64_LSHIFT(hi, 32) | U64_LSHIFT(lo, 0)); + } else { + return 0xfd000000; + } +} + +static int dnv_rd_reg(int port, int off, int op, void *data, size_t sz, char *name) +{ + struct pci_dev *pdev; + char *base; + u64 addr; + + if (op == 4) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x1980, NULL); + if (!pdev) + return -ENODEV; + + pci_read_config_dword(pdev, off, data); + pci_dev_put(pdev); + } else { + /* MMIO via memory controller hub base address */ + if (op == 0 && port == 0x4c) { + addr = get_mem_ctrl_hub_base_addr(); + if (!addr) + return -ENODEV; + } else { + /* MMIO via sideband register base address */ + addr = get_sideband_reg_base_addr(); + if (!addr) + return -ENODEV; + addr += (port << 16); + } + + base = ioremap((resource_size_t)addr, 0x10000); + if (!base) + return -ENODEV; + + if (sz == 8) + *(u32 *)(data + 4) = *(u32 *)(base + off + 4); + *(u32 *)data = *(u32 *)(base + off); + + iounmap(base); + } + + edac_dbg(2, "Read %s=%.8x_%.8x\n", name, + (sz == 8) ? *(u32 *)(data + 4) : 0, *(u32 *)data); + + return 0; +} + +#define RD_REGP(regp, regname, port) \ + ops->rd_reg(port, \ + regname##_offset, \ + regname##_r_opcode, \ + regp, sizeof(struct regname), \ + #regname) + +#define RD_REG(regp, regname) \ + ops->rd_reg(regname ## _port, \ + regname##_offset, \ + regname##_r_opcode, \ + regp, sizeof(struct regname), \ + #regname) + +static u64 top_lm, top_hm; +static bool two_slices; +static bool two_channels; /* Both PMI channels in one slice enabled */ + +static u8 sym_chan_mask; +static u8 asym_chan_mask; +static u8 chan_mask; + +static int slice_selector = -1; +static int chan_selector = -1; +static u64 slice_hash_mask; +static u64 chan_hash_mask; + +static void mk_region(char *name, struct region *rp, u64 base, u64 limit) +{ + rp->enabled = 1; + rp->base = base; + rp->limit = limit; + edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, limit); +} + +static void mk_region_mask(char *name, struct region *rp, u64 base, u64 mask) +{ + if (mask == 0) { + pr_info(FW_BUG "MOT mask cannot be zero\n"); + return; + } + if (mask != GENMASK_ULL(PND_MAX_PHYS_BIT, __ffs(mask))) { + pr_info(FW_BUG "MOT mask not power of two\n"); + return; + } + if (base & ~mask) { + pr_info(FW_BUG "MOT region base/mask alignment error\n"); + return; + } + rp->base = base; + rp->limit = (base | ~mask) & GENMASK_ULL(PND_MAX_PHYS_BIT, 0); + rp->enabled = 1; + edac_dbg(2, "Region:%s [%llx, %llx]\n", name, base, rp->limit); +} + +static bool in_region(struct region *rp, u64 addr) +{ + if (!rp->enabled) + return false; + + return rp->base <= addr && addr <= rp->limit; +} + +static int gen_sym_mask(struct b_cr_slice_channel_hash *p) +{ + int mask = 0; + + if (!p->slice_0_mem_disabled) + mask |= p->sym_slice0_channel_enabled; + + if (!p->slice_1_disabled) + mask |= p->sym_slice1_channel_enabled << 2; + + if (p->ch_1_disabled || p->enable_pmi_dual_data_mode) + mask &= 0x5; + + return mask; +} + +static int gen_asym_mask(struct b_cr_slice_channel_hash *p, + struct b_cr_asym_mem_region0_mchbar *as0, + struct b_cr_asym_mem_region1_mchbar *as1, + struct b_cr_asym_2way_mem_region_mchbar *as2way) +{ + const int intlv[] = { 0x5, 0xA, 0x3, 0xC }; + int mask = 0; + + if (as2way->asym_2way_interleave_enable) + mask = intlv[as2way->asym_2way_intlv_mode]; + if (as0->slice0_asym_enable) + mask |= (1 << as0->slice0_asym_channel_select); + if (as1->slice1_asym_enable) + mask |= (4 << as1->slice1_asym_channel_select); + if (p->slice_0_mem_disabled) + mask &= 0xc; + if (p->slice_1_disabled) + mask &= 0x3; + if (p->ch_1_disabled || p->enable_pmi_dual_data_mode) + mask &= 0x5; + + return mask; +} + +static struct b_cr_tolud_pci tolud; +static struct b_cr_touud_lo_pci touud_lo; +static struct b_cr_touud_hi_pci touud_hi; +static struct b_cr_asym_mem_region0_mchbar asym0; +static struct b_cr_asym_mem_region1_mchbar asym1; +static struct b_cr_asym_2way_mem_region_mchbar asym_2way; +static struct b_cr_mot_out_base_mchbar mot_base; +static struct b_cr_mot_out_mask_mchbar mot_mask; +static struct b_cr_slice_channel_hash chash; + +/* Apollo Lake dunit */ +/* + * Validated on board with just two DIMMs in the [0] and [2] positions + * in this array. Other port number matches documentation, but caution + * advised. + */ +static const int apl_dports[APL_NUM_CHANNELS] = { 0x18, 0x10, 0x11, 0x19 }; +static struct d_cr_drp0 drp0[APL_NUM_CHANNELS]; + +/* Denverton dunit */ +static const int dnv_dports[DNV_NUM_CHANNELS] = { 0x10, 0x12 }; +static struct d_cr_dsch dsch; +static struct d_cr_ecc_ctrl ecc_ctrl[DNV_NUM_CHANNELS]; +static struct d_cr_drp drp[DNV_NUM_CHANNELS]; +static struct d_cr_dmap dmap[DNV_NUM_CHANNELS]; +static struct d_cr_dmap1 dmap1[DNV_NUM_CHANNELS]; +static struct d_cr_dmap2 dmap2[DNV_NUM_CHANNELS]; +static struct d_cr_dmap3 dmap3[DNV_NUM_CHANNELS]; +static struct d_cr_dmap4 dmap4[DNV_NUM_CHANNELS]; +static struct d_cr_dmap5 dmap5[DNV_NUM_CHANNELS]; + +static void apl_mk_region(char *name, struct region *rp, void *asym) +{ + struct b_cr_asym_mem_region0_mchbar *a = asym; + + mk_region(name, rp, + U64_LSHIFT(a->slice0_asym_base, APL_ASYMSHIFT), + U64_LSHIFT(a->slice0_asym_limit, APL_ASYMSHIFT) + + GENMASK_ULL(APL_ASYMSHIFT - 1, 0)); +} + +static void dnv_mk_region(char *name, struct region *rp, void *asym) +{ + struct b_cr_asym_mem_region_denverton *a = asym; + + mk_region(name, rp, + U64_LSHIFT(a->slice_asym_base, DNV_ASYMSHIFT), + U64_LSHIFT(a->slice_asym_limit, DNV_ASYMSHIFT) + + GENMASK_ULL(DNV_ASYMSHIFT - 1, 0)); +} + +static int apl_get_registers(void) +{ + int i; + + if (RD_REG(&asym_2way, b_cr_asym_2way_mem_region_mchbar)) + return -ENODEV; + + for (i = 0; i < APL_NUM_CHANNELS; i++) + if (RD_REGP(&drp0[i], d_cr_drp0, apl_dports[i])) + return -ENODEV; + + return 0; +} + +static int dnv_get_registers(void) +{ + int i; + + if (RD_REG(&dsch, d_cr_dsch)) + return -ENODEV; + + for (i = 0; i < DNV_NUM_CHANNELS; i++) + if (RD_REGP(&ecc_ctrl[i], d_cr_ecc_ctrl, dnv_dports[i]) || + RD_REGP(&drp[i], d_cr_drp, dnv_dports[i]) || + RD_REGP(&dmap[i], d_cr_dmap, dnv_dports[i]) || + RD_REGP(&dmap1[i], d_cr_dmap1, dnv_dports[i]) || + RD_REGP(&dmap2[i], d_cr_dmap2, dnv_dports[i]) || + RD_REGP(&dmap3[i], d_cr_dmap3, dnv_dports[i]) || + RD_REGP(&dmap4[i], d_cr_dmap4, dnv_dports[i]) || + RD_REGP(&dmap5[i], d_cr_dmap5, dnv_dports[i])) + return -ENODEV; + + return 0; +} + +/* + * Read all the h/w config registers once here (they don't + * change at run time. Figure out which address ranges have + * which interleave characteristics. + */ +static int get_registers(void) +{ + const int intlv[] = { 10, 11, 12, 12 }; + + if (RD_REG(&tolud, b_cr_tolud_pci) || + RD_REG(&touud_lo, b_cr_touud_lo_pci) || + RD_REG(&touud_hi, b_cr_touud_hi_pci) || + RD_REG(&asym0, b_cr_asym_mem_region0_mchbar) || + RD_REG(&asym1, b_cr_asym_mem_region1_mchbar) || + RD_REG(&mot_base, b_cr_mot_out_base_mchbar) || + RD_REG(&mot_mask, b_cr_mot_out_mask_mchbar) || + RD_REG(&chash, b_cr_slice_channel_hash)) + return -ENODEV; + + if (ops->get_registers()) + return -ENODEV; + + if (ops->type == DNV) { + /* PMI channel idx (always 0) for asymmetric region */ + asym0.slice0_asym_channel_select = 0; + asym1.slice1_asym_channel_select = 0; + /* PMI channel bitmap (always 1) for symmetric region */ + chash.sym_slice0_channel_enabled = 0x1; + chash.sym_slice1_channel_enabled = 0x1; + } + + if (asym0.slice0_asym_enable) + ops->mk_region("as0", &as0, &asym0); + + if (asym1.slice1_asym_enable) + ops->mk_region("as1", &as1, &asym1); + + if (asym_2way.asym_2way_interleave_enable) { + mk_region("as2way", &as2, + U64_LSHIFT(asym_2way.asym_2way_base, APL_ASYMSHIFT), + U64_LSHIFT(asym_2way.asym_2way_limit, APL_ASYMSHIFT) + + GENMASK_ULL(APL_ASYMSHIFT - 1, 0)); + } + + if (mot_base.imr_en) { + mk_region_mask("mot", &mot, + U64_LSHIFT(mot_base.mot_out_base, MOT_SHIFT), + U64_LSHIFT(mot_mask.mot_out_mask, MOT_SHIFT)); + } + + top_lm = U64_LSHIFT(tolud.tolud, 20); + top_hm = U64_LSHIFT(touud_hi.touud, 32) | U64_LSHIFT(touud_lo.touud, 20); + + two_slices = !chash.slice_1_disabled && + !chash.slice_0_mem_disabled && + (chash.sym_slice0_channel_enabled != 0) && + (chash.sym_slice1_channel_enabled != 0); + two_channels = !chash.ch_1_disabled && + !chash.enable_pmi_dual_data_mode && + ((chash.sym_slice0_channel_enabled == 3) || + (chash.sym_slice1_channel_enabled == 3)); + + sym_chan_mask = gen_sym_mask(&chash); + asym_chan_mask = gen_asym_mask(&chash, &asym0, &asym1, &asym_2way); + chan_mask = sym_chan_mask | asym_chan_mask; + + if (two_slices && !two_channels) { + if (chash.hvm_mode) + slice_selector = 29; + else + slice_selector = intlv[chash.interleave_mode]; + } else if (!two_slices && two_channels) { + if (chash.hvm_mode) + chan_selector = 29; + else + chan_selector = intlv[chash.interleave_mode]; + } else if (two_slices && two_channels) { + if (chash.hvm_mode) { + slice_selector = 29; + chan_selector = 30; + } else { + slice_selector = intlv[chash.interleave_mode]; + chan_selector = intlv[chash.interleave_mode] + 1; + } + } + + if (two_slices) { + if (!chash.hvm_mode) + slice_hash_mask = chash.slice_hash_mask << SLICE_HASH_MASK_LSB; + if (!two_channels) + slice_hash_mask |= BIT_ULL(slice_selector); + } + + if (two_channels) { + if (!chash.hvm_mode) + chan_hash_mask = chash.ch_hash_mask << CH_HASH_MASK_LSB; + if (!two_slices) + chan_hash_mask |= BIT_ULL(chan_selector); + } + + return 0; +} + +/* Get a contiguous memory address (remove the MMIO gap) */ +static u64 remove_mmio_gap(u64 sys) +{ + return (sys < _4GB) ? sys : sys - (_4GB - top_lm); +} + +/* Squeeze out one address bit, shift upper part down to fill gap */ +static void remove_addr_bit(u64 *addr, int bitidx) +{ + u64 mask; + + if (bitidx == -1) + return; + + mask = (1ull << bitidx) - 1; + *addr = ((*addr >> 1) & ~mask) | (*addr & mask); +} + +/* XOR all the bits from addr specified in mask */ +static int hash_by_mask(u64 addr, u64 mask) +{ + u64 result = addr & mask; + + result = (result >> 32) ^ result; + result = (result >> 16) ^ result; + result = (result >> 8) ^ result; + result = (result >> 4) ^ result; + result = (result >> 2) ^ result; + result = (result >> 1) ^ result; + + return (int)result & 1; +} + +/* + * First stage decode. Take the system address and figure out which + * second stage will deal with it based on interleave modes. + */ +static int sys2pmi(const u64 addr, u32 *pmiidx, u64 *pmiaddr, char *msg) +{ + u64 contig_addr, contig_base, contig_offset, contig_base_adj; + int mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH : + MOT_CHAN_INTLV_BIT_1SLC_2CH; + int slice_intlv_bit_rm = SELECTOR_DISABLED; + int chan_intlv_bit_rm = SELECTOR_DISABLED; + /* Determine if address is in the MOT region. */ + bool mot_hit = in_region(&mot, addr); + /* Calculate the number of symmetric regions enabled. */ + int sym_channels = hweight8(sym_chan_mask); + + /* + * The amount we need to shift the asym base can be determined by the + * number of enabled symmetric channels. + * NOTE: This can only work because symmetric memory is not supposed + * to do a 3-way interleave. + */ + int sym_chan_shift = sym_channels >> 1; + + /* Give up if address is out of range, or in MMIO gap */ + if (addr >= (1ul << PND_MAX_PHYS_BIT) || + (addr >= top_lm && addr < _4GB) || addr >= top_hm) { + snprintf(msg, PND2_MSG_SIZE, "Error address 0x%llx is not DRAM", addr); + return -EINVAL; + } + + /* Get a contiguous memory address (remove the MMIO gap) */ + contig_addr = remove_mmio_gap(addr); + + if (in_region(&as0, addr)) { + *pmiidx = asym0.slice0_asym_channel_select; + + contig_base = remove_mmio_gap(as0.base); + contig_offset = contig_addr - contig_base; + contig_base_adj = (contig_base >> sym_chan_shift) * + ((chash.sym_slice0_channel_enabled >> (*pmiidx & 1)) & 1); + contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull); + } else if (in_region(&as1, addr)) { + *pmiidx = 2u + asym1.slice1_asym_channel_select; + + contig_base = remove_mmio_gap(as1.base); + contig_offset = contig_addr - contig_base; + contig_base_adj = (contig_base >> sym_chan_shift) * + ((chash.sym_slice1_channel_enabled >> (*pmiidx & 1)) & 1); + contig_addr = contig_offset + ((sym_channels > 0) ? contig_base_adj : 0ull); + } else if (in_region(&as2, addr) && (asym_2way.asym_2way_intlv_mode == 0x3ul)) { + bool channel1; + + mot_intlv_bit = MOT_CHAN_INTLV_BIT_1SLC_2CH; + *pmiidx = (asym_2way.asym_2way_intlv_mode & 1) << 1; + channel1 = mot_hit ? ((bool)((addr >> mot_intlv_bit) & 1)) : + hash_by_mask(contig_addr, chan_hash_mask); + *pmiidx |= (u32)channel1; + + contig_base = remove_mmio_gap(as2.base); + chan_intlv_bit_rm = mot_hit ? mot_intlv_bit : chan_selector; + contig_offset = contig_addr - contig_base; + remove_addr_bit(&contig_offset, chan_intlv_bit_rm); + contig_addr = (contig_base >> sym_chan_shift) + contig_offset; + } else { + /* Otherwise we're in normal, boring symmetric mode. */ + *pmiidx = 0u; + + if (two_slices) { + bool slice1; + + if (mot_hit) { + slice_intlv_bit_rm = MOT_SLC_INTLV_BIT; + slice1 = (addr >> MOT_SLC_INTLV_BIT) & 1; + } else { + slice_intlv_bit_rm = slice_selector; + slice1 = hash_by_mask(addr, slice_hash_mask); + } + + *pmiidx = (u32)slice1 << 1; + } + + if (two_channels) { + bool channel1; + + mot_intlv_bit = two_slices ? MOT_CHAN_INTLV_BIT_2SLC_2CH : + MOT_CHAN_INTLV_BIT_1SLC_2CH; + + if (mot_hit) { + chan_intlv_bit_rm = mot_intlv_bit; + channel1 = (addr >> mot_intlv_bit) & 1; + } else { + chan_intlv_bit_rm = chan_selector; + channel1 = hash_by_mask(contig_addr, chan_hash_mask); + } + + *pmiidx |= (u32)channel1; + } + } + + /* Remove the chan_selector bit first */ + remove_addr_bit(&contig_addr, chan_intlv_bit_rm); + /* Remove the slice bit (we remove it second because it must be lower */ + remove_addr_bit(&contig_addr, slice_intlv_bit_rm); + *pmiaddr = contig_addr; + + return 0; +} + +/* Translate PMI address to memory (rank, row, bank, column) */ +#define C(n) (0x10 | (n)) /* column */ +#define B(n) (0x20 | (n)) /* bank */ +#define R(n) (0x40 | (n)) /* row */ +#define RS (0x80) /* rank */ + +/* addrdec values */ +#define AMAP_1KB 0 +#define AMAP_2KB 1 +#define AMAP_4KB 2 +#define AMAP_RSVD 3 + +/* dden values */ +#define DEN_4Gb 0 +#define DEN_8Gb 2 + +/* dwid values */ +#define X8 0 +#define X16 1 + +static struct dimm_geometry { + u8 addrdec; + u8 dden; + u8 dwid; + u8 rowbits, colbits; + u16 bits[PMI_ADDRESS_WIDTH]; +} dimms[] = { + { + .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_1KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), B(0), B(1), B(2), R(0), + R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), R(9), + R(10), C(7), C(8), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_2KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), B(0), B(1), B(2), + R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), R(8), + R(9), R(10), C(8), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X16, + .rowbits = 15, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + 0, 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_4Gb, .dwid = X8, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X16, + .rowbits = 16, .colbits = 10, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, R(12), R(13), R(14), + R(15), 0, 0, 0 + } + }, + { + .addrdec = AMAP_4KB, .dden = DEN_8Gb, .dwid = X8, + .rowbits = 16, .colbits = 11, + .bits = { + C(2), C(3), C(4), C(5), C(6), C(7), C(8), B(0), B(1), + B(2), R(0), R(1), R(2), R(3), R(4), R(5), R(6), R(7), + R(8), R(9), R(10), C(9), R(11), RS, C(11), R(12), R(13), + R(14), R(15), 0, 0 + } + } +}; + +static int bank_hash(u64 pmiaddr, int idx, int shft) +{ + int bhash = 0; + + switch (idx) { + case 0: + bhash ^= ((pmiaddr >> (12 + shft)) ^ (pmiaddr >> (9 + shft))) & 1; + break; + case 1: + bhash ^= (((pmiaddr >> (10 + shft)) ^ (pmiaddr >> (8 + shft))) & 1) << 1; + bhash ^= ((pmiaddr >> 22) & 1) << 1; + break; + case 2: + bhash ^= (((pmiaddr >> (13 + shft)) ^ (pmiaddr >> (11 + shft))) & 1) << 2; + break; + } + + return bhash; +} + +static int rank_hash(u64 pmiaddr) +{ + return ((pmiaddr >> 16) ^ (pmiaddr >> 10)) & 1; +} + +/* Second stage decode. Compute rank, bank, row & column. */ +static int apl_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg) +{ + struct d_cr_drp0 *cr_drp0 = &drp0[pmiidx]; + struct pnd2_pvt *pvt = mci->pvt_info; + int g = pvt->dimm_geom[pmiidx]; + struct dimm_geometry *d = &dimms[g]; + int column = 0, bank = 0, row = 0, rank = 0; + int i, idx, type, skiprs = 0; + + for (i = 0; i < PMI_ADDRESS_WIDTH; i++) { + int bit = (pmiaddr >> i) & 1; + + if (i + skiprs >= PMI_ADDRESS_WIDTH) { + snprintf(msg, PND2_MSG_SIZE, "Bad dimm_geometry[] table\n"); + return -EINVAL; + } + + type = d->bits[i + skiprs] & ~0xf; + idx = d->bits[i + skiprs] & 0xf; + + /* + * On single rank DIMMs ignore the rank select bit + * and shift remainder of "bits[]" down one place. + */ + if (type == RS && (cr_drp0->rken0 + cr_drp0->rken1) == 1) { + skiprs = 1; + type = d->bits[i + skiprs] & ~0xf; + idx = d->bits[i + skiprs] & 0xf; + } + + switch (type) { + case C(0): + column |= (bit << idx); + break; + case B(0): + bank |= (bit << idx); + if (cr_drp0->bahen) + bank ^= bank_hash(pmiaddr, idx, d->addrdec); + break; + case R(0): + row |= (bit << idx); + break; + case RS: + rank = bit; + if (cr_drp0->rsien) + rank ^= rank_hash(pmiaddr); + break; + default: + if (bit) { + snprintf(msg, PND2_MSG_SIZE, "Bad translation\n"); + return -EINVAL; + } + goto done; + } + } + +done: + daddr->col = column; + daddr->bank = bank; + daddr->row = row; + daddr->rank = rank; + daddr->dimm = 0; + + return 0; +} + +/* Pluck bit "in" from pmiaddr and return value shifted to bit "out" */ +#define dnv_get_bit(pmi, in, out) ((int)(((pmi) >> (in)) & 1u) << (out)) + +static int dnv_pmi2mem(struct mem_ctl_info *mci, u64 pmiaddr, u32 pmiidx, + struct dram_addr *daddr, char *msg) +{ + /* Rank 0 or 1 */ + daddr->rank = dnv_get_bit(pmiaddr, dmap[pmiidx].rs0 + 13, 0); + /* Rank 2 or 3 */ + daddr->rank |= dnv_get_bit(pmiaddr, dmap[pmiidx].rs1 + 13, 1); + + /* + * Normally ranks 0,1 are DIMM0, and 2,3 are DIMM1, but we + * flip them if DIMM1 is larger than DIMM0. + */ + daddr->dimm = (daddr->rank >= 2) ^ drp[pmiidx].dimmflip; + + daddr->bank = dnv_get_bit(pmiaddr, dmap[pmiidx].ba0 + 6, 0); + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].ba1 + 6, 1); + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg0 + 6, 2); + if (dsch.ddr4en) + daddr->bank |= dnv_get_bit(pmiaddr, dmap[pmiidx].bg1 + 6, 3); + if (dmap1[pmiidx].bxor) { + if (dsch.ddr4en) { + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 0); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 1); + if (dsch.chan_width == 0) + /* 64/72 bit dram channel width */ + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2); + else + /* 32/40 bit dram channel width */ + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 3); + } else { + daddr->bank ^= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 0); + daddr->bank ^= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 1); + if (dsch.chan_width == 0) + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 2); + else + daddr->bank ^= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 2); + } + } + + daddr->row = dnv_get_bit(pmiaddr, dmap2[pmiidx].row0 + 6, 0); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row1 + 6, 1); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row2 + 6, 2); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row3 + 6, 3); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row4 + 6, 4); + daddr->row |= dnv_get_bit(pmiaddr, dmap2[pmiidx].row5 + 6, 5); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row6 + 6, 6); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row7 + 6, 7); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row8 + 6, 8); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row9 + 6, 9); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row10 + 6, 10); + daddr->row |= dnv_get_bit(pmiaddr, dmap3[pmiidx].row11 + 6, 11); + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row12 + 6, 12); + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row13 + 6, 13); + if (dmap4[pmiidx].row14 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row14 + 6, 14); + if (dmap4[pmiidx].row15 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row15 + 6, 15); + if (dmap4[pmiidx].row16 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row16 + 6, 16); + if (dmap4[pmiidx].row17 != 31) + daddr->row |= dnv_get_bit(pmiaddr, dmap4[pmiidx].row17 + 6, 17); + + daddr->col = dnv_get_bit(pmiaddr, dmap5[pmiidx].ca3 + 6, 3); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca4 + 6, 4); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca5 + 6, 5); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca6 + 6, 6); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca7 + 6, 7); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca8 + 6, 8); + daddr->col |= dnv_get_bit(pmiaddr, dmap5[pmiidx].ca9 + 6, 9); + if (!dsch.ddr4en && dmap1[pmiidx].ca11 != 0x3f) + daddr->col |= dnv_get_bit(pmiaddr, dmap1[pmiidx].ca11 + 13, 11); + + return 0; +} + +static int check_channel(int ch) +{ + if (drp0[ch].dramtype != 0) { + pnd2_printk(KERN_INFO, "Unsupported DIMM in channel %d\n", ch); + return 1; + } else if (drp0[ch].eccen == 0) { + pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch); + return 1; + } + return 0; +} + +static int apl_check_ecc_active(void) +{ + int i, ret = 0; + + /* Check dramtype and ECC mode for each present DIMM */ + for (i = 0; i < APL_NUM_CHANNELS; i++) + if (chan_mask & BIT(i)) + ret += check_channel(i); + return ret ? -EINVAL : 0; +} + +#define DIMMS_PRESENT(d) ((d)->rken0 + (d)->rken1 + (d)->rken2 + (d)->rken3) + +static int check_unit(int ch) +{ + struct d_cr_drp *d = &drp[ch]; + + if (DIMMS_PRESENT(d) && !ecc_ctrl[ch].eccen) { + pnd2_printk(KERN_INFO, "ECC disabled on channel %d\n", ch); + return 1; + } + return 0; +} + +static int dnv_check_ecc_active(void) +{ + int i, ret = 0; + + for (i = 0; i < DNV_NUM_CHANNELS; i++) + ret += check_unit(i); + return ret ? -EINVAL : 0; +} + +static int get_memory_error_data(struct mem_ctl_info *mci, u64 addr, + struct dram_addr *daddr, char *msg) +{ + u64 pmiaddr; + u32 pmiidx; + int ret; + + ret = sys2pmi(addr, &pmiidx, &pmiaddr, msg); + if (ret) + return ret; + + pmiaddr >>= ops->pmiaddr_shift; + /* pmi channel idx to dimm channel idx */ + pmiidx >>= ops->pmiidx_shift; + daddr->chan = pmiidx; + + ret = ops->pmi2mem(mci, pmiaddr, pmiidx, daddr, msg); + if (ret) + return ret; + + edac_dbg(0, "SysAddr=%llx PmiAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n", + addr, pmiaddr, daddr->chan, daddr->dimm, daddr->rank, daddr->bank, daddr->row, daddr->col); + + return 0; +} + +static void pnd2_mce_output_error(struct mem_ctl_info *mci, const struct mce *m, + struct dram_addr *daddr) +{ + enum hw_event_mc_err_type tp_event; + char *optype, msg[PND2_MSG_SIZE]; + bool ripv = m->mcgstatus & MCG_STATUS_RIPV; + bool overflow = m->status & MCI_STATUS_OVER; + bool uc_err = m->status & MCI_STATUS_UC; + bool recov = m->status & MCI_STATUS_S; + u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); + u32 mscod = GET_BITFIELD(m->status, 16, 31); + u32 errcode = GET_BITFIELD(m->status, 0, 15); + u32 optypenum = GET_BITFIELD(m->status, 4, 6); + int rc; + + tp_event = uc_err ? (ripv ? HW_EVENT_ERR_FATAL : HW_EVENT_ERR_UNCORRECTED) : + HW_EVENT_ERR_CORRECTED; + + /* + * According with Table 15-9 of the Intel Architecture spec vol 3A, + * memory errors should fit in this mask: + * 000f 0000 1mmm cccc (binary) + * where: + * f = Correction Report Filtering Bit. If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + * If the mask doesn't match, report an error to the parsing logic + */ + if (!((errcode & 0xef80) == 0x80)) { + optype = "Can't parse: it is not a mem"; + } else { + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; + } + } + + /* Only decode errors with an valid address (ADDRV) */ + if (!(m->status & MCI_STATUS_ADDRV)) + return; + + rc = get_memory_error_data(mci, m->addr, daddr, msg); + if (rc) + goto address_error; + + snprintf(msg, sizeof(msg), + "%s%s err_code:%04x:%04x channel:%d DIMM:%d rank:%d row:%d bank:%d col:%d", + overflow ? " OVERFLOW" : "", (uc_err && recov) ? " recoverable" : "", mscod, + errcode, daddr->chan, daddr->dimm, daddr->rank, daddr->row, daddr->bank, daddr->col); + + edac_dbg(0, "%s\n", msg); + + /* Call the helper to output message */ + edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, + m->addr & ~PAGE_MASK, 0, daddr->chan, 0, -1, optype, msg); + + return; + +address_error: + edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0, -1, -1, -1, msg, ""); +} + +static void apl_get_dimm_config(struct mem_ctl_info *mci) +{ + struct pnd2_pvt *pvt = mci->pvt_info; + struct dimm_info *dimm; + struct d_cr_drp0 *d; + u64 capacity; + int i, g; + + for (i = 0; i < APL_NUM_CHANNELS; i++) { + if (!(chan_mask & BIT(i))) + continue; + + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, 0, 0); + if (!dimm) { + edac_dbg(0, "No allocated DIMM for channel %d\n", i); + continue; + } + + d = &drp0[i]; + for (g = 0; g < ARRAY_SIZE(dimms); g++) + if (dimms[g].addrdec == d->addrdec && + dimms[g].dden == d->dden && + dimms[g].dwid == d->dwid) + break; + + if (g == ARRAY_SIZE(dimms)) { + edac_dbg(0, "Channel %d: unrecognized DIMM\n", i); + continue; + } + + pvt->dimm_geom[i] = g; + capacity = (d->rken0 + d->rken1) * 8 * (1ul << dimms[g].rowbits) * + (1ul << dimms[g].colbits); + edac_dbg(0, "Channel %d: %lld MByte DIMM\n", i, capacity >> (20 - 3)); + dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3)); + dimm->grain = 32; + dimm->dtype = (d->dwid == 0) ? DEV_X8 : DEV_X16; + dimm->mtype = MEM_DDR3; + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), "Slice#%d_Chan#%d", i / 2, i % 2); + } +} + +static const int dnv_dtypes[] = { + DEV_X8, DEV_X4, DEV_X16, DEV_UNKNOWN +}; + +static void dnv_get_dimm_config(struct mem_ctl_info *mci) +{ + int i, j, ranks_of_dimm[DNV_MAX_DIMMS], banks, rowbits, colbits, memtype; + struct dimm_info *dimm; + struct d_cr_drp *d; + u64 capacity; + + if (dsch.ddr4en) { + memtype = MEM_DDR4; + banks = 16; + colbits = 10; + } else { + memtype = MEM_DDR3; + banks = 8; + } + + for (i = 0; i < DNV_NUM_CHANNELS; i++) { + if (dmap4[i].row14 == 31) + rowbits = 14; + else if (dmap4[i].row15 == 31) + rowbits = 15; + else if (dmap4[i].row16 == 31) + rowbits = 16; + else if (dmap4[i].row17 == 31) + rowbits = 17; + else + rowbits = 18; + + if (memtype == MEM_DDR3) { + if (dmap1[i].ca11 != 0x3f) + colbits = 12; + else + colbits = 10; + } + + d = &drp[i]; + /* DIMM0 is present if rank0 and/or rank1 is enabled */ + ranks_of_dimm[0] = d->rken0 + d->rken1; + /* DIMM1 is present if rank2 and/or rank3 is enabled */ + ranks_of_dimm[1] = d->rken2 + d->rken3; + + for (j = 0; j < DNV_MAX_DIMMS; j++) { + if (!ranks_of_dimm[j]) + continue; + + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, i, j, 0); + if (!dimm) { + edac_dbg(0, "No allocated DIMM for channel %d DIMM %d\n", i, j); + continue; + } + + capacity = ranks_of_dimm[j] * banks * (1ul << rowbits) * (1ul << colbits); + edac_dbg(0, "Channel %d DIMM %d: %lld MByte DIMM\n", i, j, capacity >> (20 - 3)); + dimm->nr_pages = MiB_TO_PAGES(capacity >> (20 - 3)); + dimm->grain = 32; + dimm->dtype = dnv_dtypes[j ? d->dimmdwid0 : d->dimmdwid1]; + dimm->mtype = memtype; + dimm->edac_mode = EDAC_SECDED; + snprintf(dimm->label, sizeof(dimm->label), "Chan#%d_DIMM#%d", i, j); + } + } +} + +static int pnd2_register_mci(struct mem_ctl_info **ppmci) +{ + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + struct pnd2_pvt *pvt; + int rc; + + rc = ops->check_ecc(); + if (rc < 0) + return rc; + + /* Allocate a new MC control structure */ + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = ops->channels; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = ops->dimms_per_channel; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt)); + if (!mci) + return -ENOMEM; + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + + mci->mod_name = "pnd2_edac.c"; + mci->dev_name = ops->name; + mci->ctl_name = "Pondicherry2"; + + /* Get dimm basic config and the memory layout */ + ops->get_dimm_config(mci); + + if (edac_mc_add_mc(mci)) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + edac_mc_free(mci); + return -EINVAL; + } + + *ppmci = mci; + + return 0; +} + +static void pnd2_unregister_mci(struct mem_ctl_info *mci) +{ + if (unlikely(!mci || !mci->pvt_info)) { + pnd2_printk(KERN_ERR, "Couldn't find mci handler\n"); + return; + } + + /* Remove MC sysfs nodes */ + edac_mc_del_mc(NULL); + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); + edac_mc_free(mci); +} + +/* + * Callback function registered with core kernel mce code. + * Called once for each logged error. + */ +static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *mce = (struct mce *)data; + struct mem_ctl_info *mci; + struct dram_addr daddr; + char *type; + + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; + + mci = pnd2_mci; + if (!mci) + return NOTIFY_DONE; + + /* + * Just let mcelog handle it if the error is + * outside the memory controller. A memory error + * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0. + * bit 12 has an special meaning. + */ + if ((mce->status & 0xefff) >> 7 != 1) + return NOTIFY_DONE; + + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + pnd2_mc_printk(mci, KERN_INFO, "HANDLING MCE MEMORY ERROR\n"); + pnd2_mc_printk(mci, KERN_INFO, "CPU %u: Machine Check %s: %llx Bank %u: %llx\n", + mce->extcpu, type, mce->mcgstatus, mce->bank, mce->status); + pnd2_mc_printk(mci, KERN_INFO, "TSC %llx ", mce->tsc); + pnd2_mc_printk(mci, KERN_INFO, "ADDR %llx ", mce->addr); + pnd2_mc_printk(mci, KERN_INFO, "MISC %llx ", mce->misc); + pnd2_mc_printk(mci, KERN_INFO, "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + mce->cpuvendor, mce->cpuid, mce->time, mce->socketid, mce->apicid); + + pnd2_mce_output_error(mci, mce, &daddr); + + /* Advice mcelog that the error were handled */ + return NOTIFY_STOP; +} + +static struct notifier_block pnd2_mce_dec = { + .notifier_call = pnd2_mce_check_error, +}; + +#ifdef CONFIG_EDAC_DEBUG +/* + * Write an address to this file to exercise the address decode + * logic in this driver. + */ +static u64 pnd2_fake_addr; +#define PND2_BLOB_SIZE 1024 +static char pnd2_result[PND2_BLOB_SIZE]; +static struct dentry *pnd2_test; +static struct debugfs_blob_wrapper pnd2_blob = { + .data = pnd2_result, + .size = 0 +}; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct dram_addr daddr; + struct mce m; + + *(u64 *)data = val; + m.mcgstatus = 0; + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x9f; + m.addr = val; + pnd2_mce_output_error(pnd2_mci, &m, &daddr); + snprintf(pnd2_blob.data, PND2_BLOB_SIZE, + "SysAddr=%llx Channel=%d DIMM=%d Rank=%d Bank=%d Row=%d Column=%d\n", + m.addr, daddr.chan, daddr.dimm, daddr.rank, daddr.bank, daddr.row, daddr.col); + pnd2_blob.size = strlen(pnd2_blob.data); + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static void setup_pnd2_debug(void) +{ + pnd2_test = edac_debugfs_create_dir("pnd2_test"); + edac_debugfs_create_file("pnd2_debug_addr", 0200, pnd2_test, + &pnd2_fake_addr, &fops_u64_wo); + debugfs_create_blob("pnd2_debug_results", 0400, pnd2_test, &pnd2_blob); +} + +static void teardown_pnd2_debug(void) +{ + debugfs_remove_recursive(pnd2_test); +} +#endif + +static int pnd2_probe(void) +{ + int rc; + + edac_dbg(2, "\n"); + rc = get_registers(); + if (rc) + return rc; + + return pnd2_register_mci(&pnd2_mci); +} + +static void pnd2_remove(void) +{ + edac_dbg(0, "\n"); + pnd2_unregister_mci(pnd2_mci); +} + +static struct dunit_ops apl_ops = { + .name = "pnd2/apl", + .type = APL, + .pmiaddr_shift = LOG2_PMI_ADDR_GRANULARITY, + .pmiidx_shift = 0, + .channels = APL_NUM_CHANNELS, + .dimms_per_channel = 1, + .rd_reg = apl_rd_reg, + .get_registers = apl_get_registers, + .check_ecc = apl_check_ecc_active, + .mk_region = apl_mk_region, + .get_dimm_config = apl_get_dimm_config, + .pmi2mem = apl_pmi2mem, +}; + +static struct dunit_ops dnv_ops = { + .name = "pnd2/dnv", + .type = DNV, + .pmiaddr_shift = 0, + .pmiidx_shift = 1, + .channels = DNV_NUM_CHANNELS, + .dimms_per_channel = 2, + .rd_reg = dnv_rd_reg, + .get_registers = dnv_get_registers, + .check_ecc = dnv_check_ecc_active, + .mk_region = dnv_mk_region, + .get_dimm_config = dnv_get_dimm_config, + .pmi2mem = dnv_pmi2mem, +}; + +static const struct x86_cpu_id pnd2_cpuids[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT, 0, (kernel_ulong_t)&apl_ops }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON, 0, (kernel_ulong_t)&dnv_ops }, + { } +}; +MODULE_DEVICE_TABLE(x86cpu, pnd2_cpuids); + +static int __init pnd2_init(void) +{ + const struct x86_cpu_id *id; + int rc; + + edac_dbg(2, "\n"); + + id = x86_match_cpu(pnd2_cpuids); + if (!id) + return -ENODEV; + + ops = (struct dunit_ops *)id->driver_data; + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + rc = pnd2_probe(); + if (rc < 0) { + pnd2_printk(KERN_ERR, "Failed to register device with error %d.\n", rc); + return rc; + } + + if (!pnd2_mci) + return -ENODEV; + + mce_register_decode_chain(&pnd2_mce_dec); + setup_pnd2_debug(); + + return 0; +} + +static void __exit pnd2_exit(void) +{ + edac_dbg(2, "\n"); + teardown_pnd2_debug(); + mce_unregister_decode_chain(&pnd2_mce_dec); + pnd2_remove(); +} + +module_init(pnd2_init); +module_exit(pnd2_exit); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel SoC using Pondicherry memory controller"); diff --git a/drivers/edac/pnd2_edac.h b/drivers/edac/pnd2_edac.h new file mode 100644 index 000000000000..61b6e79492bb --- /dev/null +++ b/drivers/edac/pnd2_edac.h @@ -0,0 +1,301 @@ +/* + * Register bitfield descriptions for Pondicherry2 memory controller. + * + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _PND2_REGS_H +#define _PND2_REGS_H + +struct b_cr_touud_lo_pci { + u32 lock : 1; + u32 reserved_1 : 19; + u32 touud : 12; +}; + +#define b_cr_touud_lo_pci_port 0x4c +#define b_cr_touud_lo_pci_offset 0xa8 +#define b_cr_touud_lo_pci_r_opcode 0x04 + +struct b_cr_touud_hi_pci { + u32 touud : 7; + u32 reserved_0 : 25; +}; + +#define b_cr_touud_hi_pci_port 0x4c +#define b_cr_touud_hi_pci_offset 0xac +#define b_cr_touud_hi_pci_r_opcode 0x04 + +struct b_cr_tolud_pci { + u32 lock : 1; + u32 reserved_0 : 19; + u32 tolud : 12; +}; + +#define b_cr_tolud_pci_port 0x4c +#define b_cr_tolud_pci_offset 0xbc +#define b_cr_tolud_pci_r_opcode 0x04 + +struct b_cr_mchbar_lo_pci { + u32 enable : 1; + u32 pad_3_1 : 3; + u32 pad_14_4: 11; + u32 base: 17; +}; + +struct b_cr_mchbar_hi_pci { + u32 base : 7; + u32 pad_31_7 : 25; +}; + +/* Symmetric region */ +struct b_cr_slice_channel_hash { + u64 slice_1_disabled : 1; + u64 hvm_mode : 1; + u64 interleave_mode : 2; + u64 slice_0_mem_disabled : 1; + u64 reserved_0 : 1; + u64 slice_hash_mask : 14; + u64 reserved_1 : 11; + u64 enable_pmi_dual_data_mode : 1; + u64 ch_1_disabled : 1; + u64 reserved_2 : 1; + u64 sym_slice0_channel_enabled : 2; + u64 sym_slice1_channel_enabled : 2; + u64 ch_hash_mask : 14; + u64 reserved_3 : 11; + u64 lock : 1; +}; + +#define b_cr_slice_channel_hash_port 0x4c +#define b_cr_slice_channel_hash_offset 0x4c58 +#define b_cr_slice_channel_hash_r_opcode 0x06 + +struct b_cr_mot_out_base_mchbar { + u32 reserved_0 : 14; + u32 mot_out_base : 15; + u32 reserved_1 : 1; + u32 tr_en : 1; + u32 imr_en : 1; +}; + +#define b_cr_mot_out_base_mchbar_port 0x4c +#define b_cr_mot_out_base_mchbar_offset 0x6af0 +#define b_cr_mot_out_base_mchbar_r_opcode 0x00 + +struct b_cr_mot_out_mask_mchbar { + u32 reserved_0 : 14; + u32 mot_out_mask : 15; + u32 reserved_1 : 1; + u32 ia_iwb_en : 1; + u32 gt_iwb_en : 1; +}; + +#define b_cr_mot_out_mask_mchbar_port 0x4c +#define b_cr_mot_out_mask_mchbar_offset 0x6af4 +#define b_cr_mot_out_mask_mchbar_r_opcode 0x00 + +struct b_cr_asym_mem_region0_mchbar { + u32 pad : 4; + u32 slice0_asym_base : 11; + u32 pad_18_15 : 4; + u32 slice0_asym_limit : 11; + u32 slice0_asym_channel_select : 1; + u32 slice0_asym_enable : 1; +}; + +#define b_cr_asym_mem_region0_mchbar_port 0x4c +#define b_cr_asym_mem_region0_mchbar_offset 0x6e40 +#define b_cr_asym_mem_region0_mchbar_r_opcode 0x00 + +struct b_cr_asym_mem_region1_mchbar { + u32 pad : 4; + u32 slice1_asym_base : 11; + u32 pad_18_15 : 4; + u32 slice1_asym_limit : 11; + u32 slice1_asym_channel_select : 1; + u32 slice1_asym_enable : 1; +}; + +#define b_cr_asym_mem_region1_mchbar_port 0x4c +#define b_cr_asym_mem_region1_mchbar_offset 0x6e44 +#define b_cr_asym_mem_region1_mchbar_r_opcode 0x00 + +/* Some bit fields moved in above two structs on Denverton */ +struct b_cr_asym_mem_region_denverton { + u32 pad : 4; + u32 slice_asym_base : 8; + u32 pad_19_12 : 8; + u32 slice_asym_limit : 8; + u32 pad_28_30 : 3; + u32 slice_asym_enable : 1; +}; + +struct b_cr_asym_2way_mem_region_mchbar { + u32 pad : 2; + u32 asym_2way_intlv_mode : 2; + u32 asym_2way_base : 11; + u32 pad_16_15 : 2; + u32 asym_2way_limit : 11; + u32 pad_30_28 : 3; + u32 asym_2way_interleave_enable : 1; +}; + +#define b_cr_asym_2way_mem_region_mchbar_port 0x4c +#define b_cr_asym_2way_mem_region_mchbar_offset 0x6e50 +#define b_cr_asym_2way_mem_region_mchbar_r_opcode 0x00 + +/* Apollo Lake d-unit */ + +struct d_cr_drp0 { + u32 rken0 : 1; + u32 rken1 : 1; + u32 ddmen : 1; + u32 rsvd3 : 1; + u32 dwid : 2; + u32 dden : 3; + u32 rsvd13_9 : 5; + u32 rsien : 1; + u32 bahen : 1; + u32 rsvd18_16 : 3; + u32 caswizzle : 2; + u32 eccen : 1; + u32 dramtype : 3; + u32 blmode : 3; + u32 addrdec : 2; + u32 dramdevice_pr : 2; +}; + +#define d_cr_drp0_offset 0x1400 +#define d_cr_drp0_r_opcode 0x00 + +/* Denverton d-unit */ + +struct d_cr_dsch { + u32 ch0en : 1; + u32 ch1en : 1; + u32 ddr4en : 1; + u32 coldwake : 1; + u32 newbypdis : 1; + u32 chan_width : 1; + u32 rsvd6_6 : 1; + u32 ooodis : 1; + u32 rsvd18_8 : 11; + u32 ic : 1; + u32 rsvd31_20 : 12; +}; + +#define d_cr_dsch_port 0x16 +#define d_cr_dsch_offset 0x0 +#define d_cr_dsch_r_opcode 0x0 + +struct d_cr_ecc_ctrl { + u32 eccen : 1; + u32 rsvd31_1 : 31; +}; + +#define d_cr_ecc_ctrl_offset 0x180 +#define d_cr_ecc_ctrl_r_opcode 0x0 + +struct d_cr_drp { + u32 rken0 : 1; + u32 rken1 : 1; + u32 rken2 : 1; + u32 rken3 : 1; + u32 dimmdwid0 : 2; + u32 dimmdden0 : 2; + u32 dimmdwid1 : 2; + u32 dimmdden1 : 2; + u32 rsvd15_12 : 4; + u32 dimmflip : 1; + u32 rsvd31_17 : 15; +}; + +#define d_cr_drp_offset 0x158 +#define d_cr_drp_r_opcode 0x0 + +struct d_cr_dmap { + u32 ba0 : 5; + u32 ba1 : 5; + u32 bg0 : 5; /* if ddr3, ba2 = bg0 */ + u32 bg1 : 5; /* if ddr3, ba3 = bg1 */ + u32 rs0 : 5; + u32 rs1 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap_offset 0x174 +#define d_cr_dmap_r_opcode 0x0 + +struct d_cr_dmap1 { + u32 ca11 : 6; + u32 bxor : 1; + u32 rsvd : 25; +}; + +#define d_cr_dmap1_offset 0xb4 +#define d_cr_dmap1_r_opcode 0x0 + +struct d_cr_dmap2 { + u32 row0 : 5; + u32 row1 : 5; + u32 row2 : 5; + u32 row3 : 5; + u32 row4 : 5; + u32 row5 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap2_offset 0x148 +#define d_cr_dmap2_r_opcode 0x0 + +struct d_cr_dmap3 { + u32 row6 : 5; + u32 row7 : 5; + u32 row8 : 5; + u32 row9 : 5; + u32 row10 : 5; + u32 row11 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap3_offset 0x14c +#define d_cr_dmap3_r_opcode 0x0 + +struct d_cr_dmap4 { + u32 row12 : 5; + u32 row13 : 5; + u32 row14 : 5; + u32 row15 : 5; + u32 row16 : 5; + u32 row17 : 5; + u32 rsvd : 2; +}; + +#define d_cr_dmap4_offset 0x150 +#define d_cr_dmap4_r_opcode 0x0 + +struct d_cr_dmap5 { + u32 ca3 : 4; + u32 ca4 : 4; + u32 ca5 : 4; + u32 ca6 : 4; + u32 ca7 : 4; + u32 ca8 : 4; + u32 ca9 : 4; + u32 rsvd : 4; +}; + +#define d_cr_dmap5_offset 0x154 +#define d_cr_dmap5_r_opcode 0x0 + +#endif /* _PND2_REGS_H */ -- cgit v1.2.3 From 40ceda09c8c84694c2ca6b00bcc6dc71e8e62d96 Mon Sep 17 00:00:00 2001 From: yong mao Date: Sat, 4 Mar 2017 15:10:03 +0800 Subject: mmc: mediatek: Fixed bug where clock frequency could be set wrong This patch can fix two issues: Issue 1: In previous code, div may be overflow when setting clock frequency as f_min. We can use DIV_ROUND_UP to fix this boundary related issue. Issue 2: In previous code, we can not set the correct clock frequency when div equals 0xff. Signed-off-by: Yong Mao Signed-off-by: Chaotian Jing Reviewed-by: Daniel Kurtz Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 8e32580c12b5..b235d8da0602 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -580,7 +580,7 @@ static void msdc_set_mclk(struct msdc_host *host, unsigned char timing, u32 hz) } } sdr_set_field(host->base + MSDC_CFG, MSDC_CFG_CKMOD | MSDC_CFG_CKDIV, - (mode << 8) | (div % 0xff)); + (mode << 8) | div); sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_CKPDN); while (!(readl(host->base + MSDC_CFG) & MSDC_CFG_CKSTB)) cpu_relax(); @@ -1559,7 +1559,7 @@ static int msdc_drv_probe(struct platform_device *pdev) host->src_clk_freq = clk_get_rate(host->src_clk); /* Set host parameters to mmc */ mmc->ops = &mt_msdc_ops; - mmc->f_min = host->src_clk_freq / (4 * 255); + mmc->f_min = DIV_ROUND_UP(host->src_clk_freq, 4 * 255); mmc->caps |= MMC_CAP_ERASE | MMC_CAP_CMD23; /* MMC core transfer sizes tunable parameters */ -- cgit v1.2.3 From e552a8389aa409e257b7dcba74f67f128f979ccc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:48 +0100 Subject: perf/core: Fix use-after-free in perf_release() Dmitry reported syzcaller tripped a use-after-free in perf_release(). After much puzzlement Oleg spotted the below scenario: Task1 Task2 fork() perf_event_init_task() /* ... */ goto bad_fork_$foo; /* ... */ perf_event_free_task() mutex_lock(ctx->lock) perf_free_event(B) perf_event_release_kernel(A) mutex_lock(A->child_mutex) list_for_each_entry(child, ...) { /* child == B */ ctx = B->ctx; get_ctx(ctx); mutex_unlock(A->child_mutex); mutex_lock(A->child_mutex) list_del_init(B->child_list) mutex_unlock(A->child_mutex) /* ... */ mutex_unlock(ctx->lock); put_ctx() /* >0 */ free_task(); mutex_lock(ctx->lock); mutex_lock(A->child_mutex); /* ... */ mutex_unlock(A->child_mutex); mutex_unlock(ctx->lock) put_ctx() /* 0 */ ctx->task && !TOMBSTONE put_task_struct() /* UAF */ This patch closes the hole by making perf_event_free_task() destroy the task <-> ctx relation such that perf_event_release_kernel() will no longer observe the now dead task. Spotted-by: Oleg Nesterov Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Cc: oleg@redhat.com Cc: stable@vger.kernel.org Fixes: c6e5b73242d2 ("perf: Synchronously clean up child events") Link: http://lkml.kernel.org/r/20170314155949.GE32474@worktop Link: http://lkml.kernel.org/r/20170316125823.140295131@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1031bdf9f012..4742909c56e6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10415,6 +10415,17 @@ void perf_event_free_task(struct task_struct *task) continue; mutex_lock(&ctx->mutex); + raw_spin_lock_irq(&ctx->lock); + /* + * Destroy the task <-> ctx relation and mark the context dead. + * + * This is important because even though the task hasn't been + * exposed yet the context has been (through child_list). + */ + RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL); + WRITE_ONCE(ctx->task, TASK_TOMBSTONE); + put_task_struct(task); /* cannot be last */ + raw_spin_unlock_irq(&ctx->lock); again: list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) -- cgit v1.2.3 From e7cc4865f0f31698ef2f7aac01a50e78968985b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:49 +0100 Subject: perf/core: Fix event inheritance on fork() While hunting for clues to a use-after-free, Oleg spotted that perf_event_init_context() can loose an error value with the result that fork() can succeed even though we did not fully inherit the perf event context. Spotted-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: oleg@redhat.com Cc: stable@vger.kernel.org Fixes: 889ff0150661 ("perf/core: Split context's event group list into pinned and non-pinned lists") Link: http://lkml.kernel.org/r/20170316125823.190342547@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4742909c56e6..fc7c9a85944d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10679,7 +10679,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } /* @@ -10695,7 +10695,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } raw_spin_lock_irqsave(&parent_ctx->lock, flags); @@ -10723,6 +10723,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) } raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +out_unlock: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); -- cgit v1.2.3 From 15121c789e001168decac6483d192bdb7ea29e74 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:50 +0100 Subject: perf/core: Simplify perf_event_free_task() We have ctx->event_list that contains all events; no need to repeatedly iterate the group lists to find them all. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20170316125823.239678244@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index fc7c9a85944d..5f21e5e09ba4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10426,21 +10426,11 @@ void perf_event_free_task(struct task_struct *task) WRITE_ONCE(ctx->task, TASK_TOMBSTONE); put_task_struct(task); /* cannot be last */ raw_spin_unlock_irq(&ctx->lock); -again: - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, - group_entry) - perf_free_event(event, ctx); - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, - group_entry) + list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) perf_free_event(event, ctx); - if (!list_empty(&ctx->pinned_groups) || - !list_empty(&ctx->flexible_groups)) - goto again; - mutex_unlock(&ctx->mutex); - put_ctx(ctx); } } -- cgit v1.2.3 From d8a8cfc76919b6c830305266b23ba671623f37ff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:51 +0100 Subject: perf/core: Better explain the inherit magic While going through the event inheritance code Oleg got confused. Add some comments to better explain the silent dissapearance of orphaned events. So what happens is that at perf_event_release_kernel() time; when an event looses its connection to userspace (and ceases to exist from the user's perspective) we can still have an arbitrary amount of inherited copies of the event. We want to synchronously find and remove all these child events. Since that requires a bit of lock juggling, there is the possibility that concurrent clone()s will create new child events. Therefore we first mark the parent event as DEAD, which marks all the extant child events as orphaned. We then avoid copying orphaned events; in order to avoid getting more of them. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20170316125823.289567442@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 5f21e5e09ba4..7298e149b732 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4254,7 +4254,7 @@ int perf_event_release_kernel(struct perf_event *event) raw_spin_lock_irq(&ctx->lock); /* - * Mark this even as STATE_DEAD, there is no external reference to it + * Mark this event as STATE_DEAD, there is no external reference to it * anymore. * * Anybody acquiring event->child_mutex after the below loop _must_ @@ -10468,7 +10468,12 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event) } /* - * inherit a event from parent task to child task: + * Inherit a event from parent task to child task. + * + * Returns: + * - valid pointer on success + * - NULL for orphaned events + * - IS_ERR() on error */ static struct perf_event * inherit_event(struct perf_event *parent_event, @@ -10562,6 +10567,16 @@ inherit_event(struct perf_event *parent_event, return child_event; } +/* + * Inherits an event group. + * + * This will quietly suppress orphaned events; !inherit_event() is not an error. + * This matches with perf_event_release_kernel() removing all child events. + * + * Returns: + * - 0 on success + * - <0 on error + */ static int inherit_group(struct perf_event *parent_event, struct task_struct *parent, struct perf_event_context *parent_ctx, @@ -10576,6 +10591,11 @@ static int inherit_group(struct perf_event *parent_event, child, NULL, child_ctx); if (IS_ERR(leader)) return PTR_ERR(leader); + /* + * @leader can be NULL here because of is_orphaned_event(). In this + * case inherit_event() will create individual events, similar to what + * perf_group_detach() would do anyway. + */ list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { child_ctr = inherit_event(sub, parent, parent_ctx, child, leader, child_ctx); @@ -10585,6 +10605,17 @@ static int inherit_group(struct perf_event *parent_event, return 0; } +/* + * Creates the child task context and tries to inherit the event-group. + * + * Clears @inherited_all on !attr.inherited or error. Note that we'll leave + * inherited_all set when we 'fail' to inherit an orphaned event; this is + * consistent with perf_event_release_kernel() removing all child events. + * + * Returns: + * - 0 on success + * - <0 on error + */ static int inherit_task_group(struct perf_event *event, struct task_struct *parent, struct perf_event_context *parent_ctx, @@ -10607,7 +10638,6 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, * First allocate and initialize a context for the * child. */ - child_ctx = alloc_perf_context(parent_ctx->pmu, child); if (!child_ctx) return -ENOMEM; -- cgit v1.2.3 From f1a880a93baaadb14c10a348fd199f1cdb6bcccd Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 15 Mar 2017 15:12:23 -0700 Subject: dm verity fec: limit error correction recursion If the hash tree itself is sufficiently corrupt in addition to data blocks, it's possible for error correction to end up in a deep recursive loop, which eventually causes a kernel panic. This change limits the recursion to a reasonable level during a single I/O operation. Fixes: a739ff3f543a ("dm verity: add support for forward error correction") Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # v4.5+ --- drivers/md/dm-verity-fec.c | 12 +++++++++++- drivers/md/dm-verity-fec.h | 4 ++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index 0f0eb8a3d922..c3cc04d89524 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -439,6 +439,13 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, if (!verity_fec_is_enabled(v)) return -EOPNOTSUPP; + if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) { + DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name); + return -EIO; + } + + fio->level++; + if (type == DM_VERITY_BLOCK_TYPE_METADATA) block += v->data_blocks; @@ -470,7 +477,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, if (r < 0) { r = fec_decode_rsb(v, io, fio, rsb, offset, true); if (r < 0) - return r; + goto done; } if (dest) @@ -480,6 +487,8 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, r = verity_for_bv_block(v, io, iter, fec_bv_copy); } +done: + fio->level--; return r; } @@ -520,6 +529,7 @@ void verity_fec_init_io(struct dm_verity_io *io) memset(fio->bufs, 0, sizeof(fio->bufs)); fio->nbufs = 0; fio->output = NULL; + fio->level = 0; } /* diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h index 7fa0298b995e..bb31ce87a933 100644 --- a/drivers/md/dm-verity-fec.h +++ b/drivers/md/dm-verity-fec.h @@ -27,6 +27,9 @@ #define DM_VERITY_FEC_BUF_MAX \ (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS)) +/* maximum recursion level for verity_fec_decode */ +#define DM_VERITY_FEC_MAX_RECURSION 4 + #define DM_VERITY_OPT_FEC_DEV "use_fec_from_device" #define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks" #define DM_VERITY_OPT_FEC_START "fec_start" @@ -58,6 +61,7 @@ struct dm_verity_fec_io { unsigned nbufs; /* number of buffers allocated */ u8 *output; /* buffer for corrected output */ size_t output_pos; + unsigned level; /* recursion level */ }; #ifdef CONFIG_DM_VERITY_FEC -- cgit v1.2.3 From 66822d815ae61ecb2d9dba9031517e8a8476969d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 Mar 2017 21:11:46 -0400 Subject: drm/radeon: reinstate oland workaround for sclk Higher sclks seem to be unstable on some boards. bug: https://bugs.freedesktop.org/show_bug.cgi?id=100222 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si_dpm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 72e1588580a1..c7af9fdd20c7 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2985,9 +2985,13 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, max_sclk = 75000; } } else if (rdev->family == CHIP_OLAND) { - if ((rdev->pdev->device == 0x6604) && - (rdev->pdev->subsystem_vendor == 0x1028) && - (rdev->pdev->subsystem_device == 0x066F)) { + if ((rdev->pdev->revision == 0xC7) || + (rdev->pdev->revision == 0x80) || + (rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87) || + (rdev->pdev->device == 0x6604) || + (rdev->pdev->device == 0x6605)) { max_sclk = 75000; } } -- cgit v1.2.3 From e11ddff68a7c455e63c4b46154a3e75c699a7b55 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 Mar 2017 21:13:25 -0400 Subject: drm/amdgpu: reinstate oland workaround for sclk Higher sclks seem to be unstable on some boards. bug: https://bugs.freedesktop.org/show_bug.cgi?id=100222 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 33b504bafb88..c5dec210d529 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3465,9 +3465,13 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, max_sclk = 75000; } } else if (adev->asic_type == CHIP_OLAND) { - if ((adev->pdev->device == 0x6604) && - (adev->pdev->subsystem_vendor == 0x1028) && - (adev->pdev->subsystem_device == 0x066F)) { + if ((adev->pdev->revision == 0xC7) || + (adev->pdev->revision == 0x80) || + (adev->pdev->revision == 0x81) || + (adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0x87) || + (adev->pdev->device == 0x6604) || + (adev->pdev->device == 0x6605)) { max_sclk = 75000; } } -- cgit v1.2.3 From 60a970a6c58dedb4c6b2d90b75f8d6ad7c7b34dc Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 15 Mar 2017 10:13:32 +0800 Subject: drm/amdgpu: fix the clearing wb size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clearing wb size should be the one that it is assigned. Signed-off-by: Huang Rui Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a3a105ec99e2..de0cf3315484 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -475,7 +475,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) int r; if (adev->wb.wb_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * 4, + r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t), PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->wb.wb_obj, &adev->wb.gpu_addr, (void **)&adev->wb.wb); @@ -488,7 +488,7 @@ static int amdgpu_wb_init(struct amdgpu_device *adev) memset(&adev->wb.used, 0, sizeof(adev->wb.used)); /* clear wb memory */ - memset((char *)adev->wb.wb, 0, AMDGPU_GPU_PAGE_SIZE); + memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t)); } return 0; -- cgit v1.2.3 From 9986943ef5d61a9bea3c86000d91d3b789f0060e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 16 Mar 2017 04:22:09 +0000 Subject: ASoC: rcar: dma: remove unnecessary "volatile" commit 2a3af642eb20("ASoC: rcar: clear DE bit only in PDMACHCR...") added rsnd_dmapp_bset(), but it used copy-paste. Thus, it had unnecessary "volatile", and had below warning on x86. This patch fix it. sound/soc/sh/rcar/dma.c: In function 'rsnd_dmapp_bset': >> sound/soc/sh/rcar/dma.c:463:21: warning: passing argument 1 of \ 'ioread32' discards 'volatile' qualifier from pointer target \ type [-Wdiscarded-qualifiers] u32 val = ioread32(addr); ^~~~ In file included from arch/x86/include/asm/io.h:203:0, from arch/x86/include/asm/realmode.h:5, from arch/x86/include/asm/acpi.h:33, from arch/x86/include/asm/fixmap.h:19, from arch/x86/include/asm/apic.h:10, from arch/x86/include/asm/smp.h:12, from include/linux/smp.h:59, from include/linux/topology.h:33, from include/linux/gfp.h:8, from include/linux/idr.h:16, from include/linux/kernfs.h:14, from include/linux/sysfs.h:15, from include/linux/kobject.h:21, from include/linux/of.h:21, from include/linux/of_dma.h:16, from sound/soc/sh/rcar/dma.c:12: include/asm-generic/iomap.h:31:21: note: expected 'void *' \ but argument is of type 'volatile void *' extern unsigned int ioread32(void __iomem *); ^~~~~~~~ Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/sh/rcar/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c index c2e199b4fcf4..241cb3b08a07 100644 --- a/sound/soc/sh/rcar/dma.c +++ b/sound/soc/sh/rcar/dma.c @@ -459,7 +459,7 @@ static void rsnd_dmapp_bset(struct rsnd_dma *dma, u32 data, u32 mask, u32 reg) struct rsnd_mod *mod = rsnd_mod_get(dma); struct rsnd_priv *priv = rsnd_mod_to_priv(mod); struct rsnd_dma_ctrl *dmac = rsnd_priv_to_dmac(priv); - volatile void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg); + void __iomem *addr = rsnd_dmapp_addr(dmac, dma, reg); u32 val = ioread32(addr); val &= ~mask; -- cgit v1.2.3 From 763811987d5088d0461164f80e9d16869847a040 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 16 Mar 2017 13:58:40 +0800 Subject: ASoC: rt5665: fix define of RT5665_HP_DRIVER_5X It is (0x3 << 2), not (0x2 << 2). Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5665.h b/sound/soc/codecs/rt5665.h index 12f7080a0d3c..a30f5e6d0628 100644 --- a/sound/soc/codecs/rt5665.h +++ b/sound/soc/codecs/rt5665.h @@ -1106,7 +1106,7 @@ #define RT5665_HP_DRIVER_MASK (0x3 << 2) #define RT5665_HP_DRIVER_1X (0x0 << 2) #define RT5665_HP_DRIVER_3X (0x1 << 2) -#define RT5665_HP_DRIVER_5X (0x2 << 2) +#define RT5665_HP_DRIVER_5X (0x3 << 2) #define RT5665_LDO1_DVO_MASK (0x3) #define RT5665_LDO1_DVO_09 (0x0) #define RT5665_LDO1_DVO_10 (0x1) -- cgit v1.2.3 From 8bcd37d8b21de13d068414c018c9599281294a01 Mon Sep 17 00:00:00 2001 From: "Winkler, Tomas" Date: Thu, 9 Mar 2017 16:58:21 +0200 Subject: mmc: core: mmc_blk_rw_cmd_err - remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix compilation warning: drivers/mmc/core/block.c:1563:24: warning: variable ‘mq_rq’ set but not used [-Wunused-but-set-variable] struct mmc_queue_req *mq_rq; Signed-off-by: Tomas Winkler Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 05afefcfb611..ff3da960c473 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1560,11 +1560,8 @@ static bool mmc_blk_rw_cmd_err(struct mmc_blk_data *md, struct mmc_card *card, struct mmc_blk_request *brq, struct request *req, bool old_req_pending) { - struct mmc_queue_req *mq_rq; bool req_pending; - mq_rq = container_of(brq, struct mmc_queue_req, brq); - /* * If this is an SD card and we're writing, we can first * mark the known good sectors as ok. -- cgit v1.2.3 From 2ce0c7b65505e0d915e99389cced45b478dc935d Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Thu, 9 Mar 2017 16:18:20 +0100 Subject: mmc: sdhci-of-at91: Support external regulators The SDHCI controller in the SAMA5D2 chip requires a valid voltage set in the power control register, otherwise commands will fail with a timeout error. When using the regulator framework to specify the regulator used by the mmc device, the voltage is not configured, and it is not possible to use the connected device. Implement a custom 'set_power' function for this specific hardware, that configures the voltage in the register in all cases. Signed-off-by: Romain Izard Acked-by: Ludovic Desroches Cc: stable@vger.kernel.org #4.9+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-at91.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 2f9ad213377a..7fd964256faa 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -85,11 +85,30 @@ static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); } +/* + * In this specific implementation of the SDHCI controller, the power register + * needs to have a valid voltage set even when the power supply is managed by + * an external regulator. + */ +static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + if (!IS_ERR(host->mmc->supply.vmmc)) { + struct mmc_host *mmc = host->mmc; + + spin_unlock_irq(&host->lock); + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); + spin_lock_irq(&host->lock); + } + sdhci_set_power_noreg(host, mode, vdd); +} + static const struct sdhci_ops sdhci_at91_sama5d2_ops = { .set_clock = sdhci_at91_set_clock, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_power = sdhci_at91_set_power, }; static const struct sdhci_pltfm_data soc_data_sama5d2 = { -- cgit v1.2.3 From 181302dc7239add8ab1449c23ecab193f52ee6ab Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:40:22 +0100 Subject: mmc: ushc: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: 53f3a9e26ed5 ("mmc: USB SD Host Controller (USHC) driver") Cc: stable # 2.6.37 Cc: David Vrabel Signed-off-by: Johan Hovold Signed-off-by: Ulf Hansson --- drivers/mmc/host/ushc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c index d2c386f09d69..1d843357422e 100644 --- a/drivers/mmc/host/ushc.c +++ b/drivers/mmc/host/ushc.c @@ -426,6 +426,9 @@ static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id struct ushc_data *ushc; int ret; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); if (mmc == NULL) return -ENOMEM; -- cgit v1.2.3 From efd4b81abbe1ac753717f2f10cd3dab8bed6c103 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 16 Mar 2017 09:46:14 -0600 Subject: blk-stat: fix blk_stat_sum() if all samples are batched We need to flush the batch _before_ we check the number of samples, otherwise we'll miss all of the batched samples. Fixes: cf43e6b ("block: add scalable completion tracking of requests") Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-stat.c b/block/blk-stat.c index 9b43efb8933f..186fcb981e9b 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -30,11 +30,11 @@ static void blk_stat_flush_batch(struct blk_rq_stat *stat) static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src) { + blk_stat_flush_batch(src); + if (!src->nr_samples) return; - blk_stat_flush_batch(src); - dst->min = min(dst->min, src->min); dst->max = max(dst->max, src->max); -- cgit v1.2.3 From 29c8bbbd6e21daa0997d1c3ee886b897ee7ad652 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: afs: Fix missing put_page() In afs_writepages_region(), inside the loop where we find dirty pages to deal with, one of the if-statements is missing a put_page(). Signed-off-by: David Howells --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index c83c1a0e851f..e919e64cd4e0 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -513,6 +513,7 @@ static int afs_writepages_region(struct address_space *mapping, if (PageWriteback(page) || !PageDirty(page)) { unlock_page(page); + put_page(page); continue; } -- cgit v1.2.3 From 5611ef280d814042825ee17688f5751266fc538b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: afs: Fix page overput in afs_fill_page() afs_fill_page() loads the page it wants to fill into the afs_read request without incrementing its refcount - but then calls afs_put_read() to clean up afterwards, which then releases a ref on the page. Fix this by getting a ref on the page before calling afs_vnode_fetch_data(). This causes sync after a write to hang in afs_writepages_region() because find_get_pages_tag() gets confused and doesn't return. Fixes: 196ee9cd2d04 ("afs: Make afs_fs_fetch_data() take a list of pages") Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index e919e64cd4e0..3ac52f6a96ff 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -101,6 +101,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, req->pos = pos; req->nr_pages = 1; req->pages[0] = page; + get_page(page); i_size = i_size_read(&vnode->vfs_inode); if (pos + PAGE_SIZE > i_size) -- cgit v1.2.3 From 6186f0788b31f44affceeedc7b48eb10faea120d Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: afs: Populate group ID from vnode status The group was hard coded to GLOBAL_ROOT_GID; use the group ID that was received from the server. Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1e4897a048d2..299dbaeb2e2a 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -70,7 +70,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) set_nlink(inode, vnode->status.nlink); inode->i_uid = vnode->status.owner; - inode->i_gid = GLOBAL_ROOT_GID; + inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; inode->i_ctime.tv_sec = vnode->status.mtime_server; inode->i_ctime.tv_nsec = 0; -- cgit v1.2.3 From 627f46943ff90bcc32ddeb675d881c043c6fa2ae Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: afs: Adjust mode bits processing Mode bits for an afs file should not be enforced in the usual way. For files, the absence of user bits can restrict file access with respect to what is granted by the server. These bits apply regardless of the owner or the current uid; the rest of the mode bits (group, other) are ignored. Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/security.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index 8d010422dc89..bfa9d3428383 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask) } else { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; + if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR)) + goto permission_denied; if (mask & (MAY_EXEC | MAY_READ)) { if (!(access & AFS_ACE_READ)) goto permission_denied; + if (!(inode->i_mode & S_IRUSR)) + goto permission_denied; } else if (mask & MAY_WRITE) { if (!(access & AFS_ACE_WRITE)) goto permission_denied; + if (!(inode->i_mode & S_IWUSR)) + goto permission_denied; } } key_put(key); - ret = generic_permission(inode, mask); _leave(" = %d", ret); return ret; -- cgit v1.2.3 From bcd89270d93b7edebb5de5e5e7dca1a77a33496e Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: afs: Deal with an empty callback array Servers may send a callback array that is the same size as the FID array, or an empty array. If the callback count is 0, the code would attempt to read (fid_count * 12) bytes of data, which would fail and result in an unmarshalling error. This would lead to stale data for remotely modified files or directories. Store the callback array size in the internal afs_call structure and use that to determine the amount of data to read. Signed-off-by: Marc Dionne --- fs/afs/cmservice.c | 11 +++++------ fs/afs/internal.h | 5 ++++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 2edbdcbf6432..3062cceb5c2a 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) struct afs_callback *cb; struct afs_server *server; __be32 *bp; - u32 tmp; int ret, loop; _enter("{%u}", call->unmarshall); @@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (ret < 0) return ret; - tmp = ntohl(call->tmp); - _debug("CB count: %u", tmp); - if (tmp != call->count && tmp != 0) + call->count2 = ntohl(call->tmp); + _debug("CB count: %u", call->count2); + if (call->count2 != call->count && call->count2 != 0) return -EBADMSG; call->offset = 0; call->unmarshall++; @@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call) case 4: _debug("extract CB array"); ret = afs_extract_data(call, call->buffer, - call->count * 3 * 4, false); + call->count2 * 3 * 4, false); if (ret < 0) return ret; _debug("unmarshall CB array"); cb = call->request; bp = call->buffer; - for (loop = call->count; loop > 0; loop--, cb++) { + for (loop = call->count2; loop > 0; loop--, cb++) { cb->version = ntohl(*bp++); cb->expiry = ntohl(*bp++); cb->type = ntohl(*bp++); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5dfa56903a2d..8499870147ef 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -90,7 +90,10 @@ struct afs_call { unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ - unsigned last_to; /* amount of mapping[last] */ + union { + unsigned last_to; /* amount of mapping[last] */ + unsigned count2; /* count used in unmarshalling */ + }; unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ -- cgit v1.2.3 From 6db3ac3c4bc552837d232ec794559a2fae2815a0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: afs: Handle better the server returning excess or short data When an AFS server is given an FS.FetchData{,64} request to read data from a file, it is permitted by the protocol to return more or less than was requested. kafs currently relies on the latter behaviour in readpage{,s} to handle a partial page at the end of the file (we just ask for a whole page and clear space beyond the short read). However, we don't handle all cases. Add: (1) Handle excess data by discarding it rather than aborting. Note that we use a common static buffer to discard into so that the decryption algorithm advances the PCBC state. (2) Handle a short read that affects more than just the last page. Note that if a read comes up unexpectedly short of long, it's possible that the server's copy of the file changed - in which case the data version number will have been incremented and the callback will have been broken - in which case all the pages currently attached to the inode will be zapped anyway at some point. Signed-off-by: David Howells --- fs/afs/file.c | 7 +++++-- fs/afs/fsclient.c | 49 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index ba7b71fba34b..a38e1c30d110 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -184,10 +184,13 @@ int afs_page_filler(void *data, struct page *page) if (!req) goto enomem; + /* We request a full page. If the page is a partial one at the + * end of the file, the server will return a short read and the + * unmarshalling code will clear the unfilled space. + */ atomic_set(&req->usage, 1); req->pos = (loff_t)page->index << PAGE_SHIFT; - req->len = min_t(size_t, i_size_read(inode) - req->pos, - PAGE_SIZE); + req->len = PAGE_SIZE; req->nr_pages = 1; req->pages[0] = page; get_page(page); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index ac8e766978dc..bf8904a1a58f 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -16,6 +16,12 @@ #include "internal.h" #include "afs_fs.h" +/* + * We need somewhere to discard into in case the server helpfully returns more + * than we asked for in FS.FetchData{,64}. + */ +static u8 afs_discard_buffer[64]; + /* * decode an AFSFid block */ @@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) req->actual_len |= ntohl(call->tmp); _debug("DATA length: %llu", req->actual_len); - /* Check that the server didn't want to send us extra. We - * might want to just discard instead, but that requires - * cooperation from AF_RXRPC. - */ - if (req->actual_len > req->len) - return -EBADMSG; req->remain = req->actual_len; call->offset = req->pos & (PAGE_SIZE - 1); @@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->unmarshall++; begin_page: + ASSERTCMP(req->index, <, req->nr_pages); if (req->remain > PAGE_SIZE - call->offset) size = PAGE_SIZE - call->offset; else @@ -390,18 +391,37 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (req->page_done) req->page_done(call, req); if (req->remain > 0) { - req->index++; call->offset = 0; + req->index++; + if (req->index >= req->nr_pages) + goto begin_discard; goto begin_page; } } + goto no_more_data; + + /* Discard any excess data the server gave us */ + begin_discard: + case 4: + size = min_t(size_t, sizeof(afs_discard_buffer), req->remain); + call->count = size; + _debug("extract discard %u/%llu %zu/%u", + req->remain, req->actual_len, call->offset, call->count); + + call->offset = 0; + ret = afs_extract_data(call, afs_discard_buffer, call->count, true); + req->remain -= call->offset; + if (ret < 0) + return ret; + if (req->remain > 0) + goto begin_discard; no_more_data: call->offset = 0; - call->unmarshall++; + call->unmarshall = 5; /* extract the metadata */ - case 4: + case 5: ret = afs_extract_data(call, call->buffer, (21 + 3 + 6) * 4, false); if (ret < 0) @@ -416,16 +436,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->offset = 0; call->unmarshall++; - case 5: + case 6: break; } - if (call->count < PAGE_SIZE) { - buffer = kmap(req->pages[req->index]); - memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap(req->pages[req->index]); + for (; req->index < req->nr_pages; req->index++) { + if (call->count < PAGE_SIZE) + zero_user_segment(req->pages[req->index], + call->count, PAGE_SIZE); if (req->page_done) req->page_done(call, req); + call->count = 0; } _leave(" = 0 [done]"); -- cgit v1.2.3 From 3448e6521755862446aed28e29abf12565d8844e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: afs: Kill struct afs_read::pg_offset Kill struct afs_read::pg_offset as nothing uses it. It's unnecessary as pos can be masked off. Signed-off-by: David Howells --- fs/afs/internal.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8499870147ef..7784a8bc375c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -135,7 +135,6 @@ struct afs_read { atomic_t usage; unsigned int remain; /* Amount remaining */ unsigned int index; /* Which page we're reading into */ - unsigned int pg_offset; /* Offset in page we're at */ unsigned int nr_pages; void (*page_done)(struct afs_call *, struct afs_read *); struct page *pages[]; -- cgit v1.2.3 From e8e581a88c5f5fc7cf1f636d122b77fbcfc8c2f6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: afs: Handle a short write to an AFS page Handle the situation where afs_write_begin() is told to expect that a full-page write will be made, but this doesn't happen (EFAULT, CTRL-C, etc.), and so afs_write_end() sees a partial write took place. Currently, no attempt is to deal with the discrepency. Fix this by loading the gap from the server. Reported-by: Al Viro Signed-off-by: David Howells --- fs/afs/fsclient.c | 4 +++- fs/afs/internal.h | 2 +- fs/afs/write.c | 28 +++++++++++++++++++--------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index bf8904a1a58f..6f917dd1238c 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -393,8 +393,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (req->remain > 0) { call->offset = 0; req->index++; - if (req->index >= req->nr_pages) + if (req->index >= req->nr_pages) { + call->unmarshall = 4; goto begin_discard; + } goto begin_page; } } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7784a8bc375c..dc2cb486e127 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -130,7 +130,7 @@ struct afs_call_type { */ struct afs_read { loff_t pos; /* Where to start reading */ - loff_t len; /* How much to read */ + loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ atomic_t usage; unsigned int remain; /* Amount remaining */ diff --git a/fs/afs/write.c b/fs/afs/write.c index 3ac52f6a96ff..ea66890fc188 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb) * partly or wholly fill a page that's under preparation for writing */ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, - loff_t pos, struct page *page) + loff_t pos, unsigned int len, struct page *page) { struct afs_read *req; - loff_t i_size; int ret; _enter(",,%llu", (unsigned long long)pos); @@ -99,16 +98,11 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, atomic_set(&req->usage, 1); req->pos = pos; + req->len = len; req->nr_pages = 1; req->pages[0] = page; get_page(page); - i_size = i_size_read(&vnode->vfs_inode); - if (pos + PAGE_SIZE > i_size) - req->len = i_size - pos; - else - req->len = PAGE_SIZE; - ret = afs_vnode_fetch_data(vnode, key, req); afs_put_read(req); if (ret < 0) { @@ -164,7 +158,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, /* page won't leak in error case: it eventually gets cleaned off LRU */ if (!PageUptodate(page) && len != PAGE_SIZE) { - ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page); + ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { kfree(candidate); _leave(" = %d [prep]", ret); @@ -258,7 +252,9 @@ int afs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct key *key = file->private_data; loff_t i_size, maybe_i_size; + int ret; _enter("{%x:%u},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); @@ -274,6 +270,20 @@ int afs_write_end(struct file *file, struct address_space *mapping, spin_unlock(&vnode->writeback_lock); } + if (!PageUptodate(page)) { + if (copied < len) { + /* Try and load any missing data from the server. The + * unmarshalling routine will take care of clearing any + * bits that are beyond the EOF. + */ + ret = afs_fill_page(vnode, key, pos + copied, + len - copied, page); + if (ret < 0) + return ret; + } + SetPageUptodate(page); + } + set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); -- cgit v1.2.3 From 58fed94dfb17e89556b5705f20f90e5b2971b6a1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: afs: Flush outstanding writes when an fd is closed Flush outstanding writes in afs when an fd is closed. This is what NFS and CIFS do. Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/file.c | 1 + fs/afs/internal.h | 1 + fs/afs/write.c | 14 ++++++++++++++ 3 files changed, 16 insertions(+) diff --git a/fs/afs/file.c b/fs/afs/file.c index a38e1c30d110..b5829443ff69 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping, const struct file_operations afs_file_operations = { .open = afs_open, + .flush = afs_flush, .release = afs_release, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index dc2cb486e127..af1d91ec7f2c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -720,6 +720,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_writeback_all(struct afs_vnode *); +extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/afs/write.c b/fs/afs/write.c index ea66890fc188..f1450ea09406 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -757,6 +757,20 @@ out: return ret; } +/* + * Flush out all outstanding writes on a file opened for writing when it is + * closed. + */ +int afs_flush(struct file *file, fl_owner_t id) +{ + _enter(""); + + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; + + return vfs_fsync(file, 0); +} + /* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal -- cgit v1.2.3 From 944c74f472f926785b1948efa0e73e2f1b3b539b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: afs: Distinguish mountpoints from symlinks by file mode alone In AFS, mountpoints appear as symlinks with mode 0644 and normal symlinks have mode 0777, so use this to distinguish them rather than reading the content and parsing it. In the case of a mountpoint, the symlink body is a formatted string indicating the location of the target volume. Note that with this, kAFS no longer 'pre-fetches' the contents of symlinks, so afs_readpage() may fail with an access-denial because when the VFS calls d_automount(), it wraps the call in an credentials override that sets the initial creds - thereby preventing access to the caller's keyrings and the authentication keys held therein. To this end, a patch reverting that change to the VFS is required also. Reported-by: Jeffrey Altman Signed-off-by: David Howells --- fs/afs/inode.c | 29 +++++++++++++++-------------- fs/afs/internal.h | 1 - fs/afs/mntpt.c | 53 ----------------------------------------------------- 3 files changed, 15 insertions(+), 68 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 299dbaeb2e2a..ade6ec3873cf 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_fop = &afs_dir_file_operations; break; case AFS_FTYPE_SYMLINK: - inode->i_mode = S_IFLNK | vnode->status.mode; - inode->i_op = &page_symlink_inode_operations; + /* Symlinks with a mode of 0644 are actually mountpoints. */ + if ((vnode->status.mode & 0777) == 0644) { + inode->i_flags |= S_AUTOMOUNT; + + spin_lock(&vnode->lock); + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + spin_unlock(&vnode->lock); + + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &afs_mntpt_inode_operations; + inode->i_fop = &afs_mntpt_file_operations; + } else { + inode->i_mode = S_IFLNK | vnode->status.mode; + inode->i_op = &page_symlink_inode_operations; + } inode_nohighmem(inode); break; default: @@ -79,18 +92,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_generation = vnode->fid.unique; inode->i_version = vnode->status.data_version; inode->i_mapping->a_ops = &afs_fs_aops; - - /* check to see whether a symbolic link is really a mountpoint */ - if (vnode->status.type == AFS_FTYPE_SYMLINK) { - afs_mntpt_check_symlink(vnode, key); - - if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) { - inode->i_mode = S_IFDIR | vnode->status.mode; - inode->i_op = &afs_mntpt_inode_operations; - inode->i_fop = &afs_mntpt_file_operations; - } - } - return 0; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index af1d91ec7f2c..39de154fb42e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -559,7 +559,6 @@ extern const struct inode_operations afs_autocell_inode_operations; extern const struct file_operations afs_mntpt_file_operations; extern struct vfsmount *afs_d_automount(struct path *); -extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); extern void afs_mntpt_kill_timer(void); /* diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index d4fb0afc0097..bd3b65cde282 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -46,59 +46,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); static unsigned long afs_mntpt_expiry_timeout = 10 * 60; -/* - * check a symbolic link to see whether it actually encodes a mountpoint - * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately - */ -int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) -{ - struct page *page; - size_t size; - char *buf; - int ret; - - _enter("{%x:%u,%u}", - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); - - /* read the contents of the symlink into the pagecache */ - page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, - afs_page_filler, key); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto out; - } - - ret = -EIO; - if (PageError(page)) - goto out_free; - - buf = kmap(page); - - /* examine the symlink's contents */ - size = vnode->status.size; - _debug("symlink to %*.*s", (int) size, (int) size, buf); - - if (size > 2 && - (buf[0] == '%' || buf[0] == '#') && - buf[size - 1] == '.' - ) { - _debug("symlink is a mountpoint"); - spin_lock(&vnode->lock); - set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - vnode->vfs_inode.i_flags |= S_AUTOMOUNT; - spin_unlock(&vnode->lock); - } - - ret = 0; - - kunmap(page); -out_free: - put_page(page); -out: - _leave(" = %d", ret); - return ret; -} - /* * no valid lookup procedure on this sort of dir */ -- cgit v1.2.3 From 1d7e4ebf291d3103466006926329e39aa355944f Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: afs: inode: Replace rcu_assign_pointer() with RCU_INIT_POINTER() The use of "rcu_assign_pointer()" is NULLing out the pointer. According to RCU_INIT_POINTER()'s block comment: "1. This use of RCU_INIT_POINTER() is NULLing out the pointer" it is better to use it instead of rcu_assign_pointer() because it has a smaller overhead. The following Coccinelle semantic patch was used: @@ @@ - rcu_assign_pointer + RCU_INIT_POINTER (..., NULL) Signed-off-by: Andreea-Cristina Bernat Signed-off-by: David Howells --- fs/afs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index ade6ec3873cf..e083e086b7ca 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -445,7 +445,7 @@ void afs_evict_inode(struct inode *inode) mutex_lock(&vnode->permits_lock); permits = vnode->permits; - rcu_assign_pointer(vnode->permits, NULL); + RCU_INIT_POINTER(vnode->permits, NULL); mutex_unlock(&vnode->permits_lock); if (permits) call_rcu(&permits->rcu, afs_zap_permits); -- cgit v1.2.3 From df8a09d1b8f9e693ec3f6b7e0162fc817f2cf0db Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: afs: security: Replace rcu_assign_pointer() with RCU_INIT_POINTER() The use of "rcu_assign_pointer()" is NULLing out the pointer. According to RCU_INIT_POINTER()'s block comment: "1. This use of RCU_INIT_POINTER() is NULLing out the pointer" it is better to use it instead of rcu_assign_pointer() because it has a smaller overhead. The following Coccinelle semantic patch was used: @@ @@ - rcu_assign_pointer + RCU_INIT_POINTER (..., NULL) Signed-off-by: Andreea-Cristina Bernat Signed-off-by: David Howells --- fs/afs/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index bfa9d3428383..ecb86a670180 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode) mutex_lock(&vnode->permits_lock); permits = vnode->permits; - rcu_assign_pointer(vnode->permits, NULL); + RCU_INIT_POINTER(vnode->permits, NULL); mutex_unlock(&vnode->permits_lock); if (permits) -- cgit v1.2.3 From 8a79790bf0b7da216627ffb85f52cfb4adbf1e4e Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: afs: Migrate vlocation fields to 64-bit get_seconds() returns real wall-clock seconds. On 32-bit systems this value will overflow in year 2038 and beyond. This patch changes afs's vlocation record to use ktime_get_real_seconds() instead, for the fields time_of_death and update_at. Signed-off-by: Tina Ruchandani Signed-off-by: David Howells --- fs/afs/callback.c | 7 ++++--- fs/afs/internal.h | 7 ++++--- fs/afs/server.c | 6 +++--- fs/afs/vlocation.c | 16 +++++++++------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index b29447e03ede..25d404d22cae 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work) { struct afs_server *server; struct afs_vnode *vnode, *xvnode; - time_t now; + time64_t now; long timeout; int ret; @@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find the first vnode to update */ spin_lock(&server->cb_lock); @@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vnode->update_at = get_seconds() + afs_vnode_update_timeout; + vnode->update_at = ktime_get_real_seconds() + + afs_vnode_update_timeout; spin_lock(&server->cb_lock); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 39de154fb42e..97a16ce200be 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -249,7 +250,7 @@ struct afs_cache_vhash { */ struct afs_vlocation { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct list_head link; /* link in cell volume location list */ struct list_head grave; /* link in master graveyard list */ struct list_head update; /* link in master update list */ @@ -260,7 +261,7 @@ struct afs_vlocation { struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ wait_queue_head_t waitq; /* status change waitqueue */ - time_t update_at; /* time at which record should be updated */ + time64_t update_at; /* time at which record should be updated */ spinlock_t lock; /* access lock */ afs_vlocation_state_t state; /* volume location state */ unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */ @@ -273,7 +274,7 @@ struct afs_vlocation { */ struct afs_server { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct in_addr addr; /* server address */ struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ diff --git a/fs/afs/server.c b/fs/afs/server.c index d4066ab7dd55..c001b1f2455f 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server) spin_lock(&afs_server_graveyard_lock); if (atomic_read(&server->usage) == 0) { list_move_tail(&server->grave, &afs_server_graveyard); - server->time_of_death = get_seconds(); + server->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_server_reaper, afs_server_timeout * HZ); } @@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work) LIST_HEAD(corpses); struct afs_server *server; unsigned long delay, expiry; - time_t now; + time64_t now; - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_server_graveyard_lock); while (!list_empty(&afs_server_graveyard)) { diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index d7d8dd8c0b31..37b7c3b342a6 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) struct afs_vlocation *xvl; /* wait at least 10 minutes before updating... */ - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); @@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl) if (atomic_read(&vl->usage) == 0) { _debug("buried"); list_move_tail(&vl->grave, &afs_vlocation_graveyard); - vl->time_of_death = get_seconds(); + vl->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_vlocation_reap, afs_vlocation_timeout * HZ); @@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work) LIST_HEAD(corpses); struct afs_vlocation *vl; unsigned long delay, expiry; - time_t now; + time64_t now; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_vlocation_graveyard_lock); while (!list_empty(&afs_vlocation_graveyard)) { @@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work) { struct afs_cache_vlocation vldb; struct afs_vlocation *vl, *xvl; - time_t now; + time64_t now; long timeout; int ret; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find a record to update */ spin_lock(&afs_vlocation_updates_lock); @@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); -- cgit v1.2.3 From 56e714312e7dbd6bb83b2f78d3ec19a404c7649f Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: afs: Prevent callback expiry timer overflow get_seconds() returns real wall-clock seconds. On 32-bit systems this value will overflow in year 2038 and beyond. This patch changes afs_vnode record to use ktime_get_real_seconds() instead, for the fields cb_expires and cb_expires_at. Signed-off-by: Tina Ruchandani Signed-off-by: David Howells --- fs/afs/fsclient.c | 2 +- fs/afs/inode.c | 7 ++++--- fs/afs/internal.h | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 6f917dd1238c..c05452a09398 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -145,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode) vnode->cb_version = ntohl(*bp++); vnode->cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds(); *_bp = bp; } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index e083e086b7ca..4079c832ff27 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -246,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_version = 0; vnode->cb_expiry = 0; vnode->cb_type = 0; - vnode->cb_expires = get_seconds(); + vnode->cb_expires = ktime_get_real_seconds(); } else { vnode->cb_version = cb->version; vnode->cb_expiry = cb->expiry; vnode->cb_type = cb->type; - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + + ktime_get_real_seconds(); } } @@ -324,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) && !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - if (vnode->cb_expires < get_seconds() + 10) { + if (vnode->cb_expires < ktime_get_real_seconds() + 10) { _debug("callback expired"); set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); } else { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 97a16ce200be..832555003d03 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -377,8 +377,8 @@ struct afs_vnode { struct rb_node server_rb; /* link in server->fs_vnodes */ struct rb_node cb_promise; /* link in server->cb_promises */ struct work_struct cb_broken_work; /* work to be done on callback break */ - time_t cb_expires; /* time at which callback expires */ - time_t cb_expires_at; /* time used to order cb_promise */ + time64_t cb_expires; /* time at which callback expires */ + time64_t cb_expires_at; /* time used to order cb_promise */ unsigned cb_version; /* callback version */ unsigned cb_expiry; /* callback expiry time */ afs_callback_type_t cb_type; /* type of callback */ -- cgit v1.2.3 From 29f069853287dcb46eaf45a50dbf1232c1444ac6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: afs: Fix AFS read bug Fix a bug in AFS read whereby the request page afs_read::index isn't incremented after calling ->page_done() if ->remain reaches 0, indicating that the data read is complete. Without this a NULL pointer exception happens when ->page_done() is called twice for the last page because the page clearing loop will call it also and afs_readpages_page_done() clears the current entry in the page list. BUG: unable to handle kernel NULL pointer dereference at (null) IP: afs_readpages_page_done+0x21/0xa4 [kafs] PGD 0 Oops: 0002 [#1] SMP Modules linked in: kafs(E) CPU: 2 PID: 3002 Comm: md5sum Tainted: G E 4.10.0-fscache #485 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 task: ffff8804017d86c0 task.stack: ffff8803fc1d8000 RIP: 0010:afs_readpages_page_done+0x21/0xa4 [kafs] RSP: 0018:ffff8803fc1db978 EFLAGS: 00010282 RAX: ffff880405d39af8 RBX: 0000000000000000 RCX: ffff880407d83ed4 RDX: 0000000000000000 RSI: ffff880405d39a00 RDI: ffff880405c6f400 RBP: ffff8803fc1db988 R08: 0000000000000000 R09: 0000000000000001 R10: ffff8803fc1db820 R11: ffff88040cf56000 R12: ffff8804088f1780 R13: ffff8804017d86c0 R14: ffff8804088f1780 R15: 0000000000003840 FS: 00007f8154469700(0000) GS:ffff88041fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000004016ec000 CR4: 00000000001406e0 Call Trace: afs_deliver_fs_fetch_data+0x5b9/0x60e [kafs] ? afs_make_call+0x316/0x4e8 [kafs] ? afs_make_call+0x359/0x4e8 [kafs] afs_deliver_to_call+0x173/0x2e8 [kafs] ? afs_make_call+0x316/0x4e8 [kafs] afs_make_call+0x37a/0x4e8 [kafs] ? wake_up_q+0x4f/0x4f ? __init_waitqueue_head+0x36/0x49 afs_fs_fetch_data+0x21c/0x227 [kafs] ? afs_fs_fetch_data+0x21c/0x227 [kafs] afs_vnode_fetch_data+0xf3/0x1d2 [kafs] afs_readpages+0x314/0x3fd [kafs] __do_page_cache_readahead+0x208/0x2c5 ondemand_readahead+0x3a2/0x3b7 ? ondemand_readahead+0x3a2/0x3b7 page_cache_async_readahead+0x5e/0x67 generic_file_read_iter+0x23b/0x70c ? __inode_security_revalidate+0x2f/0x62 __vfs_read+0xc4/0xe8 vfs_read+0xd1/0x15a SyS_read+0x4c/0x89 do_syscall_64+0x80/0x191 entry_SYSCALL64_slow_path+0x25/0x25 Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/fsclient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index c05452a09398..4314f9e63a2c 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -390,9 +390,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (call->offset == PAGE_SIZE) { if (req->page_done) req->page_done(call, req); + req->index++; if (req->remain > 0) { call->offset = 0; - req->index++; if (req->index >= req->nr_pages) { call->unmarshall = 4; goto begin_discard; -- cgit v1.2.3 From 6a0e3999e5cb3daa0468073fcdee0767422a4056 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: afs: Make struct afs_read::remain 64-bit Make struct afs_read::remain 64-bit so that it can handle huge transfers if we ever request them or the server decides to give us a bit extra data (the other fields there are already 64-bit). Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/fsclient.c | 8 ++++---- fs/afs/internal.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 4314f9e63a2c..0778c5b6b59b 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -321,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) void *buffer; int ret; - _enter("{%u,%zu/%u;%u/%llu}", + _enter("{%u,%zu/%u;%llu/%llu}", call->unmarshall, call->offset, call->count, req->remain, req->actual_len); @@ -379,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) /* extract the returned data */ case 3: - _debug("extract data %u/%llu %zu/%u", + _debug("extract data %llu/%llu %zu/%u", req->remain, req->actual_len, call->offset, call->count); buffer = kmap(req->pages[req->index]); @@ -405,9 +405,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) /* Discard any excess data the server gave us */ begin_discard: case 4: - size = min_t(size_t, sizeof(afs_discard_buffer), req->remain); + size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain); call->count = size; - _debug("extract discard %u/%llu %zu/%u", + _debug("extract discard %llu/%llu %zu/%u", req->remain, req->actual_len, call->offset, call->count); call->offset = 0; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 832555003d03..a6901360fb81 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -133,8 +133,8 @@ struct afs_read { loff_t pos; /* Where to start reading */ loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ + loff_t remain; /* Amount remaining */ atomic_t usage; - unsigned int remain; /* Amount remaining */ unsigned int index; /* Which page we're reading into */ unsigned int nr_pages; void (*page_done)(struct afs_call *, struct afs_read *); -- cgit v1.2.3 From 2f5705a5c805e7f761f2228820656bb9363a3d8c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: afs: Use a bvec rather than a kvec in afs_send_pages() Use a bvec rather than a kvec in afs_send_pages() as we don't then have to call kmap() in advance. This allows us to pass the array of contiguous pages that we extracted through to rxrpc in one go rather than passing a single page at a time. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 97 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 52 insertions(+), 45 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 419ef05dcb5e..bf45307ff201 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call) call->buffer = NULL; } +#define AFS_BVEC_MAX 8 + +/* + * Load the given bvec with the next few pages. + */ +static void afs_load_bvec(struct afs_call *call, struct msghdr *msg, + struct bio_vec *bv, pgoff_t first, pgoff_t last, + unsigned offset) +{ + struct page *pages[AFS_BVEC_MAX]; + unsigned int nr, n, i, to, bytes = 0; + + nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX); + n = find_get_pages_contig(call->mapping, first, nr, pages); + ASSERTCMP(n, ==, nr); + + msg->msg_flags |= MSG_MORE; + for (i = 0; i < nr; i++) { + to = PAGE_SIZE; + if (first + i >= last) { + to = call->last_to; + msg->msg_flags &= ~MSG_MORE; + } + bv[i].bv_page = pages[i]; + bv[i].bv_len = to - offset; + bv[i].bv_offset = offset; + bytes += to - offset; + offset = 0; + } + + iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes); +} + /* * attach the data from a bunch of pages on an inode to a call */ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) { - struct page *pages[8]; - unsigned count, n, loop, offset, to; + struct bio_vec bv[AFS_BVEC_MAX]; + unsigned int bytes, nr, loop, offset; pgoff_t first = call->first, last = call->last; int ret; - _enter(""); - offset = call->first_offset; call->first_offset = 0; do { - _debug("attach %lx-%lx", first, last); - - count = last - first + 1; - if (count > ARRAY_SIZE(pages)) - count = ARRAY_SIZE(pages); - n = find_get_pages_contig(call->mapping, first, count, pages); - ASSERTCMP(n, ==, count); - - loop = 0; - do { - struct bio_vec bvec = {.bv_page = pages[loop], - .bv_offset = offset}; - msg->msg_flags = 0; - to = PAGE_SIZE; - if (first + loop >= last) - to = call->last_to; - else - msg->msg_flags = MSG_MORE; - bvec.bv_len = to - offset; - offset = 0; - - _debug("- range %u-%u%s", - offset, to, msg->msg_flags ? " [more]" : ""); - iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, - &bvec, 1, to - offset); - - /* have to change the state *before* sending the last - * packet as RxRPC might give us the reply before it - * returns from sending the request */ - if (first + loop >= last) - call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, - msg, to - offset); - if (ret < 0) - break; - } while (++loop < count); - first += count; - - for (loop = 0; loop < count; loop++) - put_page(pages[loop]); + afs_load_bvec(call, msg, bv, first, last, offset); + offset = 0; + bytes = msg->msg_iter.count; + nr = msg->msg_iter.nr_segs; + + /* Have to change the state *before* sending the last + * packet as RxRPC might give us the reply before it + * returns from sending the request. + */ + if (first + nr >= last) + call->state = AFS_CALL_AWAIT_REPLY; + ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, + msg, bytes); + for (loop = 0; loop < nr; loop++) + put_page(bv[loop].bv_page); if (ret < 0) break; + + first += nr; } while (first <= last); - _leave(" = %d", ret); return ret; } -- cgit v1.2.3 From 146a1192783697810b63a1e41c4d59fc93387340 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: afs: Fix the maths in afs_fs_store_data() afs_fs_store_data() works out of the size of the write it's going to make, but it uses 32-bit unsigned subtraction in one place that gets automatically cast to loff_t. However, if to < offset, then the number goes negative, but as the result isn't signed, this doesn't get sign-extended to 64-bits when placed in a loff_t. Fix by casting the operands to loff_t. Signed-off-by: David Howells --- fs/afs/fsclient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 0778c5b6b59b..d9234b767287 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1236,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, _enter(",%x,{%x:%u},,", key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); - size = to - offset; + size = (loff_t)to - (loff_t)offset; if (first != last) size += (loff_t)(last - first) << PAGE_SHIFT; pos = (loff_t)first << PAGE_SHIFT; -- cgit v1.2.3 From 1157f153f37a8586765034470e4f00a4a6c4ce6f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: afs: Invalid op ID should abort with RXGEN_OPCODE When we are given an invalid operation ID, we should abort that with RXGEN_OPCODE rather than RX_INVALID_OPERATION. Also map RXGEN_OPCODE to -ENOTSUPP. Signed-off-by: David Howells --- fs/afs/misc.c | 2 ++ fs/afs/rxrpc.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 91ea1aa0d8b3..100b207efc9e 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code) case RXKADDATALEN: return -EKEYREJECTED; case RXKADILLEGALLEVEL: return -EKEYREJECTED; + case RXGEN_OPCODE: return -ENOTSUPP; + default: return -EREMOTEIO; } } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bf45307ff201..bf7761fe6ef5 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -465,7 +465,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code, -ret, "KNC"); goto do_abort; case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; + abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); goto do_abort; -- cgit v1.2.3 From 70af0e3bd65142f9e674961c975451638a7ce1d5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: afs: Better abort and net error handling If we receive a network error, a remote abort or a protocol error whilst we're still transmitting data, make sure we return an appropriate error to the caller rather than ESHUTDOWN or ECONNABORTED. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bf7761fe6ef5..22d26b369070 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -340,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; + size_t offset; + u32 abort_code; int ret; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -388,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_controllen = 0; msg.msg_flags = (call->send_pages ? MSG_MORE : 0); - /* have to change the state *before* sending the last packet as RxRPC - * might give us the reply before it returns from sending the - * request */ + /* We have to change the state *before* sending the last packet as + * rxrpc might give us the reply before it returns from sending the + * request. Further, if the send fails, we may already have been given + * a notification and may have collected it. + */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, rxcall, @@ -412,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return afs_wait_for_call_to_complete(call); error_do_abort: - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); + call->state = AFS_CALL_COMPLETE; + if (ret != -ECONNABORTED) { + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, + -ret, "KSD"); + } else { + abort_code = 0; + offset = 0; + rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, + false, &abort_code); + ret = call->type->abort_to_error(abort_code); + } error_kill_call: afs_put_call(call); _leave(" = %d", ret); @@ -459,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call) case -EINPROGRESS: case -EAGAIN: goto out; + case -ECONNABORTED: + goto call_complete; case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KNC"); - goto do_abort; + goto save_error; case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); - goto do_abort; + goto save_error; case -ENODATA: case -EBADMSG: case -EMSGSIZE: @@ -478,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, EBADMSG, "KUM"); - goto do_abort; + goto save_error; } } @@ -489,8 +505,9 @@ out: _leave(""); return; -do_abort: +save_error: call->error = ret; +call_complete: call->state = AFS_CALL_COMPLETE; goto done; } @@ -538,6 +555,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) _debug("call incomplete"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD, -ret, abort_why); + } else if (call->error < 0) { + ret = call->error; } _debug("call complete"); -- cgit v1.2.3 From ab94f5d0dd6fd82e7eeca5e7c8096eaea0a0261f Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: afs: Populate and use client modification time The inode timestamps should be set from the client time in the status received from the server, rather than the server time which is meant for internal server use. Set AFS_SET_MTIME and populate the mtime for operations that take an input status, such as file/dir creation and StoreData. If an input time is not provided the server will set the vnode times based on the current server time. In a situation where the server has some skew with the client, this could lead to the client seeing a timestamp in the future for a file that it just created or wrote. Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/fsclient.c | 18 +++++++++--------- fs/afs/inode.c | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index d9234b767287..19f76ae36982 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -111,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_mode = mode; } - vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; + vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_version = data_version; @@ -734,8 +734,8 @@ int afs_fs_create(struct afs_server *server, memset(bp, 0, padsz); bp = (void *) bp + padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ @@ -1003,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server, memset(bp, 0, c_padsz); bp = (void *) bp + c_padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(S_IRWXUGO); /* unix mode */ @@ -1203,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ @@ -1280,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4079c832ff27..aae55dd15108 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -85,7 +85,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_uid = vnode->status.owner; inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; - inode->i_ctime.tv_sec = vnode->status.mtime_server; + inode->i_ctime.tv_sec = vnode->status.mtime_client; inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; -- cgit v1.2.3 From 68ae849d7e674b83610bc7fdf74b21621a09b9ac Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: afs: Don't set PG_error on local EINTR or ENOMEM when filling a page Don't set PG_error on a page if we get local EINTR or ENOMEM when filling a page for writing. Signed-off-by: David Howells --- fs/afs/file.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index b5829443ff69..0d5b8508869b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -212,7 +212,13 @@ int afs_page_filler(void *data, struct page *page) fscache_uncache_page(vnode->cache, page); #endif BUG_ON(PageFsCache(page)); - goto error; + + if (ret == -EINTR || + ret == -ENOMEM || + ret == -ERESTARTSYS || + ret == -EAGAIN) + goto error; + goto io_error; } SetPageUptodate(page); @@ -231,10 +237,12 @@ int afs_page_filler(void *data, struct page *page) _leave(" = 0"); return 0; +io_error: + SetPageError(page); + goto error; enomem: ret = -ENOMEM; error: - SetPageError(page); unlock_page(page); _leave(" = %d", ret); return ret; -- cgit v1.2.3 From 6d06b0d25209c80e99c1e89700f1e09694a3766b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: afs: Fix page leak in afs_write_begin() afs_write_begin() leaks a ref and a lock on a page if afs_fill_page() fails. Fix the leak by unlocking and releasing the page in the error path. Signed-off-by: David Howells --- fs/afs/write.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index f1450ea09406..6e13e96c3db0 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -154,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping, kfree(candidate); return -ENOMEM; } - *pagep = page; - /* page won't leak in error case: it eventually gets cleaned off LRU */ if (!PageUptodate(page) && len != PAGE_SIZE) { ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { + unlock_page(page); + put_page(page); kfree(candidate); _leave(" = %d [prep]", ret); return ret; @@ -167,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping, SetPageUptodate(page); } + /* page won't leak in error case: it eventually gets cleaned off LRU */ + *pagep = page; + try_again: spin_lock(&vnode->writeback_lock); -- cgit v1.2.3 From 7286a35e893176169b09715096a4aca557e2ccd2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: afs: Fix afs_kill_pages() Fix afs_kill_pages() in two ways: (1) If a writeback has been partially flushed, then if we try and kill the pages it contains, some of them may no longer be undergoing writeback and end_page_writeback() will assert. Fix this by checking to see whether the page in question is actually undergoing writeback before ending that writeback. (2) The loop that scans for pages to kill doesn't increase the first page index, and so the loop may not terminate, but it will try to process the same pages over and over again. Fix this by increasing the first page index to one after the last page we processed. Signed-off-by: David Howells --- fs/afs/write.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 6e13e96c3db0..134de0667898 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -321,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { - ClearPageUptodate(pv.pages[loop]); + struct page *page = pv.pages[loop]; + ClearPageUptodate(page); if (error) - SetPageError(pv.pages[loop]); - end_page_writeback(pv.pages[loop]); + SetPageError(page); + if (PageWriteback(page)) + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; } __pagevec_release(&pv); -- cgit v1.2.3 From 445783d0ec173a52bef2e9b129de7d716a19b9fa Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: afs: Fix an off-by-one error in afs_send_pages() afs_send_pages() should only put the call into the AFS_CALL_AWAIT_REPLY state if it has sent all the pages - but the check it makes is incorrect and sometimes it will finish the loop early. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 22d26b369070..b12da6aa5412 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -315,7 +315,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) * packet as RxRPC might give us the reply before it * returns from sending the request. */ - if (first + nr >= last) + if (first + nr - 1 >= last) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg, bytes); -- cgit v1.2.3 From 954cd6dc02a65065aecb7150962c0870c5b0e322 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: afs: Fix abort on signal while waiting for call completion Fix the way in which a call that's in progress and being waited for is aborted in the case that EINTR is detected. We should be sending RX_USER_ABORT rather than RX_CALL_DEAD as the abort code. Note that since the only two ways out of the loop are if the call completes or if a signal happens, the kill-the-call clause after the loop has finished can only happen in the case of EINTR. This means that we only have one abort case to deal with, not two, and the "KWC" case can never happen and so can be deleted. Note further that simply aborting the call isn't necessarily the best thing here since at this point: the request has been entirely sent and it's likely the server will do the operation anyway - whether we abort it or not. In future, we should punt the handling of the remainder of the call off to a background thread. Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/rxrpc.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index b12da6aa5412..8f76b13d5549 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -517,7 +517,6 @@ call_complete: */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -536,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } - abort_why = "KWC"; - ret = call->error; - if (call->state == AFS_CALL_COMPLETE) - break; - abort_why = "KWI"; - ret = -EINTR; - if (signal_pending(current)) + if (call->state == AFS_CALL_COMPLETE || + signal_pending(current)) break; schedule(); } @@ -550,15 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); - /* kill the call */ + /* Kill off the call if it's still live. */ if (call->state < AFS_CALL_COMPLETE) { - _debug("call incomplete"); + _debug("call interrupted"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_CALL_DEAD, -ret, abort_why); - } else if (call->error < 0) { - ret = call->error; + RX_USER_ABORT, -EINTR, "KWI"); } + ret = call->error; _debug("call complete"); afs_put_call(call); _leave(" = %d", ret); -- cgit v1.2.3 From 65a151094edeb04e8f5f6f1502028e2383e81bb8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: afs: ->writepage() shouldn't call clear_page_dirty_for_io() The ->writepage() op shouldn't call clear_page_dirty_for_io() as that has already been called by the caller. Fix afs_writepage() by moving the call out of afs_write_back_from_locked_page() to afs_writepages_region() where it is needed. Signed-off-by: David Howells --- fs/afs/write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 134de0667898..e5f150bccfb5 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -231,7 +231,7 @@ flush_conflicting_wb: if (wb->state == AFS_WBACK_PENDING) wb->state = AFS_WBACK_CONFLICTING; spin_unlock(&vnode->writeback_lock); - if (PageDirty(page)) { + if (clear_page_dirty_for_io(page)) { ret = afs_write_back_from_locked_page(wb, page); if (ret < 0) { afs_put_writeback(candidate); @@ -353,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, _enter(",%lx", primary_page->index); count = 1; - if (!clear_page_dirty_for_io(primary_page)) - BUG(); if (test_set_page_writeback(primary_page)) BUG(); @@ -542,6 +540,8 @@ static int afs_writepages_region(struct address_space *mapping, wb->state = AFS_WBACK_WRITING; spin_unlock(&wb->vnode->writeback_lock); + if (!clear_page_dirty_for_io(page)) + BUG(); ret = afs_write_back_from_locked_page(wb, page); unlock_page(page); put_page(page); -- cgit v1.2.3 From c5051c7bc777dffa5661569dec5997f432b9a34a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: afs: Don't wait for page writeback with the page lock held Drop the page lock before waiting for page writeback. Signed-off-by: David Howells --- fs/afs/write.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index e5f150bccfb5..2d2fccd5044b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -518,17 +518,16 @@ static int afs_writepages_region(struct address_space *mapping, */ lock_page(page); - if (page->mapping != mapping) { + if (page->mapping != mapping || !PageDirty(page)) { unlock_page(page); put_page(page); continue; } - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || !PageDirty(page)) { + if (PageWriteback(page)) { unlock_page(page); + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); put_page(page); continue; } -- cgit v1.2.3 From e4c5d3762e2d6d274bd1cc948c47063becfa2103 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 27 Feb 2017 18:44:45 +0200 Subject: nvme-loop: fix a possible use-after-free when destroying the admin queue we need to destroy the nvmet sq and let it finish gracefully before continue to cleanup the queue. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index d1f06e7768ff..74e04a0855d4 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -288,9 +288,9 @@ static struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { + nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); } static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) -- cgit v1.2.3 From d11ea004a458b982e19b188c386e25a9b66ec446 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 6 Mar 2017 18:46:20 +0200 Subject: nvmet: confirm sq percpu has scheduled and switched to atomic percpu_ref_kill is not enough to prevent subsequent percpu_ref_tryget_live from failing. Hence call perfcpu_ref_kill_confirm to make it safe. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/core.c | 11 ++++++++++- drivers/nvme/target/nvmet.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 11b0a0a5f661..798653b329b2 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -425,6 +425,13 @@ void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, ctrl->sqs[qid] = sq; } +static void nvmet_confirm_sq(struct percpu_ref *ref) +{ + struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref); + + complete(&sq->confirm_done); +} + void nvmet_sq_destroy(struct nvmet_sq *sq) { /* @@ -433,7 +440,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) */ if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq) nvmet_async_events_free(sq->ctrl); - percpu_ref_kill(&sq->ref); + percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); + wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); percpu_ref_exit(&sq->ref); @@ -461,6 +469,7 @@ int nvmet_sq_init(struct nvmet_sq *sq) return ret; } init_completion(&sq->free_done); + init_completion(&sq->confirm_done); return 0; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 1370eee0a3c0..f7ff15f17ca9 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -73,6 +73,7 @@ struct nvmet_sq { u16 qid; u16 size; struct completion free_done; + struct completion confirm_done; }; /** -- cgit v1.2.3 From b25634e2a051bef4b2524b11adddfbfa6448f6cd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 9 Mar 2017 13:45:52 +0200 Subject: nvmet-rdma: Fix a possible uninitialized variable dereference When handling a new recv command, we grab a new rsp resource and check for the queue state being live. In case the queue is not in live state, we simply restore the rsp back to the free list. However in this flow we didn't set rsp->queue yet, so we cannot dereference it. Instead, make sure to initialize rsp->queue (and other rsp members) as soon as possible so we won't reference uninitialized variables. Reported-by: Yi Zhang Reported-by: Raju Rangoju Reviewed-by: Christoph Hellwig Tested-by: Raju Rangoju Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 9aa1da3778b3..ecc4fe862561 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -703,11 +703,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, { u16 status; - cmd->queue = queue; - cmd->n_rdma = 0; - cmd->req.port = queue->port; - - ib_dma_sync_single_for_cpu(queue->dev->device, cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length, DMA_FROM_DEVICE); @@ -760,9 +755,12 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) cmd->queue = queue; rsp = nvmet_rdma_get_rsp(queue); + rsp->queue = queue; rsp->cmd = cmd; rsp->flags = 0; rsp->req.cmd = cmd->nvme_cmd; + rsp->req.port = queue->port; + rsp->n_rdma = 0; if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) { unsigned long flags; -- cgit v1.2.3 From b334e19ae9381f12a7521976883022385d2b7eef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Jan 2017 16:40:01 +0100 Subject: Kbuild: use cc-disable-warning consistently for maybe-uninitialized In commit a76bcf557ef4 ("Kbuild: enable -Wmaybe-uninitialized warning for "make W=1""), I reverted another change that happened to fix a problem with old compilers, and now we get this report again with old compilers (prior to gcc-4.8) and GCOV enabled: cc1: warnings being treated as errors drivers/gpu/drm/i915/intel_ringbuffer.c: In function 'intel_ring_setup_status_page': drivers/gpu/drm/i915/intel_ringbuffer.c:438: error: 'mmio.reg' may be used uninitialized in this function At top level: >> cc1: error: unrecognized command line option "-Wno-maybe-uninitialized" The problem is that we turn off the warning conditionally in a number of places as we should, but one of them does it unconditionally. Instead, change it to call cc-disable-warning as we do elsewhere. The original patch that caused it was merged into linux-4.7, then 4.8 removed the change and 4.9 brought it back, so we probably want a backport to 4.9 once this is merged. Use a ':=' assignment instead of '=' to force the cc-disable-warning call to only be evaluated once instead of every time. Cc: stable@vger.kernel.org Fixes: a76bcf557ef4 ("Kbuild: enable -Wmaybe-uninitialized warning for "make W=1"") Fixes: e72e2dfe7c16 ("gcov: disable -Wmaybe-uninitialized warning") Reported-by: kbuild test robot Signed-off-by: Arnd Bergmann Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 165cf9783a5d..b8a6b862ed73 100644 --- a/Makefile +++ b/Makefile @@ -372,7 +372,7 @@ LDFLAGS_MODULE = CFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = -CFLAGS_GCOV = -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized +CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,) CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) -- cgit v1.2.3 From 8f3dbfd79ed9ef9770305a7cc4e13dfd31ad2cd0 Mon Sep 17 00:00:00 2001 From: Kris Murphy Date: Thu, 16 Mar 2017 10:51:28 -0500 Subject: openvswitch: Add missing case OVS_TUNNEL_KEY_ATTR_PAD Added a case for OVS_TUNNEL_KEY_ATTR_PAD to the switch statement in ip_tun_from_nlattr in order to prevent the default case returning an error. Fixes: b46f6ded906e ("libnl: nla_put_be64(): align on a 64-bit area") Signed-off-by: Kris Murphy Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index a08ff834676b..1105a838bab8 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -665,6 +665,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_VXLAN_OPT; opts_type = type; break; + case OVS_TUNNEL_KEY_ATTR_PAD: + break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); -- cgit v1.2.3 From 59cf8bed44a79ec42303151dd014fdb6434254bb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:34:02 -0700 Subject: Input: iforce - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory that lie beyond the end of the endpoint array should a malicious device lack the expected endpoints. Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/iforce/iforce-usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index d96aa27dfcdc..db64adfbe1af 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -141,6 +141,9 @@ static int iforce_usb_probe(struct usb_interface *intf, interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints < 2) + return -ENODEV; + epirq = &interface->endpoint[0].desc; epout = &interface->endpoint[1].desc; -- cgit v1.2.3 From ac2ee9ba953afe88f7a673e1c0c839227b1d7891 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:35:12 -0700 Subject: Input: cm109 - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: c04148f915e5 ("Input: add driver for USB VoIP phones with CM109...") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 2.6.28 Signed-off-by: Dmitry Torokhov --- drivers/input/misc/cm109.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c index 9cc6d057c302..23c191a2a071 100644 --- a/drivers/input/misc/cm109.c +++ b/drivers/input/misc/cm109.c @@ -700,6 +700,10 @@ static int cm109_usb_probe(struct usb_interface *intf, int error = -ENOMEM; interface = intf->cur_altsetting; + + if (interface->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) -- cgit v1.2.3 From 1916d319271664241b7aa0cd2b05e32bdb310ce9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:36:13 -0700 Subject: Input: ims-pcu - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack control-interface endpoints. Fixes: 628329d52474 ("Input: add IMS Passenger Control Unit driver") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 3.10 Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ims-pcu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/ims-pcu.c b/drivers/input/misc/ims-pcu.c index 9c0ea36913b4..f4e8fbec6a94 100644 --- a/drivers/input/misc/ims-pcu.c +++ b/drivers/input/misc/ims-pcu.c @@ -1667,6 +1667,10 @@ static int ims_pcu_parse_cdc_data(struct usb_interface *intf, struct ims_pcu *pc return -EINVAL; alt = pcu->ctrl_intf->cur_altsetting; + + if (alt->desc.bNumEndpoints < 1) + return -ENODEV; + pcu->ep_ctrl = &alt->endpoint[0].desc; pcu->max_ctrl_size = usb_endpoint_maxp(pcu->ep_ctrl); -- cgit v1.2.3 From 5cc4a1a9f5c179795c8a1f2b0f4361829d6a070e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:37:01 -0700 Subject: Input: yealink - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: aca951a22a1d ("[PATCH] input-driver-yealink-P1K-usb-phone") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 2.6.14 Signed-off-by: Dmitry Torokhov --- drivers/input/misc/yealink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c index 79c964c075f1..6e7ff9561d92 100644 --- a/drivers/input/misc/yealink.c +++ b/drivers/input/misc/yealink.c @@ -875,6 +875,10 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id) int ret, pipe, i; interface = intf->cur_altsetting; + + if (interface->desc.bNumEndpoints < 1) + return -ENODEV; + endpoint = &interface->endpoint[0].desc; if (!usb_endpoint_is_int_in(endpoint)) return -ENODEV; -- cgit v1.2.3 From ba340d7b83703768ce566f53f857543359aa1b98 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:39:29 -0700 Subject: Input: hanwang - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: bba5394ad3bd ("Input: add support for Hanwang tablets") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 2.6.37 Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/hanwang.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/tablet/hanwang.c b/drivers/input/tablet/hanwang.c index cd852059b99e..df4bea96d7ed 100644 --- a/drivers/input/tablet/hanwang.c +++ b/drivers/input/tablet/hanwang.c @@ -340,6 +340,9 @@ static int hanwang_probe(struct usb_interface *intf, const struct usb_device_id int error; int i; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + hanwang = kzalloc(sizeof(struct hanwang), GFP_KERNEL); input_dev = input_allocate_device(); if (!hanwang || !input_dev) { -- cgit v1.2.3 From cb1b494663e037253337623bf1ef2df727883cb7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:41:55 -0700 Subject: Input: kbtab - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/kbtab.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c index e850d7e8afbc..4d9d64908b59 100644 --- a/drivers/input/tablet/kbtab.c +++ b/drivers/input/tablet/kbtab.c @@ -122,6 +122,9 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i struct input_dev *input_dev; int error = -ENOMEM; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + kbtab = kzalloc(sizeof(struct kbtab), GFP_KERNEL); input_dev = input_allocate_device(); if (!kbtab || !input_dev) -- cgit v1.2.3 From 92461f5d723037530c1f36cce93640770037812c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 11:43:09 -0700 Subject: Input: sur40 - validate number of endpoints before using them Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory that lie beyond the end of the endpoint array should a malicious device lack the expected endpoints. Fixes: bdb5c57f209c ("Input: add sur40 driver for Samsung SUR40... ") Signed-off-by: Johan Hovold Cc: stable@vger.kernel.org # 3.13 Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/sur40.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c index aefb6e11f88a..4c0eecae065c 100644 --- a/drivers/input/touchscreen/sur40.c +++ b/drivers/input/touchscreen/sur40.c @@ -527,6 +527,9 @@ static int sur40_probe(struct usb_interface *interface, if (iface_desc->desc.bInterfaceClass != 0xFF) return -ENODEV; + if (iface_desc->desc.bNumEndpoints < 5) + return -ENODEV; + /* Use endpoint #4 (0x86). */ endpoint = &iface_desc->endpoint[4].desc; if (endpoint->bEndpointAddress != TOUCH_ENDPOINT) -- cgit v1.2.3 From acfa28b3649ec07775efaac0c00de2db39d71634 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 1 Mar 2017 18:02:28 -0500 Subject: ARM: dts: NSP: GPIO reboot open-source The libgpio code pre-sets the GPIO values for the gpio-reset in the device tree. This results in the device being reset during bringup. To prevent this pre-setting, use the "open-source" flag in the device tree. Signed-off-by: Jon Mason Fixes: b1aaf88 ("ARM: dts: NSP: Add GPIO reboot method to bcm958625hr DTS file") Fixes: 10baed1 ("ARM: dts: NSP: Add GPIO reboot method to bcm958625xmc DTS file") Fixes: 088e3148 ("ARM: dts: NSP: Add new DT file for bcm958522er") Fixes: e3227c1 ("ARM: dts: NSP: Add new DT file for bcm958525er") Fixes: 2f8bc00 ("ARM: dts: NSP: Add new DT file for bcm958622hr") Fixes: d454c37 ("ARM: dts: NSP: Add new DT file for bcm958623hr") Fixes: f27eacf ("ARM: dts: NSP: Add new DT file for bcm988312hr") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm958522er.dts | 1 + arch/arm/boot/dts/bcm958525er.dts | 1 + arch/arm/boot/dts/bcm958525xmc.dts | 1 + arch/arm/boot/dts/bcm958622hr.dts | 1 + arch/arm/boot/dts/bcm958623hr.dts | 1 + arch/arm/boot/dts/bcm958625hr.dts | 1 + arch/arm/boot/dts/bcm988312hr.dts | 1 + 7 files changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/bcm958522er.dts b/arch/arm/boot/dts/bcm958522er.dts index 3f04a40eb90c..df05e7f568af 100644 --- a/arch/arm/boot/dts/bcm958522er.dts +++ b/arch/arm/boot/dts/bcm958522er.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958525er.dts b/arch/arm/boot/dts/bcm958525er.dts index 9fd542200d3d..4a3ab19c6281 100644 --- a/arch/arm/boot/dts/bcm958525er.dts +++ b/arch/arm/boot/dts/bcm958525er.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958525xmc.dts b/arch/arm/boot/dts/bcm958525xmc.dts index 41e7fd350fcd..81f78435d8c7 100644 --- a/arch/arm/boot/dts/bcm958525xmc.dts +++ b/arch/arm/boot/dts/bcm958525xmc.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 31 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958622hr.dts b/arch/arm/boot/dts/bcm958622hr.dts index 477c4860db52..c88b8fefcb2f 100644 --- a/arch/arm/boot/dts/bcm958622hr.dts +++ b/arch/arm/boot/dts/bcm958622hr.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts index c0a499d5ba44..d503fa0dde31 100644 --- a/arch/arm/boot/dts/bcm958623hr.dts +++ b/arch/arm/boot/dts/bcm958623hr.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index f7eb5854a224..cc0363b843c1 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; diff --git a/arch/arm/boot/dts/bcm988312hr.dts b/arch/arm/boot/dts/bcm988312hr.dts index 16666324fda8..74e15a3cd9f8 100644 --- a/arch/arm/boot/dts/bcm988312hr.dts +++ b/arch/arm/boot/dts/bcm988312hr.dts @@ -55,6 +55,7 @@ gpio-restart { compatible = "gpio-restart"; gpios = <&gpioa 15 GPIO_ACTIVE_LOW>; + open-source; priority = <200>; }; }; -- cgit v1.2.3 From bf33f87dd04c371ea33feb821b60d63d754e3124 Mon Sep 17 00:00:00 2001 From: peter chang Date: Wed, 15 Feb 2017 14:11:54 -0800 Subject: scsi: sg: check length passed to SG_NEXT_CMD_LEN The user can control the size of the next command passed along, but the value passed to the ioctl isn't checked against the usable max command size. Cc: Signed-off-by: Peter Chang Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index e831e01f9fa6..849ff8104be2 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -996,6 +996,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) result = get_user(val, ip); if (result) return result; + if (val > SG_MAX_CDB_SIZE) + return -ENOMEM; sfp->next_cmd_len = (val > 0) ? val : 0; return 0; case SG_GET_VERSION_NUM: -- cgit v1.2.3 From 271df90e4e530c17f237b27034d6341cb2c2f536 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Thu, 16 Mar 2017 16:40:19 -0700 Subject: z3fold: fix spinlock unlocking in page reclaim Commmit 5a27aa822029 ("z3fold: add kref refcounting") introduced a bug in z3fold_reclaim_page() with function exit that may leave pool->lock spinlock held. Here comes the trivial fix. Fixes: 5a27aa822029 ("z3fold: add kref refcounting") Link: http://lkml.kernel.org/r/20170311222239.7b83d8e7ef1914e05497649f@gmail.com Reported-by: Alexey Khoroshilov Signed-off-by: Vitaly Wool Cc: Dan Streetman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/z3fold.c b/mm/z3fold.c index 8970a2fd3b1a..f9492bccfd79 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -667,6 +667,7 @@ next: z3fold_page_unlock(zhdr); spin_lock(&pool->lock); if (kref_put(&zhdr->refcount, release_z3fold_page)) { + spin_unlock(&pool->lock); atomic64_dec(&pool->pages_nr); return 0; } -- cgit v1.2.3 From 5be9b730b09c45c358bbfe7f51d254e306cccc07 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 16 Mar 2017 16:40:21 -0700 Subject: kasan: add a prototype of task_struct to avoid warning Add a prototype of task_struct to fix below warning on arm64. In file included from arch/arm64/kernel/probes/kprobes.c:19:0: include/linux/kasan.h:81:132: error: 'struct task_struct' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] static inline void kasan_unpoison_task_stack(struct task_struct *task) {} As same as other types (kmem_cache, page, and vm_struct) this adds a prototype of task_struct data structure on top of kasan.h. [arnd] A related warning was fixed before, but now appears in a different line in the same file in v4.11-rc2. The patch from Masami Hiramatsu still seems appropriate, so let's take his version. Fixes: 71af2ed5eeea ("kasan, sched/headers: Remove from ") Link: https://patchwork.kernel.org/patch/9569839/ Link: http://lkml.kernel.org/r/20170313141517.3397802-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Signed-off-by: Masami Hiramatsu Acked-by: Alexander Potapenko Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 1c823bef4c15..5734480c9590 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -6,6 +6,7 @@ struct kmem_cache; struct page; struct vm_struct; +struct task_struct; #ifdef CONFIG_KASAN -- cgit v1.2.3 From d0f33ac9ae7b2a727fb678235ae37baf1d0608d5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 16 Mar 2017 16:40:24 -0700 Subject: mm, x86: fix native_pud_clear build error We still get a build error in random configurations, after this has been modified a few times: In file included from include/linux/mm.h:68:0, from include/linux/suspend.h:8, from arch/x86/kernel/asm-offsets.c:12: arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear' #define pud_clear(pud) native_pud_clear(pud) My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear definition. Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()") Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Ackedy-by: Dave Jiang Cc: Matthew Wilcox Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Garnier Cc: Kees Cook Cc: Dave Hansen Cc: Hugh Dickins Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable-3level.h | 3 --- arch/x86/include/asm/pgtable.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5..50d35e3185f5 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024..585ee0d42d18 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif -- cgit v1.2.3 From 171012f561274784160f666f8398af8b42216e1f Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 16 Mar 2017 16:40:27 -0700 Subject: mm: don't warn when vmalloc() fails due to a fatal signal When vmalloc() fails it prints a very lengthy message with all the details about memory consumption assuming that it happened due to OOM. However, vmalloc() can also fail due to fatal signal pending. In such case the message is quite confusing because it suggests that it is OOM but the numbers suggest otherwise. The messages can also pollute console considerably. Don't warn when vmalloc() fails due to fatal signal pending. Link: http://lkml.kernel.org/r/20170313114425.72724-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Reviewed-by: Matthew Wilcox Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0dd80222b20b..0b057628a7ba 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1683,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, if (fatal_signal_pending(current)) { area->nr_pages = i; - goto fail; + goto fail_no_warn; } if (node == NUMA_NO_NODE) @@ -1709,6 +1709,7 @@ fail: warn_alloc(gfp_mask, NULL, "vmalloc: allocation failure, allocated %ld of %ld bytes", (area->nr_pages*PAGE_SIZE), area->size); +fail_no_warn: vfree(area->addr); return NULL; } -- cgit v1.2.3 From 55adc1d05dca9e949cdf46c747cb1e91c0e9143d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 16:40:30 -0700 Subject: mm: add private lock to serialize memory hotplug operations Commit bfc8c90139eb ("mem-hotplug: implement get/put_online_mems") introduced new functions get/put_online_mems() and mem_hotplug_begin/end() in order to allow similar semantics for memory hotplug like for cpu hotplug. The corresponding functions for cpu hotplug are get/put_online_cpus() and cpu_hotplug_begin/done() for cpu hotplug. The commit however missed to introduce functions that would serialize memory hotplug operations like they are done for cpu hotplug with cpu_maps_update_begin/done(). This basically leaves mem_hotplug.active_writer unprotected and allows concurrent writers to modify it, which may lead to problems as outlined by commit f931ab479dd2 ("mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}"). That commit was extended again with commit b5d24fda9c3d ("mm, devm_memremap_pages: hold device_hotplug lock over mem_hotplug_{begin, done}") which serializes memory hotplug operations for some call sites by using the device_hotplug lock. In addition with commit 3fc21924100b ("mm: validate device_hotplug is held for memory hotplug") a sanity check was added to mem_hotplug_begin() to verify that the device_hotplug lock is held. This in turn triggers the following warning on s390: WARNING: CPU: 6 PID: 1 at drivers/base/core.c:643 assert_held_device_hotplug+0x4a/0x58 Call Trace: assert_held_device_hotplug+0x40/0x58) mem_hotplug_begin+0x34/0xc8 add_memory_resource+0x7e/0x1f8 add_memory+0xda/0x130 add_memory_merged+0x15c/0x178 sclp_detect_standby_memory+0x2ae/0x2f8 do_one_initcall+0xa2/0x150 kernel_init_freeable+0x228/0x2d8 kernel_init+0x2a/0x140 kernel_thread_starter+0x6/0xc One possible fix would be to add more lock_device_hotplug() and unlock_device_hotplug() calls around each call site of mem_hotplug_begin/end(). But that would give the device_hotplug lock additional semantics it better should not have (serialize memory hotplug operations). Instead add a new memory_add_remove_lock which has the similar semantics like cpu_add_remove_lock for cpu hotplug. To keep things hopefully a bit easier the lock will be locked and unlocked within the mem_hotplug_begin/end() functions. Link: http://lkml.kernel.org/r/20170314125226.16779-2-heiko.carstens@de.ibm.com Signed-off-by: Heiko Carstens Reported-by: Sebastian Ott Acked-by: Dan Williams Acked-by: Rafael J. Wysocki Cc: Michal Hocko Cc: Vladimir Davydov Cc: Ben Hutchings Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 4 ---- mm/memory_hotplug.c | 6 +++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 06123234f118..07e85e5229da 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -247,11 +247,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); - lock_device_hotplug(); mem_hotplug_begin(); arch_remove_memory(align_start, align_size); mem_hotplug_done(); - unlock_device_hotplug(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); @@ -364,11 +362,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; - lock_device_hotplug(); mem_hotplug_begin(); error = arch_add_memory(nid, align_start, align_size, true); mem_hotplug_done(); - unlock_device_hotplug(); if (error) goto err_add_memory; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 295479b792ec..6fa7208bcd56 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -125,9 +125,12 @@ void put_online_mems(void) } +/* Serializes write accesses to mem_hotplug.active_writer. */ +static DEFINE_MUTEX(memory_add_remove_lock); + void mem_hotplug_begin(void) { - assert_held_device_hotplug(); + mutex_lock(&memory_add_remove_lock); mem_hotplug.active_writer = current; @@ -147,6 +150,7 @@ void mem_hotplug_done(void) mem_hotplug.active_writer = NULL; mutex_unlock(&mem_hotplug.lock); memhp_lock_release(); + mutex_unlock(&memory_add_remove_lock); } /* add this memory to iomem resource */ -- cgit v1.2.3 From 15c9e10d9ad4d41d076148bbff1de7f659f68852 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 16:40:33 -0700 Subject: drivers core: remove assert_held_device_hotplug() The last caller of assert_held_device_hotplug() is gone, so remove it again. Link: http://lkml.kernel.org/r/20170314125226.16779-3-heiko.carstens@de.ibm.com Signed-off-by: Heiko Carstens Acked-by: Dan Williams Cc: Michal Hocko Cc: "Rafael J. Wysocki" Cc: Vladimir Davydov Cc: Ben Hutchings Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Sebastian Ott Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/core.c | 5 ----- include/linux/device.h | 1 - 2 files changed, 6 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 684bda4d14a1..6bb60fb6a30b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -639,11 +639,6 @@ int lock_device_hotplug_sysfs(void) return restart_syscall(); } -void assert_held_device_hotplug(void) -{ - lockdep_assert_held(&device_hotplug_lock); -} - #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { diff --git a/include/linux/device.h b/include/linux/device.h index 30c4570e928d..9ef518af5515 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev) extern void lock_device_hotplug(void); extern void unlock_device_hotplug(void); extern int lock_device_hotplug_sysfs(void); -void assert_held_device_hotplug(void); extern int device_offline(struct device *dev); extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); -- cgit v1.2.3 From 966fa72a716ceafc69de901a31f7cc1f52b35f81 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Tue, 14 Mar 2017 08:17:00 +0530 Subject: kernfs: Check KERNFS_HAS_RELEASE before calling kernfs_release_file() Recently started seeing a kernel oops when a module tries removing a memory mapped sysfs bin_attribute. On closer investigation the root cause seems to be kernfs_release_file() trying to call kernfs_op.release() callback that's NULL for such sysfs bin_attributes. The oops occurs when kernfs_release_file() is called from kernfs_drain_open_files() to cleanup any open handles with active memory mappings. The patch fixes this by checking for flag KERNFS_HAS_RELEASE before calling kernfs_release_file() in function kernfs_drain_open_files(). On ppc64-le arch with cxl module the oops back-trace is of the form below: [ 861.381126] Unable to handle kernel paging request for instruction fetch [ 861.381360] Faulting instruction address: 0x00000000 [ 861.381428] Oops: Kernel access of bad area, sig: 11 [#1] .... [ 861.382481] NIP: 0000000000000000 LR: c000000000362c60 CTR: 0000000000000000 .... Call Trace: [c000000f1680b750] [c000000000362c34] kernfs_drain_open_files+0x104/0x1d0 (unreliable) [c000000f1680b790] [c00000000035fa00] __kernfs_remove+0x260/0x2c0 [c000000f1680b820] [c000000000360da0] kernfs_remove_by_name_ns+0x60/0xe0 [c000000f1680b8b0] [c0000000003638f4] sysfs_remove_bin_file+0x24/0x40 [c000000f1680b8d0] [c00000000062a164] device_remove_bin_file+0x24/0x40 [c000000f1680b8f0] [d000000009b7b22c] cxl_sysfs_afu_remove+0x144/0x170 [cxl] [c000000f1680b940] [d000000009b7c7e4] cxl_remove+0x6c/0x1a0 [cxl] [c000000f1680b990] [c00000000052f694] pci_device_remove+0x64/0x110 [c000000f1680b9d0] [c0000000006321d4] device_release_driver_internal+0x1f4/0x2b0 [c000000f1680ba20] [c000000000525cb0] pci_stop_bus_device+0xa0/0xd0 [c000000f1680ba60] [c000000000525e80] pci_stop_and_remove_bus_device+0x20/0x40 [c000000f1680ba90] [c00000000004a6c4] pci_hp_remove_devices+0x84/0xc0 [c000000f1680bad0] [c00000000004a688] pci_hp_remove_devices+0x48/0xc0 [c000000f1680bb10] [c0000000009dfda4] eeh_reset_device+0xb0/0x290 [c000000f1680bbb0] [c000000000032b4c] eeh_handle_normal_event+0x47c/0x530 [c000000f1680bc60] [c000000000032e64] eeh_handle_event+0x174/0x350 [c000000f1680bd10] [c000000000033228] eeh_event_handler+0x1e8/0x1f0 [c000000f1680bdc0] [c0000000000d384c] kthread+0x14c/0x190 [c000000f1680be30] [c00000000000b5a0] ret_from_kernel_thread+0x5c/0xbc Fixes: f83f3c515654 ("kernfs: fix locking around kernfs_ops->release() callback") Signed-off-by: Vaibhav Jain Acked-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 8e4dc7ab584c..ac2dfe0c5a9c 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -809,7 +809,8 @@ void kernfs_drain_open_files(struct kernfs_node *kn) if (kn->flags & KERNFS_HAS_MMAP) unmap_mapping_range(inode->i_mapping, 0, 0, 1); - kernfs_release_file(kn, of); + if (kn->flags & KERNFS_HAS_RELEASE) + kernfs_release_file(kn, of); } mutex_unlock(&kernfs_open_file_mutex); -- cgit v1.2.3 From 4cbe4dac82e423ecc9a0ba46af24a860853259f4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 13 Mar 2017 19:29:08 +0200 Subject: net/mlx4_core: Avoid delays during VF driver device shutdown Some Hypervisors detach VFs from VMs by instantly causing an FLR event to be generated for a VF. In the mlx4 case, this will cause that VF's comm channel to be disabled before the VM has an opportunity to invoke the VF device's "shutdown" method. For such Hypervisors, there is a race condition between the VF's shutdown method and its internal-error detection/reset thread. The internal-error detection/reset thread (which runs every 5 seconds) also detects a disabled comm channel. If the internal-error detection/reset flow wins the race, we still get delays (while that flow tries repeatedly to detect comm-channel recovery). The cited commit fixed the command timeout problem when the internal-error detection/reset flow loses the race. This commit avoids the unneeded delays when the internal-error detection/reset flow wins. Fixes: d585df1c5ccf ("net/mlx4_core: Avoid command timeouts during VF driver device shutdown") Signed-off-by: Jack Morgenstein Reported-by: Simon Xiao Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 11 +++++++++++ include/linux/mlx4/device.h | 1 + 3 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e8c105164931..0e0fa7030565 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2305,6 +2305,17 @@ static int sync_toggles(struct mlx4_dev *dev) rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { /* PCI might be offline */ + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) { + mlx4_warn(dev, + "communication channel is offline\n"); + return -EIO; + } + msleep(100); wr_toggle = swab32(readl(&priv->mfunc.comm-> slave_write)); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 21377c315083..703205475524 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1940,6 +1940,14 @@ static int mlx4_comm_check_offline(struct mlx4_dev *dev) (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); if (!offline_bit) return 0; + + /* If device removal has been requested, + * do not continue retrying. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_NOWAIT) + break; + /* There are cases as part of AER/Reset flow that PF needs * around 100 msec to load. We therefore sleep for 100 msec * to allow other tasks to make use of that CPU during this @@ -3955,6 +3963,9 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(priv); int active_vfs = 0; + if (mlx4_is_slave(dev)) + persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT; + mutex_lock(&persist->interface_state_mutex); persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; mutex_unlock(&persist->interface_state_mutex); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7e66e4f62858..1beb1ec2fbdf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -476,6 +476,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_NOWAIT = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ -- cgit v1.2.3 From bc9ab9231ec8c08352ea860480523d88a221a68f Mon Sep 17 00:00:00 2001 From: David Arcari Date: Mon, 13 Mar 2017 19:07:16 -0400 Subject: net: ethernet: aquantia: set net_device mtu when mtu is changed When the aquantia device mtu is changed the net_device structure is not updated. As a result the ip command does not properly reflect the mtu change. Commit 5513e16421cb incorrectly assumed that __dev_set_mtu() was making the assignment ndev->mtu = new_mtu; This is not true in the case where the driver has a ndo_change_mtu routine. Fixes: 5513e16421cb ("net: ethernet: aquantia: Fixes for aq_ndev_change_mtu") Cc: Pavel Belous Signed-off-by: David Arcari Tested-by: Pavel Belous Reviewed-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index dad63623be6a..d05fbfdce5e5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -98,6 +98,7 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu) if (err < 0) goto err_exit; + ndev->mtu = new_mtu; if (netif_running(ndev)) { aq_ndev_close(ndev); -- cgit v1.2.3 From 61733c91c454a61be0ffc93fe46a5d5f2f048c1c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 13 Mar 2017 16:49:10 -0700 Subject: net: mpls: Fix nexthop alive tracking on down events Alive tracking of nexthops can account for a link twice if the carrier goes down followed by an admin down of the same link rendering multipath routes useless. This is similar to 79099aab38c8 for UNREGISTER events and DOWN events. Fix by tracking number of alive nexthops in mpls_ifdown similar to the logic in mpls_ifup. Checking the flags per nexthop once after all events have been processed is simpler than trying to maintian a running count through all event combinations. Also, WRITE_ONCE is used instead of ACCESS_ONCE to set rt_nhn_alive per a comment from checkpatch: WARNING: Prefer WRITE_ONCE(, ) over ACCESS_ONCE() = Fixes: c89359a42e2a4 ("mpls: support for dead routes") Signed-off-by: David Ahern Acked-by: Robert Shearman Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 33211f9a2656..6414079aa729 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1269,6 +1269,8 @@ static void mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); + unsigned int nh_flags = RTNH_F_DEAD | RTNH_F_LINKDOWN; + unsigned int alive; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -1278,9 +1280,11 @@ static void mpls_ifdown(struct net_device *dev, int event) if (!rt) continue; + alive = 0; change_nexthops(rt) { if (rtnl_dereference(nh->nh_dev) != dev) - continue; + goto next; + switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: @@ -1288,13 +1292,16 @@ static void mpls_ifdown(struct net_device *dev, int event) /* fall through */ case NETDEV_CHANGE: nh->nh_flags |= RTNH_F_LINKDOWN; - if (event != NETDEV_UNREGISTER) - ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; break; } if (event == NETDEV_UNREGISTER) RCU_INIT_POINTER(nh->nh_dev, NULL); +next: + if (!(nh->nh_flags & nh_flags)) + alive++; } endfor_nexthops(rt); + + WRITE_ONCE(rt->rt_nhn_alive, alive); } } -- cgit v1.2.3 From 4ee39733fbecf04cf9f346de2d64788c35028079 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 15 Mar 2017 18:14:33 -0700 Subject: net: ipv6: set route type for anycast routes Anycast routes have the RTF_ANYCAST flag set, but when dumping routes for userspace the route type is not set to RTN_ANYCAST. Make it so. Fixes: 58c4fb86eabcb ("[IPV6]: Flag RTF_ANYCAST for anycast routes") CC: Hideaki YOSHIFUJI Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 35c58b669ebd..9db1418993f2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3423,6 +3423,8 @@ static int rt6_fill_node(struct net *net, } else if (rt->rt6i_flags & RTF_LOCAL) rtm->rtm_type = RTN_LOCAL; + else if (rt->rt6i_flags & RTF_ANYCAST) + rtm->rtm_type = RTN_ANYCAST; else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) rtm->rtm_type = RTN_LOCAL; else -- cgit v1.2.3 From 687e0687f71ec00e0132a21fef802dee88c2f1ad Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 14 Mar 2017 17:55:45 +0100 Subject: USB: usbtmc: add missing endpoint sanity check USBTMC devices are required to have a bulk-in and a bulk-out endpoint, but the driver failed to verify this, something which could lead to the endpoint addresses being taken from uninitialised memory. Make sure to zero all private data as part of allocation, and add the missing endpoint sanity check. Note that this also addresses a more recently introduced issue, where the interrupt-in-presence flag would also be uninitialised whenever the optional interrupt-in endpoint is not present. This in turn could lead to an interrupt urb being allocated, initialised and submitted based on uninitialised values. Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.") Fixes: 5b775f672cc9 ("USB: add USB test and measurement class driver") Cc: stable # 2.6.28 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index f03692ec5520..5e3446db4513 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1381,7 +1381,7 @@ static int usbtmc_probe(struct usb_interface *intf, dev_dbg(&intf->dev, "%s called\n", __func__); - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -1444,6 +1444,13 @@ static int usbtmc_probe(struct usb_interface *intf, break; } } + + if (!data->bulk_out || !data->bulk_in) { + dev_err(&intf->dev, "bulk endpoints not found\n"); + retcode = -ENODEV; + goto err_put; + } + /* Find int endpoint */ for (n = 0; n < iface_desc->desc.bNumEndpoints; n++) { endpoint = &iface_desc->endpoint[n].desc; @@ -1512,6 +1519,7 @@ error_register: sysfs_remove_group(&intf->dev.kobj, &capability_attr_grp); sysfs_remove_group(&intf->dev.kobj, &data_attr_grp); usbtmc_free_int(data); +err_put: kref_put(&data->kref, usbtmc_delete); return retcode; } -- cgit v1.2.3 From 2e47c53503eb9faff42b3cfa144a833344dd1f89 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 14 Mar 2017 17:55:46 +0100 Subject: USB: usbtmc: fix probe error path Make sure to initialise the return value to avoid having allocation failures going unnoticed when allocating interrupt-endpoint resources. This prevents use-after-free or worse when the device is later unbound. Fixes: dbf3e7f654c0 ("Implement an ioctl to support the USMTMC-USB488 READ_STATUS_BYTE operation.") Cc: stable # 4.6 Cc: Dave Penkler Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usbtmc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c index 5e3446db4513..8fb309a0ff6b 100644 --- a/drivers/usb/class/usbtmc.c +++ b/drivers/usb/class/usbtmc.c @@ -1476,8 +1476,10 @@ static int usbtmc_probe(struct usb_interface *intf, if (data->iin_ep_present) { /* allocate int urb */ data->iin_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!data->iin_urb) + if (!data->iin_urb) { + retcode = -ENOMEM; goto error_register; + } /* Protect interrupt in endpoint data until iin_urb is freed */ kref_get(&data->kref); @@ -1485,8 +1487,10 @@ static int usbtmc_probe(struct usb_interface *intf, /* allocate buffer for interrupt in */ data->iin_buffer = kmalloc(data->iin_wMaxPacketSize, GFP_KERNEL); - if (!data->iin_buffer) + if (!data->iin_buffer) { + retcode = -ENOMEM; goto error_register; + } /* fill interrupt urb */ usb_fill_int_urb(data->iin_urb, data->usb_dev, -- cgit v1.2.3 From cdd7928df0d2efaa3270d711963773a08a4cc8ab Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 14 Mar 2017 12:09:56 +0100 Subject: ACM gadget: fix endianness in notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gadget code exports the bitfield for serial status changes over the wire in its internal endianness. The fix is to convert to little endian before sending it over the wire. Signed-off-by: Oliver Neukum Tested-by: 家瑋 CC: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_acm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c index a30766ca4226..5e3828d9dac7 100644 --- a/drivers/usb/gadget/function/f_acm.c +++ b/drivers/usb/gadget/function/f_acm.c @@ -535,13 +535,15 @@ static int acm_notify_serial_state(struct f_acm *acm) { struct usb_composite_dev *cdev = acm->port.func.config->cdev; int status; + __le16 serial_state; spin_lock(&acm->lock); if (acm->notify_req) { dev_dbg(&cdev->gadget->dev, "acm ttyGS%d serial state %04x\n", acm->port_num, acm->serial_state); + serial_state = cpu_to_le16(acm->serial_state); status = acm_cdc_notify(acm, USB_CDC_NOTIFY_SERIAL_STATE, - 0, &acm->serial_state, sizeof(acm->serial_state)); + 0, &serial_state, sizeof(acm->serial_state)); } else { acm->pending = true; status = 0; -- cgit v1.2.3 From 9501df3cd9204f5859f649182431616a31ee88a1 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 15 Mar 2017 23:38:07 -0400 Subject: ibmvnic: Free tx/rx scrq pointer array when releasing sub-crqs The pointer array for the tx/rx sub crqs should be free'ed when releasing the tx/rx sub crqs. Signed-off-by: Nathan Fontenot Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5f11b4dc95d2..b23d6545f835 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1257,6 +1257,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter) release_sub_crq_queue(adapter, adapter->tx_scrq[i]); } + kfree(adapter->tx_scrq); adapter->tx_scrq = NULL; } @@ -1269,6 +1270,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter) release_sub_crq_queue(adapter, adapter->rx_scrq[i]); } + kfree(adapter->rx_scrq); adapter->rx_scrq = NULL; } } -- cgit v1.2.3 From 4d4a6ac73e7466c2085c307fac41f74ce4568a45 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:10 +0000 Subject: rxrpc: Ignore BUSY packets on old calls If we receive a BUSY packet for a call we think we've just completed, the packet is handed off to the connection processor to deal with - but the connection processor doesn't expect a BUSY packet and so flags a protocol error. Fix this by simply ignoring the BUSY packet for the moment. The symptom of this may appear as a system call failing with EPROTO. This may be triggered by pressing ctrl-C under some circumstances. This comes about we abort calls due to interruption by a signal (which we shouldn't do, but that's going to be a large fix and mostly in fs/afs/). What happens is that we abort the call and may also abort follow up calls too (this needs offloading somehoe). So we see a transmission of something like the following sequence of packets: DATA for call N ABORT call N DATA for call N+1 ABORT call N+1 in very quick succession on the same channel. However, the peer may have deferred the processing of the ABORT from the call N to a background thread and thus sees the DATA message from the call N+1 coming in before it has cleared the channel. Thus it sends a BUSY packet[*]. [*] Note that some implementations (OpenAFS, for example) mark the BUSY packet with one plus the callNumber of the call prior to call N. Ordinarily, this would be call N, but there's no requirement for the calls on a channel to be numbered strictly sequentially (the number is required to increase). This is wrong and means that the callNumber in the BUSY packet should be ignored (it really ought to be N+1 since that's what it's in response to). Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/conn_event.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 3f9d8d7ec632..b099b64366f3 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -275,6 +275,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, rxrpc_conn_retransmit_call(conn, skb); return 0; + case RXRPC_PACKET_TYPE_BUSY: + /* Just ignore BUSY packets for now. */ + return 0; + case RXRPC_PACKET_TYPE_ABORT: if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &wtmp, sizeof(wtmp)) < 0) -- cgit v1.2.3 From d12c917691b45d9dffcfe7c2362d25caa40905fd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 16 Mar 2017 10:32:42 -0700 Subject: bridge: resolve a false alarm of lockdep Andrei reported a false alarm of lockdep at net/bridge/br_fdb.c:109, this is because in Andrei's case, a spin_bug() was already triggered before this, therefore the debug_locks is turned off, lockdep_is_held() is no longer accurate after that. We should use lockdep_assert_held_once() instead of lockdep_is_held() to respect debug_locks. Fixes: 410b3d48f5111 ("bridge: fdb: add proper lock checks in searching functions") Reported-by: Andrei Vagin Signed-off-by: Cong Wang Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- net/bridge/br_private.h | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 4f598dc2d916..6e08b7199dd7 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -106,7 +106,7 @@ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; - WARN_ON_ONCE(!br_hash_lock_held(br)); + lockdep_assert_held_once(&br->hash_lock); rcu_read_lock(); fdb = fdb_find_rcu(head, addr, vid); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2288fca7756c..61368186edea 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -531,15 +531,6 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); -static inline bool br_hash_lock_held(struct net_bridge *br) -{ -#ifdef CONFIG_LOCKDEP - return lockdep_is_held(&br->hash_lock); -#else - return true; -#endif -} - /* br_forward.c */ enum br_pkt_type { BR_PKT_UNICAST, -- cgit v1.2.3 From e14b4db7a567ff507453ecd9c64da51bbc2b6d23 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 16 Mar 2017 12:21:32 -0700 Subject: netvsc: fix race during initialization When device is being setup on boot, there is a small race where network device callback is registered, but the netvsc_device pointer is not set yet. This can cause a NULL ptr dereference if packet arrives during this window. Fixes: 46b4f7f5d1f7 ("netvsc: eliminate per-device outstanding send counter") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 4c1d8cca247b..8dd0b8770328 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1231,8 +1231,11 @@ void netvsc_channel_cb(void *context) return; net_device = net_device_to_netvsc_device(ndev); - if (unlikely(net_device->destroy) && - netvsc_channel_idle(net_device, q_idx)) + if (unlikely(!net_device)) + return; + + if (unlikely(net_device->destroy && + netvsc_channel_idle(net_device, q_idx))) return; /* commit_rd_index() -> hv_signal_on_read() needs this. */ -- cgit v1.2.3 From 7b2db29fbb4e766fcd02207eb2e2087170bd6ebc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 8 Mar 2017 10:19:36 -0800 Subject: usb: hub: Fix crash after failure to read BOS descriptor If usb_get_bos_descriptor() returns an error, usb->bos will be NULL. Nevertheless, it is dereferenced unconditionally in hub_set_initial_usb2_lpm_policy() if usb2_hw_lpm_capable is set. This results in a crash. usb 5-1: unable to get BOS descriptor ... Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = ffffffc00165f000 [00000008] *pgd=000000000174f003, *pud=000000000174f003, *pmd=0000000001750003, *pte=00e8000001751713 Internal error: Oops: 96000005 [#1] PREEMPT SMP Modules linked in: uinput uvcvideo videobuf2_vmalloc cmac [ ... ] CPU: 5 PID: 3353 Comm: kworker/5:3 Tainted: G B 4.4.52 #480 Hardware name: Google Kevin (DT) Workqueue: events driver_set_config_work task: ffffffc0c3690000 ti: ffffffc0ae9a8000 task.ti: ffffffc0ae9a8000 PC is at hub_port_init+0xc3c/0xd10 LR is at hub_port_init+0xc3c/0xd10 ... Call trace: [] hub_port_init+0xc3c/0xd10 [] usb_reset_and_verify_device+0x15c/0x82c [] usb_reset_device+0xe4/0x298 [] rtl8152_probe+0x84/0x9b0 [r8152] [] usb_probe_interface+0x244/0x2f8 [] driver_probe_device+0x180/0x3b4 [] __device_attach_driver+0xb4/0xe0 [] bus_for_each_drv+0xb4/0xe4 [] __device_attach+0xd0/0x158 [] device_initial_probe+0x24/0x30 [] bus_probe_device+0x50/0xe4 [] device_add+0x414/0x738 [] usb_set_configuration+0x89c/0x914 [] driver_set_config_work+0xc0/0xf0 [] process_one_work+0x390/0x6b8 [] worker_thread+0x480/0x610 [] kthread+0x164/0x178 [] ret_from_fork+0x10/0x40 Since we don't know anything about LPM capabilities without BOS descriptor, don't attempt to enable LPM if it is not available. Fixes: 890dae886721 ("xhci: Enable LPM support only for hardwired ...") Cc: stable Cc: Mathias Nyman Signed-off-by: Guenter Roeck Acked-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index f0dd08198d74..5286bf67869a 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4275,7 +4275,7 @@ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev) struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); int connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN; - if (!udev->usb2_hw_lpm_capable) + if (!udev->usb2_hw_lpm_capable || !udev->bos) return; if (hub) -- cgit v1.2.3 From db7f00b8dba6d687b6ab1f2e9309acfd214fcb4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Mar 2017 15:43:19 -0700 Subject: tcp: tcp_get_info() should read tcp_time_stamp later Commit b369e7fd41f7 ("tcp: make TCP_INFO more consistent") moved lock_sock_fast() earlier in tcp_get_info() This has the minor effect that jiffies value being sampled at the beginning of tcp_get_info() is more likely to be off by one, and we report big tcpi_last_data_sent values (like 0xFFFFFFFF). Since we lock the socket, fetching tcp_time_stamp right before doing the jiffies_to_msecs() calls is enough to remove these wrong values. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cf4555581282..1e319a525d51 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2770,7 +2770,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now = tcp_time_stamp, intv; + u32 now, intv; u64 rate64; bool slow; u32 rate; @@ -2839,6 +2839,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; + now = tcp_time_stamp; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); -- cgit v1.2.3 From b767ad726c2aa6219318bf0da83fbe690e653d9a Mon Sep 17 00:00:00 2001 From: Aleksey Makarov Date: Wed, 1 Mar 2017 18:23:02 +0300 Subject: Revert "tty: serial: pl011: add ttyAMA for matching pl011 console" The original patch makes the condition always true, so it is wrong. It masks (but not fixes) the bug described in the commit message but introduces a regression (no console is selected by SPCR) in regular (no 'console=ttyAMA') case. s/||/&&/ would not fix the problem as the root cause was identified incorrectly. This reverts commit aea9a80ba98a0c9b4de88850260e9fbdcc98360b. Signed-off-by: Aleksey Makarov Signed-off-by: Greg Kroah-Hartman Acked-by: Sudeep Holla Tested-by: Jayachandran C Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 8789ea423ccf..56f92d7348bf 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2373,7 +2373,7 @@ static int __init pl011_console_match(struct console *co, char *name, int idx, if (strcmp(name, "qdf2400_e44") == 0) { pr_info_once("UART: Working around QDF2400 SoC erratum 44"); qdf2400_e44_present = true; - } else if (strcmp(name, "pl011") != 0 || strcmp(name, "ttyAMA") != 0) { + } else if (strcmp(name, "pl011") != 0) { return -ENODEV; } -- cgit v1.2.3 From 5362544bebe85071188dd9e479b5a5040841c895 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Sat, 4 Mar 2017 14:55:19 +0100 Subject: tty: don't panic on OOM in tty_set_ldisc() If tty_ldisc_open() fails in tty_set_ldisc(), it tries to go back to the old discipline or N_TTY. But that can fail as well, in such case it panics. This is not a graceful way to handle OOM. Leave ldisc==NULL if all attempts fail instead. Also use existing tty_ldisc_reinit() helper function instead of tty_ldisc_restore(). Also don't WARN/BUG in tty_ldisc_reinit() if N_TTY fails, which would have the same net effect of bringing kernel down on OOM. Instead print a single line message about what has happened. Signed-off-by: Dmitry Vyukov Cc: syzkaller@googlegroups.com Cc: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Peter Hurley Cc: One Thousand Gnomes Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 85 ++++++++++--------------------------------------- 1 file changed, 16 insertions(+), 69 deletions(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 68947f6de5ad..c3956ca022e4 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -488,41 +488,6 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) tty_ldisc_debug(tty, "%p: closed\n", ld); } -/** - * tty_ldisc_restore - helper for tty ldisc change - * @tty: tty to recover - * @old: previous ldisc - * - * Restore the previous line discipline or N_TTY when a line discipline - * change fails due to an open error - */ - -static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) -{ - struct tty_ldisc *new_ldisc; - int r; - - /* There is an outstanding reference here so this is safe */ - old = tty_ldisc_get(tty, old->ops->num); - WARN_ON(IS_ERR(old)); - tty->ldisc = old; - tty_set_termios_ldisc(tty, old->ops->num); - if (tty_ldisc_open(tty, old) < 0) { - tty_ldisc_put(old); - /* This driver is always present */ - new_ldisc = tty_ldisc_get(tty, N_TTY); - if (IS_ERR(new_ldisc)) - panic("n_tty: get"); - tty->ldisc = new_ldisc; - tty_set_termios_ldisc(tty, N_TTY); - r = tty_ldisc_open(tty, new_ldisc); - if (r < 0) - panic("Couldn't open N_TTY ldisc for " - "%s --- error %d.", - tty_name(tty), r); - } -} - /** * tty_set_ldisc - set line discipline * @tty: the terminal to set @@ -536,12 +501,7 @@ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) int tty_set_ldisc(struct tty_struct *tty, int disc) { - int retval; - struct tty_ldisc *old_ldisc, *new_ldisc; - - new_ldisc = tty_ldisc_get(tty, disc); - if (IS_ERR(new_ldisc)) - return PTR_ERR(new_ldisc); + int retval, old_disc; tty_lock(tty); retval = tty_ldisc_lock(tty, 5 * HZ); @@ -554,7 +514,8 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) } /* Check the no-op case */ - if (tty->ldisc->ops->num == disc) + old_disc = tty->ldisc->ops->num; + if (old_disc == disc) goto out; if (test_bit(TTY_HUPPED, &tty->flags)) { @@ -563,34 +524,25 @@ int tty_set_ldisc(struct tty_struct *tty, int disc) goto out; } - old_ldisc = tty->ldisc; - - /* Shutdown the old discipline. */ - tty_ldisc_close(tty, old_ldisc); - - /* Now set up the new line discipline. */ - tty->ldisc = new_ldisc; - tty_set_termios_ldisc(tty, disc); - - retval = tty_ldisc_open(tty, new_ldisc); + retval = tty_ldisc_reinit(tty, disc); if (retval < 0) { /* Back to the old one or N_TTY if we can't */ - tty_ldisc_put(new_ldisc); - tty_ldisc_restore(tty, old_ldisc); + if (tty_ldisc_reinit(tty, old_disc) < 0) { + pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n"); + if (tty_ldisc_reinit(tty, N_TTY) < 0) { + /* At this point we have tty->ldisc == NULL. */ + pr_err("tty: reinitializing N_TTY failed\n"); + } + } } - if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) { + if (tty->ldisc && tty->ldisc->ops->num != old_disc && + tty->ops->set_ldisc) { down_read(&tty->termios_rwsem); tty->ops->set_ldisc(tty); up_read(&tty->termios_rwsem); } - /* At this point we hold a reference to the new ldisc and a - reference to the old ldisc, or we hold two references to - the old ldisc (if it was restored as part of error cleanup - above). In either case, releasing a single reference from - the old ldisc is correct. */ - new_ldisc = old_ldisc; out: tty_ldisc_unlock(tty); @@ -598,7 +550,6 @@ out: already running */ tty_buffer_restart_work(tty->port); err: - tty_ldisc_put(new_ldisc); /* drop the extra reference */ tty_unlock(tty); return retval; } @@ -659,10 +610,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) int retval; ld = tty_ldisc_get(tty, disc); - if (IS_ERR(ld)) { - BUG_ON(disc == N_TTY); + if (IS_ERR(ld)) return PTR_ERR(ld); - } if (tty->ldisc) { tty_ldisc_close(tty, tty->ldisc); @@ -674,10 +623,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc) tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { - if (!WARN_ON(disc == N_TTY)) { - tty_ldisc_put(tty->ldisc); - tty->ldisc = NULL; - } + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; } return retval; } -- cgit v1.2.3 From a4a3e061149f09c075f108b6f1cf04d9739a6bc2 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Sat, 4 Mar 2017 13:46:12 +0100 Subject: tty: fix data race in tty_ldisc_ref_wait() tty_ldisc_ref_wait() checks tty->ldisc under tty->ldisc_sem. But if ldisc==NULL it releases them sem and reloads tty->ldisc without holding the sem. This is wrong and can lead to returning non-NULL ldisc without protection. Don't reload tty->ldisc second time. Signed-off-by: Dmitry Vyukov Cc: syzkaller@googlegroups.com Cc: linux-kernel@vger.kernel.org Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Peter Hurley Cc: One Thousand Gnomes Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_ldisc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index c3956ca022e4..b0500a0a87b8 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -271,10 +271,13 @@ const struct file_operations tty_ldiscs_proc_fops = { struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { + struct tty_ldisc *ld; + ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT); - if (!tty->ldisc) + ld = tty->ldisc; + if (!ld) ldsem_up_read(&tty->ldisc_sem); - return tty->ldisc; + return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); -- cgit v1.2.3 From 8971e1c79d3f6c9a5e6f7a65c50c41f434a4dae6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 17 Mar 2017 16:02:35 +1100 Subject: powerpc/pseries: Don't give a warning when HPT resizing isn't available As of commit 438cc81a41e8 ("powerpc/pseries: Automatically resize HPT for memory hot add/remove"), when running on the pseries platform, we always attempt to use the PAPR extension to resize the hashed page table (HPT) when we add or remove memory. This is fine, but when the extension is not available we'll give a harmless, but scary warning. Instead check if the firmware supports HPT resizing before populating the mmu_hash_ops.resize_hpt pointer. Signed-off-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/lpar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 251060cf1713..8b1fe895daa3 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -751,7 +751,9 @@ void __init hpte_init_pseries(void) mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; - mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; + + if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) + mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; } void radix_init_pseries(void) -- cgit v1.2.3 From 5dc855d44c2ad960a86f593c60461f1ae1566b6d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Mar 2017 12:59:39 -0700 Subject: x86/perf: Fix CR4.PCE propagation to use active_mm instead of mm If one thread mmaps a perf event while another thread in the same mm is in some context where active_mm != mm (which can happen in the scheduler, for example), refresh_pce() would write the wrong value to CR4.PCE. This broke some PAPI tests. Reported-and-tested-by: Vince Weaver Signed-off-by: Andy Lutomirski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 7911d3f7af14 ("perf/x86: Only allow rdpmc if a perf_event is mapped") Link: http://lkml.kernel.org/r/0c5b38a76ea50e405f9abe07a13dfaef87c173a1.1489694270.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1635c0c8df23..e07b36c5588a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2100,8 +2100,8 @@ static int x86_pmu_event_init(struct perf_event *event) static void refresh_pce(void *ignored) { - if (current->mm) - load_mm_cr4(current->mm); + if (current->active_mm) + load_mm_cr4(current->active_mm); } static void x86_pmu_event_mapped(struct perf_event *event) -- cgit v1.2.3 From 4b07372a32c0c1505a7634ad7e607d83340ef645 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Mar 2017 12:59:40 -0700 Subject: x86/perf: Clarify why x86_pmu_event_mapped() isn't racy Naively, it looks racy, but ->mmap_sem saves it. Add a comment and a lockdep assertion. Signed-off-by: Andy Lutomirski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/03a1e629063899168dfc4707f3bb6e581e21f5c6.1489694270.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e07b36c5588a..183a972f9210 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2109,6 +2109,18 @@ static void x86_pmu_event_mapped(struct perf_event *event) if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; + /* + * This function relies on not being called concurrently in two + * tasks in the same mm. Otherwise one task could observe + * perf_rdpmc_allowed > 1 and return all the way back to + * userspace with CR4.PCE clear while another task is still + * doing on_each_cpu_mask() to propagate CR4.PCE. + * + * For now, this can't happen because all callers hold mmap_sem + * for write. If this changes, we'll need a different solution. + */ + lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); } -- cgit v1.2.3 From 2cd29f2387be70de9feb4c9f8dbc7c0bd55748ce Mon Sep 17 00:00:00 2001 From: Robert Middleton Date: Wed, 15 Mar 2017 16:56:47 -0400 Subject: gpio:mcp23s08 Fixed missing interrupts When an interrupt occurs on an MCP23S08 chip, the INTF register will only contain one bit as causing the interrupt. If more than two pins change at the same time on the chip, this causes one of the pins to not be reported. This patch fixes the logic for checking if a pin has changed, so that multiple pins will always cause more than one change. Cc: stable@vger.kernel.org Signed-off-by: Robert Middleton Tested-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mcp23s08.c | 65 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index bdb692345428..2a57d024481d 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -270,8 +270,10 @@ mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value) static irqreturn_t mcp23s08_irq(int irq, void *data) { struct mcp23s08 *mcp = data; - int intcap, intf, i; + int intcap, intf, i, gpio, gpio_orig, intcap_mask; unsigned int child_irq; + bool intf_set, intcap_changed, gpio_bit_changed, + defval_changed, gpio_set; mutex_lock(&mcp->lock); if (mcp_read(mcp, MCP_INTF, &intf) < 0) { @@ -287,14 +289,67 @@ static irqreturn_t mcp23s08_irq(int irq, void *data) } mcp->cache[MCP_INTCAP] = intcap; + + /* This clears the interrupt(configurable on S18) */ + if (mcp_read(mcp, MCP_GPIO, &gpio) < 0) { + mutex_unlock(&mcp->lock); + return IRQ_HANDLED; + } + gpio_orig = mcp->cache[MCP_GPIO]; + mcp->cache[MCP_GPIO] = gpio; mutex_unlock(&mcp->lock); + if (mcp->cache[MCP_INTF] == 0) { + /* There is no interrupt pending */ + return IRQ_HANDLED; + } + + dev_dbg(mcp->chip.parent, + "intcap 0x%04X intf 0x%04X gpio_orig 0x%04X gpio 0x%04X\n", + intcap, intf, gpio_orig, gpio); for (i = 0; i < mcp->chip.ngpio; i++) { - if ((BIT(i) & mcp->cache[MCP_INTF]) && - ((BIT(i) & intcap & mcp->irq_rise) || - (mcp->irq_fall & ~intcap & BIT(i)) || - (BIT(i) & mcp->cache[MCP_INTCON]))) { + /* We must check all of the inputs on the chip, + * otherwise we may not notice a change on >=2 pins. + * + * On at least the mcp23s17, INTCAP is only updated + * one byte at a time(INTCAPA and INTCAPB are + * not written to at the same time - only on a per-bank + * basis). + * + * INTF only contains the single bit that caused the + * interrupt per-bank. On the mcp23s17, there is + * INTFA and INTFB. If two pins are changed on the A + * side at the same time, INTF will only have one bit + * set. If one pin on the A side and one pin on the B + * side are changed at the same time, INTF will have + * two bits set. Thus, INTF can't be the only check + * to see if the input has changed. + */ + + intf_set = BIT(i) & mcp->cache[MCP_INTF]; + if (i < 8 && intf_set) + intcap_mask = 0x00FF; + else if (i >= 8 && intf_set) + intcap_mask = 0xFF00; + else + intcap_mask = 0x00; + + intcap_changed = (intcap_mask & + (BIT(i) & mcp->cache[MCP_INTCAP])) != + (intcap_mask & (BIT(i) & gpio_orig)); + gpio_set = BIT(i) & mcp->cache[MCP_GPIO]; + gpio_bit_changed = (BIT(i) & gpio_orig) != + (BIT(i) & mcp->cache[MCP_GPIO]); + defval_changed = (BIT(i) & mcp->cache[MCP_INTCON]) && + ((BIT(i) & mcp->cache[MCP_GPIO]) != + (BIT(i) & mcp->cache[MCP_DEFVAL])); + + if (((gpio_bit_changed || intcap_changed) && + (BIT(i) & mcp->irq_rise) && gpio_set) || + ((gpio_bit_changed || intcap_changed) && + (BIT(i) & mcp->irq_fall) && !gpio_set) || + defval_changed) { child_irq = irq_find_mapping(mcp->chip.irqdomain, i); handle_nested_irq(child_irq); } -- cgit v1.2.3 From 4938ca90166d6d3061793789e2eef42cd934fa97 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Thu, 9 Mar 2017 10:09:44 +0800 Subject: drm/i915/gvt: handle force-nonpriv registers, cmd parser part this patch adds force non-priv registers check in LRI cmds handler v4: transform is_force_nonpriv_mmio() from macro to inline fuction to eliminate checkpatch warning v3: per zhenyu's comment, fix some style warnings v2: per zhenyu's comment, refine the code to remove cascaded ifs Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/handlers.c | 17 +++++++++++++++++ drivers/gpu/drm/i915/gvt/mmio.h | 3 +++ 3 files changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 7ae6e2b241c8..919c83abaeb1 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -817,6 +817,25 @@ static bool is_shadowed_mmio(unsigned int offset) return ret; } +static inline bool is_force_nonpriv_mmio(unsigned int offset) +{ + return (offset >= 0x24d0 && offset < 0x2500); +} + +static int force_nonpriv_reg_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) +{ + struct intel_gvt *gvt = s->vgpu->gvt; + unsigned int data = cmd_val(s, index + 1); + + if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { + gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", + offset, data); + return -EINVAL; + } + return 0; +} + static int cmd_reg_handler(struct parser_exec_state *s, unsigned int offset, unsigned int index, char *cmd) { @@ -841,6 +860,10 @@ static int cmd_reg_handler(struct parser_exec_state *s, return 0; } + if (is_force_nonpriv_mmio(offset) && + force_nonpriv_reg_handler(s, offset, index)) + return -EINVAL; + if (offset == i915_mmio_reg_offset(DERRMR) || offset == i915_mmio_reg_offset(FORCEWAKE_MT)) { /* Writing to HW VGT_PVINFO_PAGE offset will be discarded */ diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 8e43395c748a..de975f40aebf 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2988,3 +2988,20 @@ int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, write_vreg(vgpu, offset, p_data, bytes); return 0; } + +/** + * intel_gvt_in_force_nonpriv_whitelist - if a mmio is in whitelist to be + * force-nopriv register + * + * @gvt: a GVT device + * @offset: register offset + * + * Returns: + * True if the register is in force-nonpriv whitelist; + * False if outside; + */ +bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt, + unsigned int offset) +{ + return in_whitelist(offset); +} diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 3bc620f56f35..a3a027025cd0 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -107,4 +107,7 @@ int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); + +bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt, + unsigned int offset); #endif -- cgit v1.2.3 From 695fbc08d80f93ecca18a1abd8f52c2ab77fdc8d Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 10 Mar 2017 04:26:53 -0500 Subject: drm/i915/gvt: replace the gvt_err with gvt_vgpu_err gvt_err should be used only for the very few critical error message during host i915 drvier initialization. This patch 1. removes the redundant gvt_err; 2. creates a new gvt_vgpu_err to show errors caused by vgpu; 3. replaces the most gvt_err with gvt_vgpu_err; 4. leaves very few gvt_err for dumping gvt error during host gvt initialization. v2. change name to gvt_vgpu_err and add vgpu id to the message. (Kevin) add gpu id to gvt_vgpu_err. (Zhi) v3. remove gpu id from gvt_vgpu_err caller. (Zhi) v4. add vgpu check to the gvt_vgpu_err macro. (Zhiyuan) v5. add comments for v3 and v4. v6. split the big patch into two, with this patch only for checking gvt_vgpu_err. (Zhenyu) v7. rebase to staging branch v8. rebase to fix branch Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/aperture_gm.c | 8 ++-- drivers/gpu/drm/i915/gvt/cmd_parser.c | 84 +++++++++++++++++++--------------- drivers/gpu/drm/i915/gvt/debug.h | 8 ++++ drivers/gpu/drm/i915/gvt/edid.c | 13 +++--- drivers/gpu/drm/i915/gvt/execlist.c | 29 ++++++------ drivers/gpu/drm/i915/gvt/gtt.c | 74 +++++++++++++++--------------- drivers/gpu/drm/i915/gvt/handlers.c | 28 ++++++------ drivers/gpu/drm/i915/gvt/kvmgt.c | 33 +++++++------ drivers/gpu/drm/i915/gvt/mmio.c | 38 +++++++-------- drivers/gpu/drm/i915/gvt/opregion.c | 10 ++-- drivers/gpu/drm/i915/gvt/render.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 11 +++-- 12 files changed, 180 insertions(+), 158 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 3b6caaca9751..325618d969fe 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -242,7 +242,7 @@ static int alloc_resource(struct intel_vgpu *vgpu, const char *item; if (!param->low_gm_sz || !param->high_gm_sz || !param->fence_sz) { - gvt_err("Invalid vGPU creation params\n"); + gvt_vgpu_err("Invalid vGPU creation params\n"); return -EINVAL; } @@ -285,9 +285,9 @@ static int alloc_resource(struct intel_vgpu *vgpu, return 0; no_enough_resource: - gvt_err("vgpu%d: fail to allocate resource %s\n", vgpu->id, item); - gvt_err("vgpu%d: request %luMB avail %luMB max %luMB taken %luMB\n", - vgpu->id, BYTES_TO_MB(request), BYTES_TO_MB(avail), + gvt_vgpu_err("fail to allocate resource %s\n", item); + gvt_vgpu_err("request %luMB avail %luMB max %luMB taken %luMB\n", + BYTES_TO_MB(request), BYTES_TO_MB(avail), BYTES_TO_MB(max), BYTES_TO_MB(taken)); return -ENOSPC; } diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 919c83abaeb1..2ca0506d8d8c 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -843,20 +843,19 @@ static int cmd_reg_handler(struct parser_exec_state *s, struct intel_gvt *gvt = vgpu->gvt; if (offset + 4 > gvt->device_info.mmio_size) { - gvt_err("%s access to (%x) outside of MMIO range\n", + gvt_vgpu_err("%s access to (%x) outside of MMIO range\n", cmd, offset); return -EINVAL; } if (!intel_gvt_mmio_is_cmd_access(gvt, offset)) { - gvt_err("vgpu%d: %s access to non-render register (%x)\n", - s->vgpu->id, cmd, offset); + gvt_vgpu_err("%s access to non-render register (%x)\n", + cmd, offset); return 0; } if (is_shadowed_mmio(offset)) { - gvt_err("vgpu%d: found access of shadowed MMIO %x\n", - s->vgpu->id, offset); + gvt_vgpu_err("found access of shadowed MMIO %x\n", offset); return 0; } @@ -1152,6 +1151,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, struct mi_display_flip_command_info *info) { struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; + struct intel_vgpu *vgpu = s->vgpu; u32 dword0 = cmd_val(s, 0); u32 dword1 = cmd_val(s, 1); u32 dword2 = cmd_val(s, 2); @@ -1190,7 +1190,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, break; default: - gvt_err("unknown plane code %d\n", plane); + gvt_vgpu_err("unknown plane code %d\n", plane); return -EINVAL; } @@ -1297,25 +1297,26 @@ static int update_plane_mmio_from_mi_display_flip( static int cmd_handler_mi_display_flip(struct parser_exec_state *s) { struct mi_display_flip_command_info info; + struct intel_vgpu *vgpu = s->vgpu; int ret; int i; int len = cmd_length(s); ret = decode_mi_display_flip(s, &info); if (ret) { - gvt_err("fail to decode MI display flip command\n"); + gvt_vgpu_err("fail to decode MI display flip command\n"); return ret; } ret = check_mi_display_flip(s, &info); if (ret) { - gvt_err("invalid MI display flip command\n"); + gvt_vgpu_err("invalid MI display flip command\n"); return ret; } ret = update_plane_mmio_from_mi_display_flip(s, &info); if (ret) { - gvt_err("fail to update plane mmio\n"); + gvt_vgpu_err("fail to update plane mmio\n"); return ret; } @@ -1373,7 +1374,8 @@ static inline int cmd_address_audit(struct parser_exec_state *s, int ret; if (op_size > max_surface_size) { - gvt_err("command address audit fail name %s\n", s->info->name); + gvt_vgpu_err("command address audit fail name %s\n", + s->info->name); return -EINVAL; } @@ -1390,7 +1392,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s, } return 0; err: - gvt_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n", + gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n", s->info->name, guest_gma, op_size); pr_err("cmd dump: "); @@ -1435,8 +1437,10 @@ static int cmd_handler_mi_store_data_imm(struct parser_exec_state *s) static inline int unexpected_cmd(struct parser_exec_state *s) { - gvt_err("vgpu%d: Unexpected %s in command buffer!\n", - s->vgpu->id, s->info->name); + struct intel_vgpu *vgpu = s->vgpu; + + gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name); + return -EINVAL; } @@ -1539,7 +1543,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, while (gma != end_gma) { gpa = intel_vgpu_gma_to_gpa(mm, gma); if (gpa == INTEL_GVT_INVALID_ADDR) { - gvt_err("invalid gma address: %lx\n", gma); + gvt_vgpu_err("invalid gma address: %lx\n", gma); return -EFAULT; } @@ -1580,6 +1584,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s) uint32_t bb_size = 0; uint32_t cmd_len = 0; bool met_bb_end = false; + struct intel_vgpu *vgpu = s->vgpu; u32 cmd; /* get the start gm address of the batch buffer */ @@ -1588,7 +1593,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s) info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_err("unknown cmd 0x%x, opcode=0x%x\n", + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); return -EINVAL; } @@ -1597,7 +1602,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s) gma, gma + 4, &cmd); info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_err("unknown cmd 0x%x, opcode=0x%x\n", + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); return -EINVAL; } @@ -1622,6 +1627,7 @@ static uint32_t find_bb_size(struct parser_exec_state *s) static int perform_bb_shadow(struct parser_exec_state *s) { struct intel_shadow_bb_entry *entry_obj; + struct intel_vgpu *vgpu = s->vgpu; unsigned long gma = 0; uint32_t bb_size; void *dst = NULL; @@ -1656,7 +1662,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) ret = i915_gem_object_set_to_cpu_domain(entry_obj->obj, false); if (ret) { - gvt_err("failed to set shadow batch to CPU\n"); + gvt_vgpu_err("failed to set shadow batch to CPU\n"); goto unmap_src; } @@ -1668,7 +1674,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) gma, gma + bb_size, dst); if (ret) { - gvt_err("fail to copy guest ring buffer\n"); + gvt_vgpu_err("fail to copy guest ring buffer\n"); goto unmap_src; } @@ -1699,15 +1705,16 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) { bool second_level; int ret = 0; + struct intel_vgpu *vgpu = s->vgpu; if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) { - gvt_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n"); + gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n"); return -EINVAL; } second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1; if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) { - gvt_err("Jumping to 2nd level BB from RB is not allowed\n"); + gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n"); return -EINVAL; } @@ -1725,7 +1732,7 @@ static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s) if (batch_buffer_needs_scan(s)) { ret = perform_bb_shadow(s); if (ret < 0) - gvt_err("invalid shadow batch buffer\n"); + gvt_vgpu_err("invalid shadow batch buffer\n"); } else { /* emulate a batch buffer end to do return right */ ret = cmd_handler_mi_batch_buffer_end(s); @@ -2452,6 +2459,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) int ret = 0; cycles_t t0, t1, t2; struct parser_exec_state s_before_advance_custom; + struct intel_vgpu *vgpu = s->vgpu; t0 = get_cycles(); @@ -2459,7 +2467,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_err("unknown cmd 0x%x, opcode=0x%x\n", + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", cmd, get_opcode(cmd, s->ring_id)); return -EINVAL; } @@ -2475,7 +2483,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) if (info->handler) { ret = info->handler(s); if (ret < 0) { - gvt_err("%s handler error\n", info->name); + gvt_vgpu_err("%s handler error\n", info->name); return ret; } } @@ -2486,7 +2494,7 @@ static int cmd_parser_exec(struct parser_exec_state *s) if (!(info->flag & F_IP_ADVANCE_CUSTOM)) { ret = cmd_advance_default(s); if (ret) { - gvt_err("%s IP advance error\n", info->name); + gvt_vgpu_err("%s IP advance error\n", info->name); return ret; } } @@ -2509,6 +2517,7 @@ static int command_scan(struct parser_exec_state *s, unsigned long gma_head, gma_tail, gma_bottom; int ret = 0; + struct intel_vgpu *vgpu = s->vgpu; gma_head = rb_start + rb_head; gma_tail = rb_start + rb_tail; @@ -2520,7 +2529,7 @@ static int command_scan(struct parser_exec_state *s, if (s->buf_type == RING_BUFFER_INSTRUCTION) { if (!(s->ip_gma >= rb_start) || !(s->ip_gma < gma_bottom)) { - gvt_err("ip_gma %lx out of ring scope." + gvt_vgpu_err("ip_gma %lx out of ring scope." "(base:0x%lx, bottom: 0x%lx)\n", s->ip_gma, rb_start, gma_bottom); @@ -2528,7 +2537,7 @@ static int command_scan(struct parser_exec_state *s, return -EINVAL; } if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) { - gvt_err("ip_gma %lx out of range." + gvt_vgpu_err("ip_gma %lx out of range." "base 0x%lx head 0x%lx tail 0x%lx\n", s->ip_gma, rb_start, rb_head, rb_tail); @@ -2538,7 +2547,7 @@ static int command_scan(struct parser_exec_state *s, } ret = cmd_parser_exec(s); if (ret) { - gvt_err("cmd parser error\n"); + gvt_vgpu_err("cmd parser error\n"); parser_exec_state_dump(s); break; } @@ -2662,7 +2671,7 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_head, gma_top, workload->shadow_ring_buffer_va); if (ret) { - gvt_err("fail to copy guest ring buffer\n"); + gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } copy_len = gma_top - gma_head; @@ -2674,7 +2683,7 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_head, gma_tail, workload->shadow_ring_buffer_va + copy_len); if (ret) { - gvt_err("fail to copy guest ring buffer\n"); + gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } ring->tail += workload->rb_len; @@ -2685,16 +2694,17 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { int ret; + struct intel_vgpu *vgpu = workload->vgpu; ret = shadow_workload_ring_buffer(workload); if (ret) { - gvt_err("fail to shadow workload ring_buffer\n"); + gvt_vgpu_err("fail to shadow workload ring_buffer\n"); return ret; } ret = scan_workload(workload); if (ret) { - gvt_err("scan workload error\n"); + gvt_vgpu_err("scan workload error\n"); return ret; } return 0; @@ -2704,6 +2714,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) { int ctx_size = wa_ctx->indirect_ctx.size; unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma; + struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; struct drm_i915_gem_object *obj; int ret = 0; void *map; @@ -2717,14 +2728,14 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) /* get the va of the shadow batch buffer */ map = i915_gem_object_pin_map(obj, I915_MAP_WB); if (IS_ERR(map)) { - gvt_err("failed to vmap shadow indirect ctx\n"); + gvt_vgpu_err("failed to vmap shadow indirect ctx\n"); ret = PTR_ERR(map); goto put_obj; } ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) { - gvt_err("failed to set shadow indirect ctx to CPU\n"); + gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n"); goto unmap_src; } @@ -2733,7 +2744,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) guest_gma, guest_gma + ctx_size, map); if (ret) { - gvt_err("fail to copy guest indirect ctx\n"); + gvt_vgpu_err("fail to copy guest indirect ctx\n"); goto unmap_src; } @@ -2767,13 +2778,14 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { int ret; + struct intel_vgpu *vgpu = wa_ctx->workload->vgpu; if (wa_ctx->indirect_ctx.size == 0) return 0; ret = shadow_indirect_ctx(wa_ctx); if (ret) { - gvt_err("fail to shadow indirect ctx\n"); + gvt_vgpu_err("fail to shadow indirect ctx\n"); return ret; } @@ -2781,7 +2793,7 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) ret = scan_wa_ctx(wa_ctx); if (ret) { - gvt_err("scan wa ctx error\n"); + gvt_vgpu_err("scan wa ctx error\n"); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h index 68cba7bd980a..b0cff4dc2684 100644 --- a/drivers/gpu/drm/i915/gvt/debug.h +++ b/drivers/gpu/drm/i915/gvt/debug.h @@ -27,6 +27,14 @@ #define gvt_err(fmt, args...) \ DRM_ERROR("gvt: "fmt, ##args) +#define gvt_vgpu_err(fmt, args...) \ +do { \ + if (IS_ERR_OR_NULL(vgpu)) \ + DRM_DEBUG_DRIVER("gvt: "fmt, ##args); \ + else \ + DRM_DEBUG_DRIVER("gvt: vgpu %d: "fmt, vgpu->id, ##args);\ +} while (0) + #define gvt_dbg_core(fmt, args...) \ DRM_DEBUG_DRIVER("gvt: core: "fmt, ##args) diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index bda85dff7b2a..f1648fe5e5ea 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -52,16 +52,16 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu) unsigned char chr = 0; if (edid->state == I2C_NOT_SPECIFIED || !edid->slave_selected) { - gvt_err("Driver tries to read EDID without proper sequence!\n"); + gvt_vgpu_err("Driver tries to read EDID without proper sequence!\n"); return 0; } if (edid->current_edid_read >= EDID_SIZE) { - gvt_err("edid_get_byte() exceeds the size of EDID!\n"); + gvt_vgpu_err("edid_get_byte() exceeds the size of EDID!\n"); return 0; } if (!edid->edid_available) { - gvt_err("Reading EDID but EDID is not available!\n"); + gvt_vgpu_err("Reading EDID but EDID is not available!\n"); return 0; } @@ -72,7 +72,7 @@ static unsigned char edid_get_byte(struct intel_vgpu *vgpu) chr = edid_data->edid_block[edid->current_edid_read]; edid->current_edid_read++; } else { - gvt_err("No EDID available during the reading?\n"); + gvt_vgpu_err("No EDID available during the reading?\n"); } return chr; } @@ -223,7 +223,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE; break; default: - gvt_err("Unknown/reserved GMBUS cycle detected!\n"); + gvt_vgpu_err("Unknown/reserved GMBUS cycle detected!\n"); break; } /* @@ -292,8 +292,7 @@ static int gmbus3_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, */ } else { memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes); - gvt_err("vgpu%d: warning: gmbus3 read with nothing returned\n", - vgpu->id); + gvt_vgpu_err("warning: gmbus3 read with nothing returned\n"); } return 0; } diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 46eb9fd3c03f..f1f426a97aa9 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -172,6 +172,7 @@ static int emulate_execlist_ctx_schedule_out( struct intel_vgpu_execlist *execlist, struct execlist_ctx_descriptor_format *ctx) { + struct intel_vgpu *vgpu = execlist->vgpu; struct intel_vgpu_execlist_slot *running = execlist->running_slot; struct intel_vgpu_execlist_slot *pending = execlist->pending_slot; struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0]; @@ -183,7 +184,7 @@ static int emulate_execlist_ctx_schedule_out( gvt_dbg_el("schedule out context id %x\n", ctx->context_id); if (WARN_ON(!same_context(ctx, execlist->running_context))) { - gvt_err("schedule out context is not running context," + gvt_vgpu_err("schedule out context is not running context," "ctx id %x running ctx id %x\n", ctx->context_id, execlist->running_context->context_id); @@ -254,7 +255,7 @@ static struct intel_vgpu_execlist_slot *get_next_execlist_slot( status.udw = vgpu_vreg(vgpu, status_reg + 4); if (status.execlist_queue_full) { - gvt_err("virtual execlist slots are full\n"); + gvt_vgpu_err("virtual execlist slots are full\n"); return NULL; } @@ -270,11 +271,12 @@ static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist, struct execlist_ctx_descriptor_format *ctx0, *ctx1; struct execlist_context_status_format status; + struct intel_vgpu *vgpu = execlist->vgpu; gvt_dbg_el("emulate schedule-in\n"); if (!slot) { - gvt_err("no available execlist slot\n"); + gvt_vgpu_err("no available execlist slot\n"); return -EINVAL; } @@ -375,7 +377,6 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); if (IS_ERR(vma)) { - gvt_err("Cannot pin\n"); return; } @@ -428,7 +429,6 @@ static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, 0, CACHELINE_BYTES, 0); if (IS_ERR(vma)) { - gvt_err("Cannot pin indirect ctx obj\n"); return; } @@ -561,6 +561,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) { struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; struct intel_vgpu_mm *mm; + struct intel_vgpu *vgpu = workload->vgpu; int page_table_level; u32 pdp[8]; @@ -569,7 +570,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) } else if (desc->addressing_mode == 3) { /* legacy 64 bit */ page_table_level = 4; } else { - gvt_err("Advanced Context mode(SVM) is not supported!\n"); + gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n"); return -EINVAL; } @@ -583,7 +584,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT, pdp, page_table_level, 0); if (IS_ERR(mm)) { - gvt_err("fail to create mm object.\n"); + gvt_vgpu_err("fail to create mm object.\n"); return PTR_ERR(mm); } } @@ -609,7 +610,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT)); if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_err("invalid guest context LRCA: %x\n", desc->lrca); + gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca); return -EINVAL; } @@ -724,8 +725,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) continue; if (!desc[i]->privilege_access) { - gvt_err("vgpu%d: unexpected GGTT elsp submission\n", - vgpu->id); + gvt_vgpu_err("unexpected GGTT elsp submission\n"); return -EINVAL; } @@ -735,15 +735,13 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) } if (!valid_desc_bitmap) { - gvt_err("vgpu%d: no valid desc in a elsp submission\n", - vgpu->id); + gvt_vgpu_err("no valid desc in a elsp submission\n"); return -EINVAL; } if (!test_bit(0, (void *)&valid_desc_bitmap) && test_bit(1, (void *)&valid_desc_bitmap)) { - gvt_err("vgpu%d: weird elsp submission, desc 0 is not valid\n", - vgpu->id); + gvt_vgpu_err("weird elsp submission, desc 0 is not valid\n"); return -EINVAL; } @@ -752,8 +750,7 @@ int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id) ret = submit_context(vgpu, ring_id, &valid_desc[i], emulate_schedule_in); if (ret) { - gvt_err("vgpu%d: fail to schedule workload\n", - vgpu->id); + gvt_vgpu_err("fail to schedule workload\n"); return ret; } emulate_schedule_in = false; diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 6a5ff23ded90..da7312715824 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -49,8 +49,8 @@ bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size) { if ((!vgpu_gmadr_is_valid(vgpu, addr)) || (size && !vgpu_gmadr_is_valid(vgpu, addr + size - 1))) { - gvt_err("vgpu%d: invalid range gmadr 0x%llx size 0x%x\n", - vgpu->id, addr, size); + gvt_vgpu_err("invalid range gmadr 0x%llx size 0x%x\n", + addr, size); return false; } return true; @@ -430,7 +430,7 @@ static int gtt_entry_p2m(struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *p, mfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gfn); if (mfn == INTEL_GVT_INVALID_ADDR) { - gvt_err("fail to translate gfn: 0x%lx\n", gfn); + gvt_vgpu_err("fail to translate gfn: 0x%lx\n", gfn); return -ENXIO; } @@ -611,7 +611,7 @@ static inline int init_shadow_page(struct intel_vgpu *vgpu, daddr = dma_map_page(kdev, p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(kdev, daddr)) { - gvt_err("fail to map dma addr\n"); + gvt_vgpu_err("fail to map dma addr\n"); return -EINVAL; } @@ -735,7 +735,7 @@ retry: if (reclaim_one_mm(vgpu->gvt)) goto retry; - gvt_err("fail to allocate ppgtt shadow page\n"); + gvt_vgpu_err("fail to allocate ppgtt shadow page\n"); return ERR_PTR(-ENOMEM); } @@ -750,14 +750,14 @@ retry: */ ret = init_shadow_page(vgpu, &spt->shadow_page, type); if (ret) { - gvt_err("fail to initialize shadow page for spt\n"); + gvt_vgpu_err("fail to initialize shadow page for spt\n"); goto err; } ret = intel_vgpu_init_guest_page(vgpu, &spt->guest_page, gfn, ppgtt_write_protection_handler, NULL); if (ret) { - gvt_err("fail to initialize guest page for spt\n"); + gvt_vgpu_err("fail to initialize guest page for spt\n"); goto err; } @@ -776,8 +776,7 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_find_shadow_page( if (p) return shadow_page_to_ppgtt_spt(p); - gvt_err("vgpu%d: fail to find ppgtt shadow page: 0x%lx\n", - vgpu->id, mfn); + gvt_vgpu_err("fail to find ppgtt shadow page: 0x%lx\n", mfn); return NULL; } @@ -827,8 +826,8 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, } s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); if (!s) { - gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n", - vgpu->id, ops->get_pfn(e)); + gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n", + ops->get_pfn(e)); return -ENXIO; } return ppgtt_invalidate_shadow_page(s); @@ -836,6 +835,7 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) { + struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_entry e; unsigned long index; int ret; @@ -854,7 +854,7 @@ static int ppgtt_invalidate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) for_each_present_shadow_entry(spt, &e, index) { if (!gtt_type_is_pt(get_next_pt_type(e.type))) { - gvt_err("GVT doesn't support pse bit for now\n"); + gvt_vgpu_err("GVT doesn't support pse bit for now\n"); return -EINVAL; } ret = ppgtt_invalidate_shadow_page_by_shadow_entry( @@ -868,8 +868,8 @@ release: ppgtt_free_shadow_page(spt); return 0; fail: - gvt_err("vgpu%d: fail: shadow page %p shadow entry 0x%llx type %d\n", - spt->vgpu->id, spt, e.val64, e.type); + gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n", + spt, e.val64, e.type); return ret; } @@ -914,8 +914,8 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_shadow_page_by_guest_entry( } return s; fail: - gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n", - vgpu->id, s, we->val64, we->type); + gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", + s, we->val64, we->type); return ERR_PTR(ret); } @@ -953,7 +953,7 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) for_each_present_guest_entry(spt, &ge, i) { if (!gtt_type_is_pt(get_next_pt_type(ge.type))) { - gvt_err("GVT doesn't support pse bit now\n"); + gvt_vgpu_err("GVT doesn't support pse bit now\n"); ret = -EINVAL; goto fail; } @@ -969,8 +969,8 @@ static int ppgtt_populate_shadow_page(struct intel_vgpu_ppgtt_spt *spt) } return 0; fail: - gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n", - vgpu->id, spt, ge.val64, ge.type); + gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", + spt, ge.val64, ge.type); return ret; } @@ -999,7 +999,7 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, struct intel_vgpu_ppgtt_spt *s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e)); if (!s) { - gvt_err("fail to find guest page\n"); + gvt_vgpu_err("fail to find guest page\n"); ret = -ENXIO; goto fail; } @@ -1011,8 +1011,8 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, ppgtt_set_shadow_entry(spt, &e, index); return 0; fail: - gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n", - vgpu->id, spt, e.val64, e.type); + gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n", + spt, e.val64, e.type); return ret; } @@ -1046,8 +1046,8 @@ static int ppgtt_handle_guest_entry_add(struct intel_vgpu_guest_page *gpt, } return 0; fail: - gvt_err("vgpu%d: fail: spt %p guest entry 0x%llx type %d\n", vgpu->id, - spt, we->val64, we->type); + gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n", + spt, we->val64, we->type); return ret; } @@ -1250,8 +1250,8 @@ static int ppgtt_handle_guest_write_page_table( } return 0; fail: - gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d.\n", - vgpu->id, spt, we->val64, we->type); + gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n", + spt, we->val64, we->type); return ret; } @@ -1493,7 +1493,7 @@ static int shadow_mm(struct intel_vgpu_mm *mm) spt = ppgtt_populate_shadow_page_by_guest_entry(vgpu, &ge); if (IS_ERR(spt)) { - gvt_err("fail to populate guest root pointer\n"); + gvt_vgpu_err("fail to populate guest root pointer\n"); ret = PTR_ERR(spt); goto fail; } @@ -1566,7 +1566,7 @@ struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, ret = gtt->mm_alloc_page_table(mm); if (ret) { - gvt_err("fail to allocate page table for mm\n"); + gvt_vgpu_err("fail to allocate page table for mm\n"); goto fail; } @@ -1584,7 +1584,7 @@ struct intel_vgpu_mm *intel_vgpu_create_mm(struct intel_vgpu *vgpu, } return mm; fail: - gvt_err("fail to create mm\n"); + gvt_vgpu_err("fail to create mm\n"); if (mm) intel_gvt_mm_unreference(mm); return ERR_PTR(ret); @@ -1760,7 +1760,7 @@ unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma) mm->page_table_level, gma, gpa); return gpa; err: - gvt_err("invalid mm type: %d gma %lx\n", mm->type, gma); + gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma); return INTEL_GVT_INVALID_ADDR; } @@ -1836,8 +1836,7 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, if (ops->test_present(&e)) { ret = gtt_entry_p2m(vgpu, &e, &m); if (ret) { - gvt_err("vgpu%d: fail to translate guest gtt entry\n", - vgpu->id); + gvt_vgpu_err("fail to translate guest gtt entry\n"); return ret; } } else { @@ -1893,14 +1892,14 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, scratch_pt = (void *)get_zeroed_page(GFP_KERNEL); if (!scratch_pt) { - gvt_err("fail to allocate scratch page\n"); + gvt_vgpu_err("fail to allocate scratch page\n"); return -ENOMEM; } daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, daddr)) { - gvt_err("fail to dmamap scratch_pt\n"); + gvt_vgpu_err("fail to dmamap scratch_pt\n"); __free_page(virt_to_page(scratch_pt)); return -ENOMEM; } @@ -2003,7 +2002,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) ggtt_mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_GGTT, NULL, 1, 0); if (IS_ERR(ggtt_mm)) { - gvt_err("fail to create mm for ggtt.\n"); + gvt_vgpu_err("fail to create mm for ggtt.\n"); return PTR_ERR(ggtt_mm); } @@ -2076,7 +2075,6 @@ static int setup_spt_oos(struct intel_gvt *gvt) for (i = 0; i < preallocated_oos_pages; i++) { oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL); if (!oos_page) { - gvt_err("fail to pre-allocate oos page\n"); ret = -ENOMEM; goto fail; } @@ -2166,7 +2164,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, mm = intel_vgpu_create_mm(vgpu, INTEL_GVT_MM_PPGTT, pdp, page_table_level, 0); if (IS_ERR(mm)) { - gvt_err("fail to create mm\n"); + gvt_vgpu_err("fail to create mm\n"); return PTR_ERR(mm); } } @@ -2196,7 +2194,7 @@ int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, mm = intel_vgpu_find_ppgtt_mm(vgpu, page_table_level, pdp); if (!mm) { - gvt_err("fail to find ppgtt instance.\n"); + gvt_vgpu_err("fail to find ppgtt instance.\n"); return -EINVAL; } intel_gvt_mm_unreference(mm); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index de975f40aebf..eaff45d417e8 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -181,11 +181,9 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); if (!vgpu->mmio.disable_warn_untrack) { - gvt_err("vgpu%d: found oob fence register access\n", - vgpu->id); - gvt_err("vgpu%d: total fence %d, access fence %d\n", - vgpu->id, vgpu_fence_sz(vgpu), - fence_num); + gvt_vgpu_err("found oob fence register access\n"); + gvt_vgpu_err("total fence %d, access fence %d\n", + vgpu_fence_sz(vgpu), fence_num); } memset(p_data, 0, bytes); return -EINVAL; @@ -249,7 +247,7 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu, break; default: /*should not hit here*/ - gvt_err("invalid forcewake offset 0x%x\n", offset); + gvt_vgpu_err("invalid forcewake offset 0x%x\n", offset); return -EINVAL; } } else { @@ -530,7 +528,7 @@ static int check_fdi_rx_train_status(struct intel_vgpu *vgpu, fdi_tx_train_bits = FDI_LINK_TRAIN_PATTERN_2; fdi_iir_check_bits = FDI_RX_SYMBOL_LOCK; } else { - gvt_err("Invalid train pattern %d\n", train_pattern); + gvt_vgpu_err("Invalid train pattern %d\n", train_pattern); return -EINVAL; } @@ -588,7 +586,7 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu, else if (FDI_RX_IMR_TO_PIPE(offset) != INVALID_INDEX) index = FDI_RX_IMR_TO_PIPE(offset); else { - gvt_err("Unsupport registers %x\n", offset); + gvt_vgpu_err("Unsupport registers %x\n", offset); return -EINVAL; } @@ -818,7 +816,7 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, u32 data; if (!dpy_is_valid_port(port_index)) { - gvt_err("GVT(%d): Unsupported DP port access!\n", vgpu->id); + gvt_vgpu_err("Unsupported DP port access!\n"); return 0; } @@ -1016,8 +1014,7 @@ static void write_virtual_sbi_register(struct intel_vgpu *vgpu, if (i == num) { if (num == SBI_REG_MAX) { - gvt_err("vgpu%d: SBI caching meets maximum limits\n", - vgpu->id); + gvt_vgpu_err("SBI caching meets maximum limits\n"); return; } display->sbi.number++; @@ -1097,7 +1094,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, break; } if (invalid_read) - gvt_err("invalid pvinfo read: [%x:%x] = %x\n", + gvt_vgpu_err("invalid pvinfo read: [%x:%x] = %x\n", offset, bytes, *(u32 *)p_data); vgpu->pv_notified = true; return 0; @@ -1125,7 +1122,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) case 1: /* Remove this in guest driver. */ break; default: - gvt_err("Invalid PV notification %d\n", notification); + gvt_vgpu_err("Invalid PV notification %d\n", notification); } return ret; } @@ -1181,7 +1178,7 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE); break; default: - gvt_err("invalid pvinfo write offset %x bytes %x data %x\n", + gvt_vgpu_err("invalid pvinfo write offset %x bytes %x data %x\n", offset, bytes, data); break; } @@ -1415,7 +1412,8 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, if (execlist->elsp_dwords.index == 3) { ret = intel_vgpu_submit_execlist(vgpu, ring_id); if(ret) - gvt_err("fail submit workload on ring %d\n", ring_id); + gvt_vgpu_err("fail submit workload on ring %d\n", + ring_id); } ++execlist->elsp_dwords.index; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 84d801638ede..cd218b07c6f6 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -426,7 +426,7 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) { - struct intel_vgpu *vgpu; + struct intel_vgpu *vgpu = NULL; struct intel_vgpu_type *type; struct device *pdev; void *gvt; @@ -437,7 +437,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj)); if (!type) { - gvt_err("failed to find type %s to create\n", + gvt_vgpu_err("failed to find type %s to create\n", kobject_name(kobj)); ret = -EINVAL; goto out; @@ -446,7 +446,7 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) vgpu = intel_gvt_ops->vgpu_create(gvt, type); if (IS_ERR_OR_NULL(vgpu)) { ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu); - gvt_err("failed to create intel vgpu: %d\n", ret); + gvt_vgpu_err("failed to create intel vgpu: %d\n", ret); goto out; } @@ -526,7 +526,8 @@ static int intel_vgpu_open(struct mdev_device *mdev) ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events, &vgpu->vdev.iommu_notifier); if (ret != 0) { - gvt_err("vfio_register_notifier for iommu failed: %d\n", ret); + gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", + ret); goto out; } @@ -534,7 +535,8 @@ static int intel_vgpu_open(struct mdev_device *mdev) ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events, &vgpu->vdev.group_notifier); if (ret != 0) { - gvt_err("vfio_register_notifier for group failed: %d\n", ret); + gvt_vgpu_err("vfio_register_notifier for group failed: %d\n", + ret); goto undo_iommu; } @@ -635,7 +637,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, if (index >= VFIO_PCI_NUM_REGIONS) { - gvt_err("invalid index: %u\n", index); + gvt_vgpu_err("invalid index: %u\n", index); return -EINVAL; } @@ -669,7 +671,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, case VFIO_PCI_VGA_REGION_INDEX: case VFIO_PCI_ROM_REGION_INDEX: default: - gvt_err("unsupported region: %u\n", index); + gvt_vgpu_err("unsupported region: %u\n", index); } return ret == 0 ? count : ret; @@ -861,7 +863,7 @@ static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, trigger = eventfd_ctx_fdget(fd); if (IS_ERR(trigger)) { - gvt_err("eventfd_ctx_fdget failed\n"); + gvt_vgpu_err("eventfd_ctx_fdget failed\n"); return PTR_ERR(trigger); } vgpu->vdev.msi_trigger = trigger; @@ -1120,7 +1122,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, ret = vfio_set_irqs_validate_and_prepare(&hdr, max, VFIO_PCI_NUM_IRQS, &data_size); if (ret) { - gvt_err("intel:vfio_set_irqs_validate_and_prepare failed\n"); + gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n"); return -EINVAL; } if (data_size) { @@ -1310,7 +1312,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev) kvm = vgpu->vdev.kvm; if (!kvm || kvm->mm != current->mm) { - gvt_err("KVM is required to use Intel vGPU\n"); + gvt_vgpu_err("KVM is required to use Intel vGPU\n"); return -ESRCH; } @@ -1337,8 +1339,10 @@ static int kvmgt_guest_init(struct mdev_device *mdev) static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) { + struct intel_vgpu *vgpu = info->vgpu; + if (!info) { - gvt_err("kvmgt_guest_info invalid\n"); + gvt_vgpu_err("kvmgt_guest_info invalid\n"); return false; } @@ -1383,12 +1387,14 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) unsigned long iova, pfn; struct kvmgt_guest_info *info; struct device *dev; + struct intel_vgpu *vgpu; int rc; if (!handle_valid(handle)) return INTEL_GVT_INVALID_ADDR; info = (struct kvmgt_guest_info *)handle; + vgpu = info->vgpu; iova = gvt_cache_find(info->vgpu, gfn); if (iova != INTEL_GVT_INVALID_ADDR) return iova; @@ -1397,13 +1403,14 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) dev = mdev_dev(info->vgpu->vdev.mdev); rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn); if (rc != 1) { - gvt_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", gfn, rc); + gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n", + gfn, rc); return INTEL_GVT_INVALID_ADDR; } /* transfer to host iova for GFX to use DMA */ rc = gvt_dma_map_iova(info->vgpu, pfn, &iova); if (rc) { - gvt_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn); + gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn); vfio_unpin_pages(dev, &gfn, 1); return INTEL_GVT_INVALID_ADDR; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 60b698cb8365..1ba3bdb09341 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -142,10 +142,10 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, ret = intel_gvt_hypervisor_read_gpa(vgpu, pa, p_data, bytes); if (ret) { - gvt_err("vgpu%d: guest page read error %d, " + gvt_vgpu_err("guest page read error %d, " "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n", - vgpu->id, ret, - gp->gfn, pa, *(u32 *)p_data, bytes); + ret, gp->gfn, pa, *(u32 *)p_data, + bytes); } mutex_unlock(&gvt->lock); return ret; @@ -200,14 +200,13 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, ret = intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); if (!vgpu->mmio.disable_warn_untrack) { - gvt_err("vgpu%d: read untracked MMIO %x(%dB) val %x\n", - vgpu->id, offset, bytes, *(u32 *)p_data); + gvt_vgpu_err("read untracked MMIO %x(%dB) val %x\n", + offset, bytes, *(u32 *)p_data); if (offset == 0x206c) { - gvt_err("------------------------------------------\n"); - gvt_err("vgpu%d: likely triggers a gfx reset\n", - vgpu->id); - gvt_err("------------------------------------------\n"); + gvt_vgpu_err("------------------------------------------\n"); + gvt_vgpu_err("likely triggers a gfx reset\n"); + gvt_vgpu_err("------------------------------------------\n"); vgpu->mmio.disable_warn_untrack = true; } } @@ -220,8 +219,8 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, mutex_unlock(&gvt->lock); return 0; err: - gvt_err("vgpu%d: fail to emulate MMIO read %08x len %d\n", - vgpu->id, offset, bytes); + gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n", + offset, bytes); mutex_unlock(&gvt->lock); return ret; } @@ -259,10 +258,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, if (gp) { ret = gp->handler(gp, pa, p_data, bytes); if (ret) { - gvt_err("vgpu%d: guest page write error %d, " - "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n", - vgpu->id, ret, - gp->gfn, pa, *(u32 *)p_data, bytes); + gvt_err("guest page write error %d, " + "gfn 0x%lx, pa 0x%llx, " + "var 0x%x, len %d\n", + ret, gp->gfn, pa, + *(u32 *)p_data, bytes); } mutex_unlock(&gvt->lock); return ret; @@ -329,8 +329,8 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, /* all register bits are RO. */ if (ro_mask == ~(u64)0) { - gvt_err("vgpu%d: try to write RO reg %x\n", - vgpu->id, offset); + gvt_vgpu_err("try to write RO reg %x\n", + offset); ret = 0; goto out; } @@ -360,8 +360,8 @@ out: mutex_unlock(&gvt->lock); return 0; err: - gvt_err("vgpu%d: fail to emulate MMIO write %08x len %d\n", - vgpu->id, offset, bytes); + gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset, + bytes); mutex_unlock(&gvt->lock); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 5d1caf9daba9..311799136d7f 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -67,14 +67,15 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) mfn = intel_gvt_hypervisor_virt_to_mfn(vgpu_opregion(vgpu)->va + i * PAGE_SIZE); if (mfn == INTEL_GVT_INVALID_ADDR) { - gvt_err("fail to get MFN from VA\n"); + gvt_vgpu_err("fail to get MFN from VA\n"); return -EINVAL; } ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, vgpu_opregion(vgpu)->gfn[i], mfn, 1, map); if (ret) { - gvt_err("fail to map GFN to MFN, errno: %d\n", ret); + gvt_vgpu_err("fail to map GFN to MFN, errno: %d\n", + ret); return ret; } } @@ -287,7 +288,7 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci) parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM; if (!(swsci & SWSCI_SCI_SELECT)) { - gvt_err("vgpu%d: requesting SMI service\n", vgpu->id); + gvt_vgpu_err("requesting SMI service\n"); return 0; } /* ignore non 0->1 trasitions */ @@ -300,9 +301,8 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci) func = GVT_OPREGION_FUNC(*scic); subfunc = GVT_OPREGION_SUBFUNC(*scic); if (!querying_capabilities(*scic)) { - gvt_err("vgpu%d: requesting runtime service: func \"%s\"," + gvt_vgpu_err("requesting runtime service: func \"%s\"," " subfunc \"%s\"\n", - vgpu->id, opregion_func_name(func), opregion_subfunc_name(subfunc)); /* diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 73f052a4f424..95ee091ce085 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -167,7 +167,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) I915_WRITE_FW(reg, 0x1); if (wait_for_atomic((I915_READ_FW(reg) == 0), 50)) - gvt_err("timeout in invalidate ring (%d) tlb\n", ring_id); + gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id); else vgpu_vreg(vgpu, regs[ring_id]) = 0; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d3a56c949025..d29e4352d529 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -84,7 +84,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) (u32)((workload->ctx_desc.lrca + i) << GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_err("Invalid guest context descriptor\n"); + gvt_vgpu_err("Invalid guest context descriptor\n"); return -EINVAL; } @@ -176,6 +176,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct drm_i915_gem_request *rq; + struct intel_vgpu *vgpu = workload->vgpu; int ret; gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", @@ -189,7 +190,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { - gvt_err("fail to allocate gem request\n"); + gvt_vgpu_err("fail to allocate gem request\n"); ret = PTR_ERR(rq); goto out; } @@ -322,7 +323,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) (u32)((workload->ctx_desc.lrca + i) << GTT_PAGE_SHIFT)); if (context_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_err("invalid guest context descriptor\n"); + gvt_vgpu_err("invalid guest context descriptor\n"); return; } @@ -417,6 +418,7 @@ static int workload_thread(void *priv) int ring_id = p->ring_id; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = NULL; + struct intel_vgpu *vgpu = NULL; int ret; bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -459,7 +461,8 @@ static int workload_thread(void *priv) mutex_unlock(&gvt->lock); if (ret) { - gvt_err("fail to dispatch workload, skip\n"); + vgpu = workload->vgpu; + gvt_vgpu_err("fail to dispatch workload, skip\n"); goto complete; } -- cgit v1.2.3 From 3f765a341798ebd4e0ece7cce34399a8fd4a7f9f Mon Sep 17 00:00:00 2001 From: Yulei Zhang Date: Mon, 13 Mar 2017 23:21:27 +0800 Subject: drm/i915/gvt: correct the ggtt valid bit check in pipe control command GGTT valid bit in pipe control command move to DWORD1 after SNB, so change the valid check code correspondingly. v2: per Zhenyu's comment, replace the bit check with MACRO define PIPE_CONTROL_GLOBAL_GTT_IVB Signed-off-by: Yulei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 2ca0506d8d8c..2b92cc8a7d1a 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1030,7 +1030,7 @@ static int cmd_handler_pipe_control(struct parser_exec_state *s) ret = cmd_reg_handler(s, 0x2358, 1, "pipe_ctrl"); else if (post_sync == 1) { /* check ggtt*/ - if ((cmd_val(s, 2) & (1 << 2))) { + if ((cmd_val(s, 1) & PIPE_CONTROL_GLOBAL_GTT_IVB)) { gma = cmd_val(s, 2) & GENMASK(31, 3); if (gmadr_bytes == 8) gma |= (cmd_gma_hi(s, 3)) << 32; -- cgit v1.2.3 From 3dce2aca02929f180ab66171b333fa48fe485a03 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Mar 2017 22:08:08 +0000 Subject: drm/i915/gvt: Remove bogus retry around i915_wait_request commit 8f1117abb408 ("drm/i915/gvt: handle workload lifecycle properly") includes some nonsense to retry a indefinite wait - i915_wait_request() does not return until the request is completed when used from an uninterruptible context. Fixes: 8f1117abb408 ("drm/i915/gvt: handle workload lifecycle properly" Signed-off-by: Chris Wilson Cc: Chuanxiao Dong Cc: Zhenyu Wang Cc: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d29e4352d529..dd8f8cc69a76 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -468,19 +468,7 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); -retry: - i915_wait_request(workload->req, - 0, MAX_SCHEDULE_TIMEOUT); - /* I915 has replay mechanism and a request will be replayed - * if there is i915 reset. So the seqno will be updated anyway. - * If the seqno is not updated yet after waiting, which means - * the replay may still be in progress and we can wait again. - */ - if (!i915_gem_request_completed(workload->req)) { - gvt_dbg_sched("workload %p not completed, wait again\n", - workload); - goto retry; - } + i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); complete: gvt_dbg_sched("will complete workload %p, status: %d\n", -- cgit v1.2.3 From cf2135ca3d50d6468e9216fef3d0d33c31af635b Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Thu, 9 Mar 2017 23:07:12 +0800 Subject: drm/i915/gvt: add enable_execlists check before enable gvt The GVT-g needs execlists to be enabled otherwise gvt should be disabled. Add a check for enable_execlists before enabling gvt. v2: use DRM_INFO in response to the user action Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/intel_gvt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index d23c0fcff751..8c04eca84351 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -77,6 +77,11 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) goto bail; } + if (!i915.enable_execlists) { + DRM_INFO("GPU guest virtualisation [GVT-g] disabled due to disabled execlist submission [i915.enable_execlists module parameter]\n"); + goto bail; + } + /* * We're not in host or fail to find a MPT module, disable GVT-g */ -- cgit v1.2.3 From 5180edc2421117766fcb9c2d2dc6bfaeefdeb709 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 16 Mar 2017 09:45:09 +0800 Subject: drm/i915/kvmgt: fix suspicious rcu dereference usage The srcu read lock must be held while accessing kvm memslots. This patch fix below warning for function kvmgt_rw_gpa(). [ 165.345093] [ ERR: suspicious RCU usage. ] [ 165.416538] Call Trace: [ 165.418989] dump_stack+0x85/0xc2 [ 165.422310] lockdep_rcu_suspicious+0xd7/0x110 [ 165.426769] kvm_read_guest_page+0x195/0x1b0 [kvm] [ 165.431574] kvm_read_guest+0x50/0x90 [kvm] [ 165.440492] kvmgt_rw_gpa+0x43/0xa0 [kvmgt] [ 165.444683] kvmgt_read_gpa+0x11/0x20 [kvmgt] [ 165.449061] gtt_get_entry64+0x4d/0xc0 [i915] [ 165.453438] ppgtt_populate_shadow_page_by_guest_entry+0x380/0xdc0 [i915] [ 165.460254] shadow_mm+0xd1/0x460 [i915] [ 165.472488] intel_vgpu_create_mm+0x1ab/0x210 [i915] [ 165.477472] intel_vgpu_g2v_create_ppgtt_mm+0x5f/0xc0 [i915] [ 165.483154] pvinfo_mmio_write+0x19b/0x1d0 [i915] [ 165.499068] intel_vgpu_emulate_mmio_write+0x3f9/0x600 [i915] [ 165.504827] intel_vgpu_rw+0x114/0x150 [kvmgt] [ 165.509281] intel_vgpu_write+0x16f/0x1a0 [kvmgt] [ 165.513993] vfio_mdev_write+0x20/0x30 [vfio_mdev] [ 165.518793] vfio_device_fops_write+0x24/0x30 [vfio] [ 165.523770] __vfs_write+0x28/0x120 [ 165.540529] vfs_write+0xce/0x1f0 v2: fix Cc format for stable Signed-off-by: Changbin Du Cc: # v4.10+ Reviewed-by: Xiao Guangrong Reviewed-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index cd218b07c6f6..1ea3eb270de8 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1424,7 +1424,7 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, { struct kvmgt_guest_info *info; struct kvm *kvm; - int ret; + int idx, ret; bool kthread = current->mm == NULL; if (!handle_valid(handle)) @@ -1436,8 +1436,10 @@ static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, if (kthread) use_mm(kvm->mm); + idx = srcu_read_lock(&kvm->srcu); ret = write ? kvm_write_guest(kvm, gpa, buf, len) : kvm_read_guest(kvm, gpa, buf, len); + srcu_read_unlock(&kvm->srcu, idx); if (kthread) unuse_mm(kvm->mm); -- cgit v1.2.3 From 17f1b1a6d4e5f41df161eb2a90af22c003a243fc Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Wed, 15 Mar 2017 23:16:01 -0400 Subject: drm/i915/gvt: scan shadow indirect context image when valid The shadow indirect context image should be only scanned when valid. So far, Only RCS ring has the shadow indirect context image. This patch limits the scan logic only for RCS ring. v2. refine description of the subject v3. fix alignment. (Zhenyu) Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index dd8f8cc69a76..907e6bc794f6 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -203,9 +203,12 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) if (ret) goto out; - ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); - if (ret) - goto out; + if ((workload->ring_id == RCS) && + (workload->wa_ctx.indirect_ctx.size != 0)) { + ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); + if (ret) + goto out; + } ret = populate_shadow_context(workload); if (ret) -- cgit v1.2.3 From 3cd23b828b37bd5ccf4b40132f12dbf20d7afdb6 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Thu, 16 Mar 2017 09:47:58 +0800 Subject: drm/i915/gvt: GVT pin/unpin shadow context When handling guest request, GVT needs to populate/update shadow_ctx with guest context. This behavior needs to make sure the shadow_ctx is pinned. The current implementation is relying on i195 allocate request to pin but this way cannot guarantee the i915 not to unpin the shadow_ctx when GVT update the guest context from shadow_ctx. So GVT should pin/unpin the shadow_ctx by itself. Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 907e6bc794f6..39a83eb7aecc 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -175,6 +175,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct drm_i915_gem_request *rq; struct intel_vgpu *vgpu = workload->vgpu; int ret; @@ -188,6 +189,21 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) mutex_lock(&dev_priv->drm.struct_mutex); + /* pin shadow context by gvt even the shadow context will be pinned + * when i915 alloc request. That is because gvt will update the guest + * context from shadow context when workload is completed, and at that + * moment, i915 may already unpined the shadow context to make the + * shadow_ctx pages invalid. So gvt need to pin itself. After update + * the guest context, gvt can unpin the shadow_ctx safely. + */ + ret = engine->context_pin(engine, shadow_ctx); + if (ret) { + gvt_vgpu_err("fail to pin shadow context\n"); + workload->status = ret; + mutex_unlock(&dev_priv->drm.struct_mutex); + return ret; + } + rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); if (IS_ERR(rq)) { gvt_vgpu_err("fail to allocate gem request\n"); @@ -231,6 +247,9 @@ out: if (!IS_ERR_OR_NULL(rq)) i915_add_request_no_flush(rq); + else + engine->context_unpin(engine, shadow_ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } @@ -380,6 +399,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) * For the workload w/o request, directly complete the workload. */ if (workload->req) { + struct drm_i915_private *dev_priv = + workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = + dev_priv->engine[workload->ring_id]; wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); @@ -392,6 +415,10 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) INTEL_GVT_EVENT_MAX) intel_vgpu_trigger_virtual_event(vgpu, event); } + mutex_lock(&dev_priv->drm.struct_mutex); + /* unpin shadow ctx as the shadow_ctx update is done */ + engine->context_unpin(engine, workload->vgpu->shadow_ctx); + mutex_unlock(&dev_priv->drm.struct_mutex); } gvt_dbg_sched("ring id %d complete workload %p status %d\n", -- cgit v1.2.3 From 2958b9013fcbabeeba221161d0712f5259f1e15d Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 17 Mar 2017 12:11:20 +0800 Subject: drm/i915/gvt: Fix gvt scheduler interval time Fix to correctly assign 1ms for gvt scheduler interval time, as previous code using HZ is pretty broken. And use no delay for start gvt scheduler function. Fixes: 4b63960ebd3f ("drm/i915/gvt: vGPU schedule policy framework") Cc: Zhi Wang Cc: stable@vger.kernel.org # v4.10+ Acked-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/sched_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 06c9584ac5f0..34b9acdf3479 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -101,7 +101,7 @@ struct tbs_sched_data { struct list_head runq_head; }; -#define GVT_DEFAULT_TIME_SLICE (1 * HZ / 1000) +#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1)) static void tbs_sched_func(struct work_struct *work) { @@ -223,7 +223,7 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu) return; list_add_tail(&vgpu_data->list, &sched_data->runq_head); - schedule_delayed_work(&sched_data->work, sched_data->period); + schedule_delayed_work(&sched_data->work, 0); } static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu) -- cgit v1.2.3 From e7ede72a6d40cb3a30c087142d79381ca8a31dab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Mar 2017 22:53:37 +0100 Subject: perf symbols: Fix symbols__fixup_end heuristic for corner cases The current symbols__fixup_end() heuristic for the last entry in the rb tree is suboptimal as it leads to not being able to recognize the symbol in the call graph in a couple of corner cases, for example: i) If the symbol has a start address (f.e. exposed via kallsyms) that is at a page boundary, then the roundup(curr->start, 4096) for the last entry will result in curr->start == curr->end with a symbol length of zero. ii) If the symbol has a start address that is shortly before a page boundary, then also here, curr->end - curr->start will just be very few bytes, where it's unrealistic that we could perform a match against. Instead, change the heuristic to roundup(curr->start, 4096) + 4096, so that we can catch such corner cases and have a better chance to find that specific symbol. It's still just best effort as the real end of the symbol is unknown to us (and could even be at a larger offset than the current range), but better than the current situation. Alexei reported that he recently run into case i) with a JITed eBPF program (these are all page aligned) as the last symbol which wasn't properly shown in the call graph (while other eBPF program symbols in the rb tree were displayed correctly). Since this is a generic issue, lets try to improve the heuristic a bit. Reported-and-Tested-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Fixes: 2e538c4a1847 ("perf tools: Improve kernel/modules symbol lookup") Link: http://lkml.kernel.org/r/bb5c80d27743be6f12afc68405f1956a330e1bc9.1489614365.git.daniel@iogearbox.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 70e389bc4af7..9b4d8ba22fed 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols) /* Last entry */ if (curr->end == curr->start) - curr->end = roundup(curr->start, 4096); + curr->end = roundup(curr->start, 4096) + 4096; } void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) -- cgit v1.2.3 From 77f88796cee819b9c4562b0b6b44691b3b7755b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Mar 2017 16:54:24 -0400 Subject: cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups Creation of a kthread goes through a couple interlocked stages between the kthread itself and its creator. Once the new kthread starts running, it initializes itself and wakes up the creator. The creator then can further configure the kthread and then let it start doing its job by waking it up. In this configuration-by-creator stage, the creator is the only one that can wake it up but the kthread is visible to userland. When altering the kthread's attributes from userland is allowed, this is fine; however, for cases where CPU affinity is critical, kthread_bind() is used to first disable affinity changes from userland and then set the affinity. This also prevents the kthread from being migrated into non-root cgroups as that can affect the CPU affinity and many other things. Unfortunately, the cgroup side of protection is racy. While the PF_NO_SETAFFINITY flag prevents further migrations, userland can win the race before the creator sets the flag with kthread_bind() and put the kthread in a non-root cgroup, which can lead to all sorts of problems including incorrect CPU affinity and starvation. This bug got triggered by userland which periodically tries to migrate all processes in the root cpuset cgroup to a non-root one. Per-cpu workqueue workers got caught while being created and ended up with incorrected CPU affinity breaking concurrency management and sometimes stalling workqueue execution. This patch adds task->no_cgroup_migration which disallows the task to be migrated by userland. kthreadd starts with the flag set making every child kthread start in the root cgroup with migration disallowed. The flag is cleared after the kthread finishes initialization by which time PF_NO_SETAFFINITY is set if the kthread should stay in the root cgroup. It'd be better to wait for the initialization instead of failing but I couldn't think of a way of implementing that without adding either a new PF flag, or sleeping and retrying from waiting side. Even if userland depends on changing cgroup membership of a kthread, it either has to be synchronized with kthread_create() or periodically repeat, so it's unlikely that this would break anything. v2: Switch to a simpler implementation using a new task_struct bit field suggested by Oleg. Signed-off-by: Tejun Heo Suggested-by: Oleg Nesterov Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Reported-and-debugged-by: Chris Mason Cc: stable@vger.kernel.org # v4.3+ (we can't close the race on < v4.3) Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 21 +++++++++++++++++++++ include/linux/sched.h | 4 ++++ kernel/cgroup/cgroup.c | 9 +++++---- kernel/kthread.c | 3 +++ 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f6b43fbb141c..af9c86e958bd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -570,6 +570,25 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } +static inline void cgroup_init_kthreadd(void) +{ + /* + * kthreadd is inherited by all kthreads, keep it in the root so + * that the new kthreads are guaranteed to stay in the root until + * initialization is finished. + */ + current->no_cgroup_migration = 1; +} + +static inline void cgroup_kthread_ready(void) +{ + /* + * This kthread finished initialization. The creator should have + * set PF_NO_SETAFFINITY if this kthread should stay in the root. + */ + current->no_cgroup_migration = 0; +} + #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -590,6 +609,8 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } +static inline void cgroup_init_kthreadd(void) {} +static inline void cgroup_kthread_ready(void) {} static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..4cf9a59a4d08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -604,6 +604,10 @@ struct task_struct { #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif +#ifdef CONFIG_CGROUPS + /* disallow userland-initiated cgroup migration */ + unsigned no_cgroup_migration:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0125589c7428..638ef7568495 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2425,11 +2425,12 @@ ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, tsk = tsk->group_leader; /* - * Workqueue threads may acquire PF_NO_SETAFFINITY and become - * trapped in a cpuset, or RT worker may be born in a cgroup - * with no rt_runtime allocated. Just say no. + * kthreads may acquire PF_NO_SETAFFINITY during initialization. + * If userland migrates such a kthread to a non-root cgroup, it can + * become trapped in a cpuset, or RT kthread may be born in a + * cgroup with no rt_runtime allocated. Just say no. */ - if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { + if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) { ret = -EINVAL; goto out_unlock_rcu; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 2f26adea0f84..26db528c1d88 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -20,6 +20,7 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(kthread_create_lock); @@ -225,6 +226,7 @@ static int kthread(void *_create) ret = -EINTR; if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) { + cgroup_kthread_ready(); __kthread_parkme(self); ret = threadfn(data); } @@ -538,6 +540,7 @@ int kthreadd(void *unused) set_mems_allowed(node_states[N_MEMORY]); current->flags |= PF_NOFREEZE; + cgroup_init_kthreadd(); for (;;) { set_current_state(TASK_INTERRUPTIBLE); -- cgit v1.2.3 From cf8c73afb3abf0f8905efbaddd4ce11a0deec9da Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 17 Mar 2017 10:22:51 +0800 Subject: drm/amd/amdgpu: add POLARIS12 PCI ID Signed-off-by: Evan Quan Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f7adbace428a..b76cd699eb0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -421,6 +421,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6986, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x6987, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, + {0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12}, {0, 0, 0} -- cgit v1.2.3 From 822f5845f710e57d7e2df1fd1ee00d6e19d334fe Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 7 Feb 2017 13:08:23 -0600 Subject: efi/esrt: Cleanup bad memory map log messages The Intel Compute Stick STCK1A8LFC and Weibu F3C platforms both log 2 error messages during boot: efi: requested map not found. esrt: ESRT header is not in the memory map. Searching the web, this seems to affect many other platforms too. Since these messages are logged as errors, they appear on-screen during the boot process even when using the "quiet" boot parameter used by distros. Demote the ESRT error to a warning so that it does not appear on-screen, and delete the error logging from efi_mem_desc_lookup; both callsites of that function log more specific messages upon failure. Out of curiosity I looked closer at the Weibu F3C. There is no entry in the UEFI-provided memory map which corresponds to the ESRT pointer, but hacking the code to map it anyway, the ESRT does appear to be valid with 2 entries. Signed-off-by: Daniel Drake Cc: Matt Fleming Acked-by: Peter Jones Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 1 - drivers/firmware/efi/esrt.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index e7d404059b73..b372aad3b449 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -389,7 +389,6 @@ int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md) return 0; } } - pr_err_once("requested map not found.\n"); return -ENOENT; } diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c index 08b026864d4e..8554d7aec31c 100644 --- a/drivers/firmware/efi/esrt.c +++ b/drivers/firmware/efi/esrt.c @@ -254,7 +254,7 @@ void __init efi_esrt_init(void) rc = efi_mem_desc_lookup(efi.esrt, &md); if (rc < 0) { - pr_err("ESRT header is not in the memory map.\n"); + pr_warn("ESRT header is not in the memory map.\n"); return; } -- cgit v1.2.3 From 38a33101dd5fb8f07244e7e6dd04747a6cd2e3fb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 9 Mar 2017 11:36:36 +0800 Subject: NFS: fix the fault nrequests decreasing for nfs_inode COPY The nfs_commit_file for NFSv4.2's COPY operation goes through the commit path for normal WRITE, but without increase nrequests, so, the nrequests decreased in nfs_commit_release_pages is fault. After that, the nrequests will be wrong. [ 5670.299881] ------------[ cut here ]------------ [ 5670.300295] WARNING: CPU: 0 PID: 27656 at fs/nfs/inode.c:127 nfs_clear_inode+0x66/0x90 [nfs] [ 5670.300558] Modules linked in: nfsv4(E) nfs(E) fscache(E) tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock snd_seq_midi snd_seq_midi_event ppdev f2fs coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_ens1371 intel_rapl_perf gameport snd_ac97_codec vmw_balloon ac97_bus snd_seq snd_pcm joydev snd_rawmidi snd_timer snd_seq_device snd soundcore nfit parport_pc parport acpi_cpufreq tpm_tis tpm_tis_core tpm i2c_piix4 vmw_vmci shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c vmwgfx drm_kms_helper ttm drm e1000 crc32c_intel mptspi scsi_transport_spi serio_raw mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: fscache] [ 5670.302925] CPU: 0 PID: 27656 Comm: umount.nfs4 Tainted: G W E 4.11.0-rc1+ #519 [ 5670.303292] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 5670.304094] Call Trace: [ 5670.304510] dump_stack+0x63/0x86 [ 5670.304917] __warn+0xcb/0xf0 [ 5670.305276] warn_slowpath_null+0x1d/0x20 [ 5670.305661] nfs_clear_inode+0x66/0x90 [nfs] [ 5670.306093] nfs4_evict_inode+0x61/0x70 [nfsv4] [ 5670.306480] evict+0xbb/0x1c0 [ 5670.306888] dispose_list+0x4d/0x70 [ 5670.307233] evict_inodes+0x178/0x1a0 [ 5670.307579] generic_shutdown_super+0x44/0xf0 [ 5670.307985] nfs_kill_super+0x21/0x40 [nfs] [ 5670.308325] deactivate_locked_super+0x43/0x70 [ 5670.308698] deactivate_super+0x5a/0x60 [ 5670.309036] cleanup_mnt+0x3f/0x90 [ 5670.309407] __cleanup_mnt+0x12/0x20 [ 5670.309837] task_work_run+0x80/0xa0 [ 5670.310162] exit_to_usermode_loop+0x89/0x90 [ 5670.310497] syscall_return_slowpath+0xaa/0xb0 [ 5670.310875] entry_SYSCALL_64_fastpath+0xa7/0xa9 [ 5670.311197] RIP: 0033:0x7f1bb3617fe7 [ 5670.311545] RSP: 002b:00007ffecbabb828 EFLAGS: 00000206 ORIG_RAX: 00000000000000a6 [ 5670.311906] RAX: 0000000000000000 RBX: 0000000001dca1f0 RCX: 00007f1bb3617fe7 [ 5670.312239] RDX: 000000000000000c RSI: 0000000000000001 RDI: 0000000001dc83c0 [ 5670.312653] RBP: 0000000001dc83c0 R08: 0000000000000001 R09: 0000000000000000 [ 5670.312998] R10: 0000000000000755 R11: 0000000000000206 R12: 00007ffecbabc66a [ 5670.313335] R13: 0000000001dc83a0 R14: 0000000000000000 R15: 0000000000000000 [ 5670.313758] ---[ end trace bf4bfe7764e4eb40 ]--- Cc: linux-kernel@vger.kernel.org Fixes: 67911c8f18 ("NFS: Add nfs_commit_file()") Signed-off-by: Kinglong Mee Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e75b056f46f4..abb2c8a3be42 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1784,7 +1784,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) (long long)req_offset(req)); if (status < 0) { nfs_context_set_write_error(req->wb_context, status); - nfs_inode_remove_request(req); + if (req->wb_page) + nfs_inode_remove_request(req); dprintk_cont(", error = %d\n", status); goto next; } @@ -1793,7 +1794,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) * returned by the server against all stored verfs. */ if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { /* We have a match */ - nfs_inode_remove_request(req); + if (req->wb_page) + nfs_inode_remove_request(req); dprintk_cont(" OK\n"); goto next; } -- cgit v1.2.3 From eed50879d64ab1b9f76445dbab822e43a098b309 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 11 Mar 2017 15:52:47 -0500 Subject: xprtrdma: Squelch kbuild sparse complaint New complaint from kbuild for 4.9.y: net/sunrpc/xprtrdma/verbs.c:489:19: sparse: incompatible types in comparison expression (different type sizes) verbs.c: 489 max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); I can't reproduce this running sparse here. Likewise, "make W=1 net/sunrpc/xprtrdma/verbs.o" never indicated any issue. A little poking suggests that because the range of its values is small, gcc can make the actual width of RPCRDMA_MAX_SEND_SGES smaller than the width of an unsigned integer. Fixes: 16f906d66cd7 ("xprtrdma: Reduce required number of send SGEs") Signed-off-by: Chuck Lever Cc: stable@kernel.org Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 81cd31acf690..3b332b395045 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -503,7 +503,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct ib_cq *sendcq, *recvcq; int rc; - max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); + max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, + RPCRDMA_MAX_SEND_SGES); if (max_sge < RPCRDMA_MIN_SEND_SGES) { pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; -- cgit v1.2.3 From 05fae7bbc237bc7de0ee9c3dcf85b2572a80e3b5 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 10:48:13 +0800 Subject: nfs: make nfs4_cb_sv_ops static Fixes the following sparse warning: fs/nfs/callback.c:235:21: warning: symbol 'nfs4_cb_sv_ops' was not declared. Should it be static? Signed-off-by: Jason Yan Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 484bebc20bca..5c8a096d763e 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -231,12 +231,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = { .svo_module = THIS_MODULE, }; -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = &nfs41_cb_sv_ops, }; #else -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = NULL, }; -- cgit v1.2.3 From 63513232f8cd219dcaa5eafae028740ed3067d83 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 13 Mar 2017 10:36:19 -0400 Subject: NFS prevent double free in async nfs4_exchange_id Since rpc_task is async, the release function should be called which will free the impl_id, scope, and owner. Trond pointed at 2 more problems: -- use of client pointer after free in the nfs4_exchangeid_release() function -- cl_count mismatch if rpc_run_task() isn't run Fixes: 8d89bd70bc9 ("NFS setup async exchange_id") Signed-off-by: Olga Kornievskaia Cc: stable@vger.kernel.org # 4.9 Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c1f5369cd339..c780d98035cc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7425,11 +7425,11 @@ static void nfs4_exchange_id_release(void *data) struct nfs41_exchange_id_data *cdata = (struct nfs41_exchange_id_data *)data; - nfs_put_client(cdata->args.client); if (cdata->xprt) { xprt_put(cdata->xprt); rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient); } + nfs_put_client(cdata->args.client); kfree(cdata->res.impl_id); kfree(cdata->res.server_scope); kfree(cdata->res.server_owner); @@ -7536,10 +7536,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, task_setup_data.callback_data = calldata; task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - status = PTR_ERR(task); - goto out_impl_id; - } + if (IS_ERR(task)) + return PTR_ERR(task); if (!xprt) { status = rpc_wait_for_completion_task(task); @@ -7567,6 +7565,7 @@ out_server_owner: kfree(calldata->res.server_owner); out_calldata: kfree(calldata); + nfs_put_client(clp); goto out; } -- cgit v1.2.3 From 033853325fe3bdc70819a8b97915bd3bca41d3af Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 8 Mar 2017 14:39:15 -0500 Subject: NFSv4.1 respect server's max size in CREATE_SESSION Currently client doesn't respect max sizes server returns in CREATE_SESSION. nfs4_session_set_rwsize() gets called and server->rsize, server->wsize are 0 so they never get set to the sizes returned by the server. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 5ae9d64ea08b..8346ccbf2d52 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1023,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (server->rsize > server_resp_sz) + if (!server->rsize || server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (server->wsize > server_rqst_sz) + if (!server->wsize || server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } -- cgit v1.2.3 From a33e4b036d4612f62220f37a9fa29d273b6fd0ca Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 9 Mar 2017 12:56:48 -0500 Subject: pNFS: return status from nfs4_pnfs_ds_connect The nfs4_pnfs_ds_connect path can call rpc_create which can fail or it can wait on another context to reach the same failure. This checks that the rpc_create succeeded and returns the error to the caller. When an error is returned, both the files and flexfiles layouts will return NULL from _prepare_ds(). The flexfiles layout will also return the layout with the error NFS4ERR_NXIO. Signed-off-by: Weston Andros Adamson Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 25 ++++++++++++++++++++++++- fs/nfs/filelayout/filelayoutdev.c | 7 ++++++- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 3 ++- fs/nfs/internal.h | 2 ++ fs/nfs/pnfs.h | 2 +- fs/nfs/pnfs_nfs.c | 15 +++++++++++++-- 6 files changed, 48 insertions(+), 6 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 91a8d610ba0f..390ada8741bc 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat return NULL; } -static bool nfs_client_init_is_complete(const struct nfs_client *clp) +/* + * Return true if @clp is done initializing, false if still working on it. + * + * Use nfs_client_init_status to check if it was successful. + */ +bool nfs_client_init_is_complete(const struct nfs_client *clp) { return clp->cl_cons_state <= NFS_CS_READY; } +EXPORT_SYMBOL_GPL(nfs_client_init_is_complete); + +/* + * Return 0 if @clp was successfully initialized, -errno otherwise. + * + * This must be called *after* nfs_client_init_is_complete() returns true, + * otherwise it will pop WARN_ON_ONCE and return -EINVAL + */ +int nfs_client_init_status(const struct nfs_client *clp) +{ + /* called without checking nfs_client_init_is_complete */ + if (clp->cl_cons_state > NFS_CS_READY) { + WARN_ON_ONCE(1); + return -EINVAL; + } + return clp->cl_cons_state; +} +EXPORT_SYMBOL_GPL(nfs_client_init_status); int nfs_wait_client_init_complete(const struct nfs_client *clp) { diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index f956ca20a8a3..188120626179 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs4_pnfs_ds *ret = ds; struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); + int status; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", @@ -277,9 +278,13 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) if (ds->ds_clp) goto out_test_devid; - nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, 4, s->nfs_client->cl_minorversion); + if (status) { + ret = NULL; + goto out; + } out_test_devid: if (ret->ds_clp == NULL || diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e5a6f248697b..544e7725e679 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -384,6 +384,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, struct inode *ino = lseg->pls_layout->plh_inode; struct nfs_server *s = NFS_SERVER(ino); unsigned int max_payload; + int status; if (!ff_layout_mirror_valid(lseg, mirror, true)) { pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", @@ -404,7 +405,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. */ - nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, mirror->mirror_ds->ds_versions[0].version, mirror->mirror_ds->ds_versions[0].minor_version); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 09ca5095c04e..7b38fedb7e03 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -186,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); +extern bool nfs_client_init_is_complete(const struct nfs_client *clp); +extern int nfs_client_init_status(const struct nfs_client *clp); extern int nfs_wait_client_init_complete(const struct nfs_client *clp); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 63f77b49a586..590e1e35781f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -367,7 +367,7 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_v3_ds_connect_unload(void); -void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, +int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, unsigned int retrans, u32 version, u32 minor_version); struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 9414b492439f..a7691b927af6 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -745,9 +745,9 @@ out: /* * Create an rpc connection to the nfs4_pnfs_ds data server. * Currently only supports IPv4 and IPv6 addresses. - * If connection fails, make devid unavailable. + * If connection fails, make devid unavailable and return a -errno. */ -void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, +int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, unsigned int retrans, u32 version, u32 minor_version) { @@ -772,6 +772,17 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, } else { nfs4_wait_ds_connect(ds); } + + /* + * At this point the ds->ds_clp should be ready, but it might have + * hit an error. + */ + if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { + WARN_ON_ONCE(1); + return -EINVAL; + } + + return nfs_client_init_status(ds->ds_clp); } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); -- cgit v1.2.3 From da066f3f039eba3e72e97b2ccad0dd8b45ba84bd Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 9 Mar 2017 12:56:49 -0500 Subject: pNFS/flexfiles: never nfs4_mark_deviceid_unavailable The flexfiles layout should never mark a device unavailable. Move nfs4_mark_deviceid_unavailable out of nfs4_pnfs_ds_connect and call directly from files layout where it's still needed. The flexfiles driver still handles marked devices in error paths, but will now print a rate limited warning. Signed-off-by: Weston Andros Adamson Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayoutdev.c | 1 + fs/nfs/flexfilelayout/flexfilelayout.h | 14 +++++++++++++- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- fs/nfs/pnfs_nfs.c | 24 ++++++++++++++++-------- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 188120626179..d913e818858f 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -282,6 +282,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) dataserver_retrans, 4, s->nfs_client->cl_minorversion); if (status) { + nfs4_mark_deviceid_unavailable(devid); ret = NULL; goto out; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index f4f39b0ab09b..98b34c9b0564 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -175,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg) static inline bool ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) { - return nfs4_test_deviceid_unavailable(node); + /* + * Flexfiles should never mark a DS unavailable, but if it does + * print a (ratelimited) warning as this can affect performance. + */ + if (nfs4_test_deviceid_unavailable(node)) { + u32 *p = (u32 *)node->deviceid.data; + + pr_warn_ratelimited("NFS: flexfiles layout referencing an " + "unavailable device [%x%x%x%x]\n", + p[0], p[1], p[2], p[3]); + return true; + } + return false; } static inline int diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 544e7725e679..85fde93dff77 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -421,11 +421,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror->mirror_ds->ds_versions[0].wsize = max_payload; goto out; } +out_fail: ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), mirror, lseg->pls_range.offset, lseg->pls_range.length, NFS4ERR_NXIO, OP_ILLEGAL, GFP_NOIO); -out_fail: if (fail_return || !ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(ino, lseg); ds = NULL; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index a7691b927af6..7250b95549ec 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -751,9 +751,11 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, unsigned int retrans, u32 version, u32 minor_version) { - if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { - int err = 0; + int err; +again: + err = 0; + if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { if (version == 3) { err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans); @@ -766,23 +768,29 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, err = -EPROTONOSUPPORT; } - if (err) - nfs4_mark_deviceid_unavailable(devid); nfs4_clear_ds_conn_bit(ds); } else { nfs4_wait_ds_connect(ds); + + /* what was waited on didn't connect AND didn't mark unavail */ + if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid)) + goto again; } /* * At this point the ds->ds_clp should be ready, but it might have * hit an error. */ - if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { - WARN_ON_ONCE(1); - return -EINVAL; + if (!err) { + if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { + WARN_ON_ONCE(ds->ds_clp || + !nfs4_test_deviceid_unavailable(devid)); + return -EINVAL; + } + err = nfs_client_init_status(ds->ds_clp); } - return nfs_client_init_status(ds->ds_clp); + return err; } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); -- cgit v1.2.3 From 49d4a334727057af57048ded99697d17b016d91b Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 6 Mar 2017 18:20:56 -0800 Subject: Btrfs: fix regression in lock_delalloc_pages The bug is a regression after commit (da2c7009f6ca "btrfs: teach __process_pages_contig about PAGE_LOCK operation") and commit (76c0021db8fd "Btrfs: use helper to simplify lock/unlock pages"). So if the dirty pages which are under writeback got truncated partially before we lock the dirty pages, we couldn't find all pages mapping to the delalloc range, and the bug didn't return an error so it kept going on and found that the delalloc range got truncated and got to unlock the dirty pages, and then the ASSERT could caught the error, and showed ----------------------------------------------------------------------------- assertion failed: page_ops & PAGE_LOCK, file: fs/btrfs/extent_io.c, line: 1716 ----------------------------------------------------------------------------- This fixes the bug by returning the proper -EAGAIN. Cc: David Sterba Reported-by: Dave Jones Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 28e81922a21c..8df797432740 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1714,7 +1714,8 @@ static int __process_pages_contig(struct address_space *mapping, * can we find nothing at @index. */ ASSERT(page_ops & PAGE_LOCK); - return ret; + err = -EAGAIN; + goto out; } for (i = 0; i < ret; i++) { -- cgit v1.2.3 From e1699d2d7bf6e6cce3e1baff19f9dd4595a58664 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Fri, 10 Mar 2017 16:45:44 -0500 Subject: btrfs: add missing memset while reading compressed inline extents This is a story about 4 distinct (and very old) btrfs bugs. Commit c8b978188c ("Btrfs: Add zlib compression support") added three data corruption bugs for inline extents (bugs #1-3). Commit 93c82d5750 ("Btrfs: zero page past end of inline file items") fixed bug #1: uncompressed inline extents followed by a hole and more extents could get non-zero data in the hole as they were read. The fix was to add a memset in btrfs_get_extent to zero out the hole. Commit 166ae5a418 ("btrfs: fix inline compressed read err corruption") fixed bug #2: compressed inline extents which contained non-zero bytes might be replaced with zero bytes in some cases. This patch removed an unhelpful memset from uncompress_inline, but the case where memset is required was missed. There is also a memset in the decompression code, but this only covers decompressed data that is shorter than the ram_bytes from the extent ref record. This memset doesn't cover the region between the end of the decompressed data and the end of the page. It has also moved around a few times over the years, so there's no single patch to refer to. This patch fixes bug #3: compressed inline extents followed by a hole and more extents could get non-zero data in the hole as they were read (i.e. bug #3 is the same as bug #1, but s/uncompressed/compressed/). The fix is the same: zero out the hole in the compressed case too, by putting a memset back in uncompress_inline, but this time with correct parameters. The last and oldest bug, bug #0, is the cause of the offending inline extent/hole/extent pattern. Bug #0 is a subtle and mostly-harmless quirk of behavior somewhere in the btrfs write code. In a few special cases, an inline extent and hole are allowed to persist where they normally would be combined with later extents in the file. A fast reproducer for bug #0 is presented below. A few offending extents are also created in the wild during large rsync transfers with the -S flag. A Linux kernel build (git checkout; make allyesconfig; make -j8) will produce a handful of offending files as well. Once an offending file is created, it can present different content to userspace each time it is read. Bug #0 is at least 4 and possibly 8 years old. I verified every vX.Y kernel back to v3.5 has this behavior. There are fossil records of this bug's effects in commits all the way back to v2.6.32. I have no reason to believe bug #0 wasn't present at the beginning of btrfs compression support in v2.6.29, but I can't easily test kernels that old to be sure. It is not clear whether bug #0 is worth fixing. A fix would likely require injecting extra reads into currently write-only paths, and most of the exceptional cases caused by bug #0 are already handled now. Whether we like them or not, bug #0's inline extents followed by holes are part of the btrfs de-facto disk format now, and we need to be able to read them without data corruption or an infoleak. So enough about bug #0, let's get back to bug #3 (this patch). An example of on-disk structure leading to data corruption found in the wild: item 61 key (606890 INODE_ITEM 0) itemoff 9662 itemsize 160 inode generation 50 transid 50 size 47424 nbytes 49141 block group 0 mode 100644 links 1 uid 0 gid 0 rdev 0 flags 0x0(none) item 62 key (606890 INODE_REF 603050) itemoff 9642 itemsize 20 inode ref index 3 namelen 10 name: DB_File.so item 63 key (606890 EXTENT_DATA 0) itemoff 8280 itemsize 1362 inline extent data size 1341 ram 4085 compress(zlib) item 64 key (606890 EXTENT_DATA 4096) itemoff 8227 itemsize 53 extent data disk byte 5367308288 nr 20480 extent data offset 0 nr 45056 ram 45056 extent compression(zlib) Different data appears in userspace during each read of the 11 bytes between 4085 and 4096. The extent in item 63 is not long enough to fill the first page of the file, so a memset is required to fill the space between item 63 (ending at 4085) and item 64 (beginning at 4096) with zero. Here is a reproducer from Liu Bo, which demonstrates another method of creating the same inline extent and hole pattern: Using 'page_poison=on' kernel command line (or enable CONFIG_PAGE_POISONING) run the following: # touch foo # chattr +c foo # xfs_io -f -c "pwrite -W 0 1000" foo # xfs_io -f -c "falloc 4 8188" foo # od -x foo # echo 3 >/proc/sys/vm/drop_caches # od -x foo This produce the following on my box: Correct output: file contains 1000 data bytes followed by zeros: 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0001740 cdcd cdcd cdcd cdcd 0000 0000 0000 0000 0001760 0000 0000 0000 0000 0000 0000 0000 0000 * 0020000 Actual output: the data after the first 1000 bytes will be different each run: 0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd * 0001740 cdcd cdcd cdcd cdcd 6c63 7400 635f 006d 0001760 5f74 6f43 7400 435f 0053 5f74 7363 7400 0002000 435f 0056 5f74 6164 7400 645f 0062 5f74 (...) Signed-off-by: Zygo Blaxell Reviewed-by: Liu Bo Reviewed-by: Chris Mason Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b2bc07aad1ae..e57191072aa3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6709,6 +6709,20 @@ static noinline int uncompress_inline(struct btrfs_path *path, max_size = min_t(unsigned long, PAGE_SIZE, max_size); ret = btrfs_decompress(compress_type, tmp, page, extent_offset, inline_size, max_size); + + /* + * decompression code contains a memset to fill in any space between the end + * of the uncompressed data and the end of max_size in case the decompressed + * data ends up shorter than ram_bytes. That doesn't cover the hole between + * the end of an inline extent and the beginning of the next block, so we + * cover that region here. + */ + + if (max_size + pg_offset < PAGE_SIZE) { + char *map = kmap(page); + memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset); + kunmap(page); + } kfree(tmp); return ret; } -- cgit v1.2.3 From e7348396c6d51b57c95c6646c390cd078e038e19 Mon Sep 17 00:00:00 2001 From: Masaki Ota Date: Fri, 17 Mar 2017 14:10:57 -0700 Subject: Input: ALPS - fix V8+ protocol handling (73 03 28) Devices identified as E7="73 03 28" use slightly modified version of V8 protocol, with lower count per electrode, different offsets, and different feature bits in OTP data. Fixes: aeaa881f9b17 ("Input: ALPS - set DualPoint flag for 74 03 28 devices") Signed-off-by: Masaki Ota Acked-by: Pali Rohar Tested-by: Paul Donohue Tested-by: Nick Fletcher Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 66 +++++++++++++++++++++++++++++++++++----------- drivers/input/mouse/alps.h | 11 ++++++++ 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 72b28ebfe360..262d9b620fcf 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -2462,14 +2462,34 @@ static int alps_update_device_area_ss4_v2(unsigned char otp[][4], int num_y_electrode; int x_pitch, y_pitch, x_phys, y_phys; - num_x_electrode = SS4_NUMSENSOR_XOFFSET + (otp[1][0] & 0x0F); - num_y_electrode = SS4_NUMSENSOR_YOFFSET + ((otp[1][0] >> 4) & 0x0F); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + num_x_electrode = + SS4PLUS_NUMSENSOR_XOFFSET + (otp[0][2] & 0x0F); + num_y_electrode = + SS4PLUS_NUMSENSOR_YOFFSET + ((otp[0][2] >> 4) & 0x0F); - priv->x_max = (num_x_electrode - 1) * SS4_COUNT_PER_ELECTRODE; - priv->y_max = (num_y_electrode - 1) * SS4_COUNT_PER_ELECTRODE; + priv->x_max = + (num_x_electrode - 1) * SS4PLUS_COUNT_PER_ELECTRODE; + priv->y_max = + (num_y_electrode - 1) * SS4PLUS_COUNT_PER_ELECTRODE; - x_pitch = ((otp[1][2] >> 2) & 0x07) + SS4_MIN_PITCH_MM; - y_pitch = ((otp[1][2] >> 5) & 0x07) + SS4_MIN_PITCH_MM; + x_pitch = (otp[0][1] & 0x0F) + SS4PLUS_MIN_PITCH_MM; + y_pitch = ((otp[0][1] >> 4) & 0x0F) + SS4PLUS_MIN_PITCH_MM; + + } else { + num_x_electrode = + SS4_NUMSENSOR_XOFFSET + (otp[1][0] & 0x0F); + num_y_electrode = + SS4_NUMSENSOR_YOFFSET + ((otp[1][0] >> 4) & 0x0F); + + priv->x_max = + (num_x_electrode - 1) * SS4_COUNT_PER_ELECTRODE; + priv->y_max = + (num_y_electrode - 1) * SS4_COUNT_PER_ELECTRODE; + + x_pitch = ((otp[1][2] >> 2) & 0x07) + SS4_MIN_PITCH_MM; + y_pitch = ((otp[1][2] >> 5) & 0x07) + SS4_MIN_PITCH_MM; + } x_phys = x_pitch * (num_x_electrode - 1); /* In 0.1 mm units */ y_phys = y_pitch * (num_y_electrode - 1); /* In 0.1 mm units */ @@ -2485,7 +2505,10 @@ static int alps_update_btn_info_ss4_v2(unsigned char otp[][4], { unsigned char is_btnless; - is_btnless = (otp[1][1] >> 3) & 0x01; + if (IS_SS4PLUS_DEV(priv->dev_id)) + is_btnless = (otp[1][0] >> 1) & 0x01; + else + is_btnless = (otp[1][1] >> 3) & 0x01; if (is_btnless) priv->flags |= ALPS_BUTTONPAD; @@ -2493,6 +2516,21 @@ static int alps_update_btn_info_ss4_v2(unsigned char otp[][4], return 0; } +static int alps_update_dual_info_ss4_v2(unsigned char otp[][4], + struct alps_data *priv) +{ + bool is_dual = false; + + if (IS_SS4PLUS_DEV(priv->dev_id)) + is_dual = (otp[0][0] >> 4) & 0x01; + + if (is_dual) + priv->flags |= ALPS_DUALPOINT | + ALPS_DUALPOINT_WITH_PRESSURE; + + return 0; +} + static int alps_set_defaults_ss4_v2(struct psmouse *psmouse, struct alps_data *priv) { @@ -2508,6 +2546,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse *psmouse, alps_update_btn_info_ss4_v2(otp, priv); + alps_update_dual_info_ss4_v2(otp, priv); + return 0; } @@ -2753,10 +2793,6 @@ static int alps_set_protocol(struct psmouse *psmouse, if (alps_set_defaults_ss4_v2(psmouse, priv)) return -EIO; - if (priv->fw_ver[1] == 0x1) - priv->flags |= ALPS_DUALPOINT | - ALPS_DUALPOINT_WITH_PRESSURE; - break; } @@ -2827,10 +2863,7 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv) ec[2] >= 0x90 && ec[2] <= 0x9d) { protocol = &alps_v3_protocol_data; } else if (e7[0] == 0x73 && e7[1] == 0x03 && - e7[2] == 0x14 && ec[1] == 0x02) { - protocol = &alps_v8_protocol_data; - } else if (e7[0] == 0x73 && e7[1] == 0x03 && - e7[2] == 0x28 && ec[1] == 0x01) { + (e7[2] == 0x14 || e7[2] == 0x28)) { protocol = &alps_v8_protocol_data; } else { psmouse_dbg(psmouse, @@ -2840,7 +2873,8 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv) } if (priv) { - /* Save the Firmware version */ + /* Save Device ID and Firmware version */ + memcpy(priv->dev_id, e7, 3); memcpy(priv->fw_ver, ec, 3); error = alps_set_protocol(psmouse, priv, protocol); if (error) diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index 6d279aa27cb9..4334f2805d93 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -54,6 +54,16 @@ enum SS4_PACKET_ID { #define SS4_MASK_NORMAL_BUTTONS 0x07 +#define SS4PLUS_COUNT_PER_ELECTRODE 128 +#define SS4PLUS_NUMSENSOR_XOFFSET 16 +#define SS4PLUS_NUMSENSOR_YOFFSET 5 +#define SS4PLUS_MIN_PITCH_MM 37 + +#define IS_SS4PLUS_DEV(_b) (((_b[0]) == 0x73) && \ + ((_b[1]) == 0x03) && \ + ((_b[2]) == 0x28) \ + ) + #define SS4_IS_IDLE_V2(_b) (((_b[0]) == 0x18) && \ ((_b[1]) == 0x10) && \ ((_b[2]) == 0x00) && \ @@ -283,6 +293,7 @@ struct alps_data { int addr_command; u16 proto_version; u8 byte0, mask0; + u8 dev_id[3]; u8 fw_ver[3]; int flags; int x_max; -- cgit v1.2.3 From 47e6fb4212d09f325c0847d05985dd3d71553095 Mon Sep 17 00:00:00 2001 From: Masaki Ota Date: Fri, 17 Mar 2017 14:19:40 -0700 Subject: Input: ALPS - fix trackstick button handling on V8 devices Alps stick devices always have physical buttons, so we should not check ALPS_BUTTONPAD flag to decide whether we should report them. Fixes: 4777ac220c43 ("Input: ALPS - add touchstick support for SS5 hardware") Signed-off-by: Masaki Ota Acked-by: Pali Rohar Tested-by: Paul Donohue Tested-by: Nick Fletcher Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 262d9b620fcf..f210e19ddba6 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1282,10 +1282,8 @@ static int alps_decode_ss4_v2(struct alps_fields *f, /* handle buttons */ if (pkt_id == SS4_PACKET_ID_STICK) { f->ts_left = !!(SS4_BTN_V2(p) & 0x01); - if (!(priv->flags & ALPS_BUTTONPAD)) { - f->ts_right = !!(SS4_BTN_V2(p) & 0x02); - f->ts_middle = !!(SS4_BTN_V2(p) & 0x04); - } + f->ts_right = !!(SS4_BTN_V2(p) & 0x02); + f->ts_middle = !!(SS4_BTN_V2(p) & 0x04); } else { f->left = !!(SS4_BTN_V2(p) & 0x01); if (!(priv->flags & ALPS_BUTTONPAD)) { -- cgit v1.2.3 From 7de32556dfc62b9e1203730cc26b71292da8a244 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 18 Mar 2017 00:57:39 +0100 Subject: cpufreq: intel_pstate: One set of global limits in active mode In the active mode intel_pstate currently uses two sets of global limits, each associated with one of the possible scaling_governor settings in that mode: "powersave" or "performance". The driver switches over from one of those sets to the other depending on the scaling_governor setting for the last CPU whose per-policy cpufreq interface in sysfs was last used to change parameters exposed in there. That obviously leads to no end of issues when the scaling_governor settings differ between CPUs. The most recent issue was introduced by commit a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy) that eliminated the reinitialization of "performance" limits in intel_pstate_set_policy() preventing the max limit from being set to anything below 100, among other things. Namely, an undesirable side effect of commit a240c4aa5d0f is that now, after setting scaling_governor to "performance" in the active mode, the per-policy limits for the CPU in question go to the highest level and stay there even when it is switched back to "powersave" later. As it turns out, some distributions set scaling_governor to "performance" temporarily for all CPUs to speed-up system initialization, so that change causes them to misbehave later. To fix that, get rid of the performance/powersave global limits split and use just one set of global limits for everything. From the user's persepctive, after this modification, when scaling_governor is switched from "performance" to "powersave" or the other way around on one CPU, the limits settings (ie. the global max/min_perf_pct and per-policy scaling_max/min_freq for any CPUs) will not change. Still, switching from "performance" to "powersave" or the other way around changes the way in which P-states are selected and in particular "performance" causes the driver to always request the highest P-state it is allowed to ask for for the given CPU. Fixes: a240c4aa5d0f (cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 142 +++++++++++++---------------------------- 1 file changed, 46 insertions(+), 96 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 08e134ffba68..7b07803e7042 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -364,9 +364,7 @@ static bool driver_registered __read_mostly; static bool acpi_ppc; #endif -static struct perf_limits performance_limits; -static struct perf_limits powersave_limits; -static struct perf_limits *limits; +static struct perf_limits global; static void intel_pstate_init_limits(struct perf_limits *limits) { @@ -377,14 +375,6 @@ static void intel_pstate_init_limits(struct perf_limits *limits) limits->max_sysfs_pct = 100; } -static void intel_pstate_set_performance_limits(struct perf_limits *limits) -{ - intel_pstate_init_limits(limits); - limits->min_perf_pct = 100; - limits->min_perf = int_ext_tofp(1); - limits->min_sysfs_pct = 100; -} - static DEFINE_MUTEX(intel_pstate_driver_lock); static DEFINE_MUTEX(intel_pstate_limits_lock); @@ -507,7 +497,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) * correct max turbo frequency based on the turbo state. * Also need to convert to MHz as _PSS freq is in MHz. */ - if (!limits->turbo_disabled) + if (!global.turbo_disabled) cpu->acpi_perf_data.states[0].core_frequency = policy->cpuinfo.max_freq / 1000; cpu->valid_pss_table = true; @@ -626,7 +616,7 @@ static inline void update_turbo_state(void) cpu = all_cpu_data[0]; rdmsrl(MSR_IA32_MISC_ENABLE, misc_en); - limits->turbo_disabled = + global.turbo_disabled = (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE || cpu->pstate.max_pstate == cpu->pstate.turbo_pstate); } @@ -851,7 +841,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { static void intel_pstate_hwp_set(struct cpufreq_policy *policy) { int min, hw_min, max, hw_max, cpu; - struct perf_limits *perf_limits = limits; + struct perf_limits *perf_limits = &global; u64 value, cap; for_each_cpu(cpu, policy->cpus) { @@ -863,19 +853,22 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); hw_min = HWP_LOWEST_PERF(cap); - if (limits->no_turbo) + if (global.no_turbo) hw_max = HWP_GUARANTEED_PERF(cap); else hw_max = HWP_HIGHEST_PERF(cap); - min = fp_ext_toint(hw_max * perf_limits->min_perf); + max = fp_ext_toint(hw_max * perf_limits->max_perf); + if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) + min = max; + else + min = fp_ext_toint(hw_max * perf_limits->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - max = fp_ext_toint(hw_max * perf_limits->max_perf); value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); @@ -968,20 +961,11 @@ static int intel_pstate_resume(struct cpufreq_policy *policy) } static void intel_pstate_update_policies(void) - __releases(&intel_pstate_limits_lock) - __acquires(&intel_pstate_limits_lock) { - struct perf_limits *saved_limits = limits; int cpu; - mutex_unlock(&intel_pstate_limits_lock); - for_each_possible_cpu(cpu) cpufreq_update_policy(cpu); - - mutex_lock(&intel_pstate_limits_lock); - - limits = saved_limits; } /************************** debugfs begin ************************/ @@ -1060,7 +1044,7 @@ static void intel_pstate_debug_hide_params(void) static ssize_t show_##file_name \ (struct kobject *kobj, struct attribute *attr, char *buf) \ { \ - return sprintf(buf, "%u\n", limits->object); \ + return sprintf(buf, "%u\n", global.object); \ } static ssize_t intel_pstate_show_status(char *buf); @@ -1151,10 +1135,10 @@ static ssize_t show_no_turbo(struct kobject *kobj, } update_turbo_state(); - if (limits->turbo_disabled) - ret = sprintf(buf, "%u\n", limits->turbo_disabled); + if (global.turbo_disabled) + ret = sprintf(buf, "%u\n", global.turbo_disabled); else - ret = sprintf(buf, "%u\n", limits->no_turbo); + ret = sprintf(buf, "%u\n", global.no_turbo); mutex_unlock(&intel_pstate_driver_lock); @@ -1181,19 +1165,19 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); update_turbo_state(); - if (limits->turbo_disabled) { + if (global.turbo_disabled) { pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); mutex_unlock(&intel_pstate_limits_lock); mutex_unlock(&intel_pstate_driver_lock); return -EPERM; } - limits->no_turbo = clamp_t(int, input, 0, 1); - - intel_pstate_update_policies(); + global.no_turbo = clamp_t(int, input, 0, 1); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1218,19 +1202,16 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); - limits->max_sysfs_pct = clamp_t(int, input, 0 , 100); - limits->max_perf_pct = min(limits->max_policy_pct, - limits->max_sysfs_pct); - limits->max_perf_pct = max(limits->min_policy_pct, - limits->max_perf_pct); - limits->max_perf_pct = max(limits->min_perf_pct, - limits->max_perf_pct); - limits->max_perf = percent_ext_fp(limits->max_perf_pct); - - intel_pstate_update_policies(); + global.max_sysfs_pct = clamp_t(int, input, 0 , 100); + global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct); + global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct); + global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct); + global.max_perf = percent_ext_fp(global.max_perf_pct); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1255,19 +1236,16 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, mutex_lock(&intel_pstate_limits_lock); - limits->min_sysfs_pct = clamp_t(int, input, 0 , 100); - limits->min_perf_pct = max(limits->min_policy_pct, - limits->min_sysfs_pct); - limits->min_perf_pct = min(limits->max_policy_pct, - limits->min_perf_pct); - limits->min_perf_pct = min(limits->max_perf_pct, - limits->min_perf_pct); - limits->min_perf = percent_ext_fp(limits->min_perf_pct); - - intel_pstate_update_policies(); + global.min_sysfs_pct = clamp_t(int, input, 0 , 100); + global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct); + global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct); + global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct); + global.min_perf = percent_ext_fp(global.min_perf_pct); mutex_unlock(&intel_pstate_limits_lock); + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1387,7 +1365,7 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (limits->no_turbo && !limits->turbo_disabled) + if (global.no_turbo && !global.turbo_disabled) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -1557,7 +1535,7 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (limits->no_turbo && !limits->turbo_disabled) + if (global.no_turbo && !global.turbo_disabled) val |= (u64)1 << 32; return val; @@ -1683,9 +1661,9 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) int max_perf = cpu->pstate.turbo_pstate; int max_perf_adj; int min_perf; - struct perf_limits *perf_limits = limits; + struct perf_limits *perf_limits = &global; - if (limits->no_turbo || limits->turbo_disabled) + if (global.no_turbo || global.turbo_disabled) max_perf = cpu->pstate.max_pstate; if (per_cpu_limits) @@ -1820,7 +1798,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) sample->busy_scaled = busy_frac * 100; - target = limits->no_turbo || limits->turbo_disabled ? + target = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; target += target >> 2; target = mul_fp(target, busy_frac); @@ -2116,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, static int intel_pstate_set_policy(struct cpufreq_policy *policy) { struct cpudata *cpu; - struct perf_limits *perf_limits = NULL; + struct perf_limits *perf_limits = &global; if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -2139,21 +2117,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { - pr_debug("set performance\n"); - if (!perf_limits) { - limits = &performance_limits; - perf_limits = limits; - } - } else { - pr_debug("set powersave\n"); - if (!perf_limits) { - limits = &powersave_limits; - perf_limits = limits; - } - - } - intel_pstate_update_perf_limits(policy, perf_limits); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { @@ -2177,16 +2140,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) static int intel_pstate_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - struct perf_limits *perf_limits; - - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) - perf_limits = &performance_limits; - else - perf_limits = &powersave_limits; update_turbo_state(); - policy->cpuinfo.max_freq = perf_limits->turbo_disabled || - perf_limits->no_turbo ? + policy->cpuinfo.max_freq = global.turbo_disabled || global.no_turbo ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; @@ -2201,9 +2157,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) unsigned int max_freq, min_freq; max_freq = policy->cpuinfo.max_freq * - perf_limits->max_sysfs_pct / 100; + global.max_sysfs_pct / 100; min_freq = policy->cpuinfo.max_freq * - perf_limits->min_sysfs_pct / 100; + global.min_sysfs_pct / 100; cpufreq_verify_within_limits(policy, min_freq, max_freq); } @@ -2255,7 +2211,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); - policy->cpuinfo.max_freq = limits->turbo_disabled ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; policy->cpuinfo.max_freq *= cpu->pstate.scaling; @@ -2275,7 +2231,7 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) return ret; policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100) + if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE)) policy->policy = CPUFREQ_POLICY_PERFORMANCE; else policy->policy = CPUFREQ_POLICY_POWERSAVE; @@ -2301,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = limits->turbo_disabled ? + policy->cpuinfo.max_freq = global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; cpufreq_verify_within_cpu_limits(policy); @@ -2317,7 +2273,7 @@ static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu, update_turbo_state(); - max_freq = limits->no_turbo || limits->turbo_disabled ? + max_freq = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; policy->cpuinfo.max_freq = max_freq; if (policy->max > max_freq) @@ -2425,13 +2381,7 @@ static int intel_pstate_register_driver(void) { int ret; - intel_pstate_init_limits(&powersave_limits); - intel_pstate_set_performance_limits(&performance_limits); - if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) && - intel_pstate_driver == &intel_pstate) - limits = &performance_limits; - else - limits = &powersave_limits; + intel_pstate_init_limits(&global); ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { -- cgit v1.2.3 From 436ecf5519d892397af133a79ccd38a17c25fa51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 17 Mar 2017 17:21:28 +0100 Subject: USB: serial: qcserial: add Dell DW5811e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a Dell branded Sierra Wireless EM7455. Cc: Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 696458db7e3c..38b3f0d8cd58 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -169,6 +169,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b1)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ + {DEVICE_SWI(0x413c, 0x81b5)}, /* Dell Wireless 5811e QDL */ + {DEVICE_SWI(0x413c, 0x81b6)}, /* Dell Wireless 5811e QDL */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ -- cgit v1.2.3 From 74e3f6e63da6c8e8246fba1689e040bc926b4a1a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 14 Mar 2017 15:24:51 +0530 Subject: parisc: perf: Fix potential NULL pointer dereference Fix potential NULL pointer dereference and clean up coding style errors (code indent, trailing whitespaces). Signed-off-by: Arvind Yadav Signed-off-by: Helge Deller --- arch/parisc/kernel/perf.c | 94 ++++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index e282a5131d77..6017a5af2e6e 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -39,7 +39,7 @@ * the PDC INTRIGUE calls. This is done to eliminate bugs introduced * in various PDC revisions. The code is much more maintainable * and reliable this way vs having to debug on every version of PDC - * on every box. + * on every box. */ #include @@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr); static int perf_release(struct inode *inode, struct file *file); static int perf_open(struct inode *inode, struct file *file); static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos); +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos); static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static void perf_start_counters(void); static int perf_stop_counters(uint32_t *raddr); @@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void); /* * configure: * - * Configure the cpu with a given data image. First turn off the counters, + * Configure the cpu with a given data image. First turn off the counters, * then download the image, then turn the counters back on. */ static int perf_config(uint32_t *image_ptr) @@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr) error = perf_stop_counters(raddr); if (error != 0) { printk("perf_config: perf_stop_counters = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to write image\n"); @@ -242,7 +242,7 @@ printk("Preparing to write image\n"); error = perf_write_image((uint64_t *)image_ptr); if (error != 0) { printk("perf_config: DOWNLOAD = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to start counters\n"); @@ -254,7 +254,7 @@ printk("Preparing to start counters\n"); } /* - * Open the device and initialize all of its memory. The device is only + * Open the device and initialize all of its memory. The device is only * opened once, but can be "queried" by multiple processes that know its * file descriptor. */ @@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t * called on the processor that the download should happen * on. */ -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { size_t image_size; uint32_t image_type; uint32_t interface_type; uint32_t test; - if (perf_processor_interface == ONYX_INTF) + if (perf_processor_interface == ONYX_INTF) image_size = PCXU_IMAGE_SIZE; - else if (perf_processor_interface == CUDA_INTF) + else if (perf_processor_interface == CUDA_INTF) image_size = PCXW_IMAGE_SIZE; - else + else return -EFAULT; if (!capable(CAP_SYS_ADMIN)) @@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun /* First check the machine type is correct for the requested image */ - if (((perf_processor_interface == CUDA_INTF) && - (interface_type != CUDA_INTF)) || - ((perf_processor_interface == ONYX_INTF) && - (interface_type != ONYX_INTF))) + if (((perf_processor_interface == CUDA_INTF) && + (interface_type != CUDA_INTF)) || + ((perf_processor_interface == ONYX_INTF) && + (interface_type != ONYX_INTF))) return -EINVAL; /* Next check to make sure the requested image is valid */ - if (((interface_type == CUDA_INTF) && + if (((interface_type == CUDA_INTF) && (test >= MAX_CUDA_IMAGES)) || - ((interface_type == ONYX_INTF) && - (test >= MAX_ONYX_IMAGES))) + ((interface_type == ONYX_INTF) && + (test >= MAX_ONYX_IMAGES))) return -EINVAL; /* Copy the image into the processor */ - if (interface_type == CUDA_INTF) + if (interface_type == CUDA_INTF) return perf_config(cuda_images[test]); else return perf_config(onyx_images[test]); @@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun static void perf_patch_images(void) { #if 0 /* FIXME!! */ -/* +/* * NOTE: this routine is VERY specific to the current TLB image. * If the image is changed, this routine might also need to be changed. */ @@ -367,9 +367,9 @@ static void perf_patch_images(void) extern void $i_dtlb_miss_2_0(); extern void PA2_0_iva(); - /* + /* * We can only use the lower 32-bits, the upper 32-bits should be 0 - * anyway given this is in the kernel + * anyway given this is in the kernel */ uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); @@ -377,21 +377,21 @@ static void perf_patch_images(void) if (perf_processor_interface == ONYX_INTF) { /* clear last 2 bytes */ - onyx_images[TLBMISS][15] &= 0xffffff00; + onyx_images[TLBMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[TLBHANDMISS][15] &= 0xffffff00; + onyx_images[TLBHANDMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBHANDMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[BIG_CPI][15] &= 0xffffff00; + onyx_images[BIG_CPI][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; @@ -404,24 +404,24 @@ static void perf_patch_images(void) } else if (perf_processor_interface == CUDA_INTF) { /* Cuda interface */ - cuda_images[TLBMISS][16] = + cuda_images[TLBMISS][16] = (cuda_images[TLBMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBMISS][17] = + cuda_images[TLBMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[TLBHANDMISS][16] = + cuda_images[TLBHANDMISS][16] = (cuda_images[TLBHANDMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBHANDMISS][17] = + cuda_images[TLBHANDMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[BIG_CPI][16] = + cuda_images[BIG_CPI][16] = (cuda_images[BIG_CPI][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[BIG_CPI][17] = + cuda_images[BIG_CPI][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; } else { @@ -433,7 +433,7 @@ static void perf_patch_images(void) /* * ioctl routine - * All routines effect the processor that they are executed on. Thus you + * All routines effect the processor that they are executed on. Thus you * must be running on the processor that you wish to change. */ @@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } /* copy out the Counters */ - if (copy_to_user((void __user *)arg, raddr, + if (copy_to_user((void __user *)arg, raddr, sizeof (raddr)) != 0) { error = -EFAULT; break; @@ -487,7 +487,7 @@ static const struct file_operations perf_fops = { .open = perf_open, .release = perf_release }; - + static struct miscdevice perf_dev = { MISC_DYNAMIC_MINOR, PA_PERF_DEV, @@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr) /* OR sticky2 (bit 1496) to counter2 bit 32 */ tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; raddr[2] = (uint32_t)tmp64; - + /* Counter3 is bits 1497 to 1528 */ tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; /* OR sticky3 (bit 1529) to counter3 bit 32 */ @@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr) userbuf[22] = 0; userbuf[23] = 0; - /* + /* * Write back the zeroed bytes + the image given * the read was destructive. */ @@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr) } else { /* - * Read RDR-15 which contains the counters and sticky bits + * Read RDR-15 which contains the counters and sticky bits */ if (!perf_rdr_read_ubuf(15, userbuf)) { return -13; } - /* + /* * Clear out the counters */ perf_rdr_clear(15); @@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr) raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); } - + return 0; } @@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) i = tentry->num_words; while (i--) { buffer[i] = 0; - } + } /* Check for bits an even number of 64 */ if ((xbits = width & 0x03f) != 0) { @@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr) } runway = ioremap_nocache(cpu_device->hpa.start, 4096); + if (!runway) { + pr_err("perf_write_image: ioremap failed!\n"); + return -ENOMEM; + } /* Merge intrigue bits into Runway STATUS 0 */ tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; - __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), + __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), runway + RUNWAY_STATUS); - + /* Write RUNWAY DEBUG registers */ for (i = 0; i < 8; i++) { __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); } - return 0; + return 0; } /* @@ -843,7 +847,7 @@ printk("perf_rdr_write\n"); perf_rdr_shift_out_U(rdr_num, buffer[i]); } else { perf_rdr_shift_out_W(rdr_num, buffer[i]); - } + } } printk("perf_rdr_write done\n"); } -- cgit v1.2.3 From 73580dac7618e4bcd21679f553cf3c97323fec46 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 18 Mar 2017 17:13:27 +0100 Subject: parisc: Fix system shutdown halt On those parisc machines which don't provide a software power off function, the system currently kills the init process at the end of a shutdown and unexpectedly restarts insteads of halting. Fix it by adding a loop which will not return. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.9+ --- arch/parisc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 06f7ca7fe70b..b76f503eee4a 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -142,6 +142,8 @@ void machine_power_off(void) printk(KERN_EMERG "System shut down completed.\n" "Please power this system off now."); + + for (;;); } void (*pm_power_off)(void) = machine_power_off; -- cgit v1.2.3 From 9c28ca4ff8bad7486182291a55b4f67a70af718d Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 8 Mar 2017 00:09:59 -0800 Subject: target: Drop pointless tfo->check_stop_free check All in-tree fabric drivers provide a tfo->check_stop_free(), so there is no need to do the extra check within existing transport_cmd_check_stop_to_fabric() code. Just to be sure, add a check in target_fabric_tf_ops_check() to notify any out-of-tree drivers that might be missing it. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 4 ++++ drivers/target/target_core_transport.c | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 54b36c9835be..38b5025e4c7a 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -421,6 +421,10 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->aborted_task()\n"); return -EINVAL; } + if (!tfo->check_stop_free) { + pr_err("Missing tfo->check_stop_free()\n"); + return -EINVAL; + } /* * We at least require tfo->fabric_make_wwn(), tfo->fabric_drop_wwn() * tfo->fabric_make_tpg() and tfo->fabric_drop_tpg() in diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 434d9d693989..b1a3cdb29468 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -636,8 +636,7 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) * Fabric modules are expected to return '1' here if the se_cmd being * passed is released at this point, or zero if not being released. */ - return cmd->se_tfo->check_stop_free ? cmd->se_tfo->check_stop_free(cmd) - : 0; + return cmd->se_tfo->check_stop_free(cmd); } static void transport_lun_remove_cmd(struct se_cmd *cmd) -- cgit v1.2.3 From 3abaa2bfdb1e6bb33d38a2e82cf3bb82ec0197bf Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:14:39 -0600 Subject: tcmu: allow hw_max_sectors greater than 128 tcmu hard codes the hw_max_sectors to 128 which is a litle small. Userspace uses the max_sectors to report the optimal IO size and some initiators perform better with larger IOs (open-iscsi seems to do better with 256 to 512 depending on the test). (Fix do not display hw max sectors twice - MNC) Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 54 +++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c3adefe95e50..24e8580f07b8 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -960,7 +960,8 @@ static int tcmu_configure_device(struct se_device *dev) if (dev->dev_attrib.hw_block_size == 0) dev->dev_attrib.hw_block_size = 512; /* Other attributes can be configured in userspace */ - dev->dev_attrib.hw_max_sectors = 128; + if (!dev->dev_attrib.hw_max_sectors) + dev->dev_attrib.hw_max_sectors = 128; dev->dev_attrib.hw_queue_depth = 128; ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name, @@ -1031,16 +1032,42 @@ static void tcmu_free_device(struct se_device *dev) } enum { - Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err, + Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, + Opt_err, }; static match_table_t tokens = { {Opt_dev_config, "dev_config=%s"}, {Opt_dev_size, "dev_size=%u"}, {Opt_hw_block_size, "hw_block_size=%u"}, + {Opt_hw_max_sectors, "hw_max_sectors=%u"}, {Opt_err, NULL} }; +static int tcmu_set_dev_attrib(substring_t *arg, u32 *dev_attrib) +{ + unsigned long tmp_ul; + char *arg_p; + int ret; + + arg_p = match_strdup(arg); + if (!arg_p) + return -ENOMEM; + + ret = kstrtoul(arg_p, 0, &tmp_ul); + kfree(arg_p); + if (ret < 0) { + pr_err("kstrtoul() failed for dev attrib\n"); + return ret; + } + if (!tmp_ul) { + pr_err("dev attrib must be nonzero\n"); + return -EINVAL; + } + *dev_attrib = tmp_ul; + return 0; +} + static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, const char *page, ssize_t count) { @@ -1048,7 +1075,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, char *orig, *ptr, *opts, *arg_p; substring_t args[MAX_OPT_ARGS]; int ret = 0, token; - unsigned long tmp_ul; opts = kstrdup(page, GFP_KERNEL); if (!opts) @@ -1082,22 +1108,12 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, pr_err("kstrtoul() failed for dev_size=\n"); break; case Opt_hw_block_size: - arg_p = match_strdup(&args[0]); - if (!arg_p) { - ret = -ENOMEM; - break; - } - ret = kstrtoul(arg_p, 0, &tmp_ul); - kfree(arg_p); - if (ret < 0) { - pr_err("kstrtoul() failed for hw_block_size=\n"); - break; - } - if (!tmp_ul) { - pr_err("hw_block_size must be nonzero\n"); - break; - } - dev->dev_attrib.hw_block_size = tmp_ul; + ret = tcmu_set_dev_attrib(&args[0], + &(dev->dev_attrib.hw_block_size)); + break; + case Opt_hw_max_sectors: + ret = tcmu_set_dev_attrib(&args[0], + &(dev->dev_attrib.hw_max_sectors)); break; default: break; -- cgit v1.2.3 From 2579325ca0acc598fdf41ba12b2871d3467f28df Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:14:40 -0600 Subject: tcmu: return on first Opt parse failure We only were returing failure if the last opt to be parsed failed. This has a return failure when we first detect a failure. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 24e8580f07b8..4339ab2133b3 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1118,6 +1118,9 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, default: break; } + + if (ret) + break; } kfree(orig); -- cgit v1.2.3 From 530c6891b1220cba780b6c18f4691d85a3435080 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:24 -0600 Subject: target: allow ALUA setup for some passthrough backends This patch allows passthrough backends to use the core/base LIO ALUA setup and state checks, but still handle the execution of commands. This will allow the target_core_user module to execute STPG and RTPG in userspace, and not have to duplicate the ALUA state checks, path information (needed so we can check if command is executable on specific paths) and setup (rtslib sets/updates the configfs ALUA interface like it does for iblock or file). For STPG, the target_core_user userspace daemon, tcmu-runner will still execute the STPG, and to update the core/base LIO state it will use the existing configfs interface. For RTPG, tcmu-runner will loop over configfs and/or cache the state. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 9 +++++---- drivers/target/target_core_pscsi.c | 3 ++- drivers/target/target_core_tpg.c | 3 ++- include/target/target_core_backend.h | 7 ++++++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index f5e330099bfc..a41bbb8087cf 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -691,7 +691,7 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) return 0; /* @@ -1973,7 +1973,7 @@ ssize_t core_alua_store_tg_pt_gp_info( unsigned char buf[TG_PT_GROUP_NAME_BUF]; int move = 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2230,7 +2230,7 @@ ssize_t core_alua_store_offline_bit( unsigned long tmp; int ret; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2316,7 +2316,8 @@ ssize_t core_alua_store_secondary_write_metadata( int core_setup_alua(struct se_device *dev) { - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + if (!(dev->transport->transport_flags & + TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { struct t10_alua_lu_gp_member *lu_gp_mem; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 44d92f23a3f0..94cda7991e80 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1080,7 +1080,8 @@ static void pscsi_req_done(struct request *req, int uptodate) static const struct target_backend_ops pscsi_ops = { .name = "pscsi", .owner = THIS_MODULE, - .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH | + TRANSPORT_FLAG_PASSTHROUGH_ALUA, .attach_hba = pscsi_attach_hba, .detach_hba = pscsi_detach_hba, .pmode_enable_hba = pscsi_pmode_enable_hba, diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index c0dbfa016575..6fb191914f45 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -602,7 +602,8 @@ int core_tpg_add_lun( if (ret) goto out_kill_ref; - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + if (!(dev->transport->transport_flags & + TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index b54b98dc2d4a..1b0f447ce850 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -4,7 +4,12 @@ #include #include -#define TRANSPORT_FLAG_PASSTHROUGH 1 +#define TRANSPORT_FLAG_PASSTHROUGH 0x1 +/* + * ALUA commands, state checks and setup operations are handled by the + * backend module. + */ +#define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2 struct request_queue; struct scatterlist; -- cgit v1.2.3 From 0a4145729871ef29afe8b0c57560a1f5bd736416 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:25 -0600 Subject: target: fail ALUA transitions for pscsi We do not setup the LU group for pscsi devices, so if you write a state to alua_access_state that will cause a transition you will get a NULL pointer dereference. This patch will fail attempts to try and transition the path for backend devices that set the TRANSPORT_FLAG_PASSTHROUGH_ALUA flag. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index a41bbb8087cf..5b5a1e250a65 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1149,6 +1149,9 @@ int core_alua_do_port_transition( struct t10_alua_tg_pt_gp *tg_pt_gp; int primary, valid_states, rc = 0; + if (l_dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) + return -ENODEV; + valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states; if (core_alua_check_transition(new_state, valid_states, &primary) != 0) return -EINVAL; -- cgit v1.2.3 From 207ee84133c00a8a2a5bdec94df4a5b37d78881c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:26 -0600 Subject: target: Use system workqueue for ALUA transitions If tcmu-runner is processing a STPG and needs to change the kernel's ALUA state then we cannot use the same work queue for task management requests and ALUA transitions, because we could deadlock. The problem occurs when a STPG times out before tcmu-runner is able to call into target_tg_pt_gp_alua_access_state_store-> core_alua_do_port_transition -> core_alua_do_transition_tg_pt -> queue_work. In this case, the tmr is on the work queue waiting for the STPG to complete, but the STPG transition is now queued behind the waiting tmr. Note: This bug will also be fixed by this patch: http://www.spinics.net/lists/target-devel/msg14560.html which switches the tmr code to use the system workqueues. For both, I am not sure if we need a dedicated workqueue since it is not a performance path and I do not think we need WQ_MEM_RECLAIM to make forward progress to free up memory like the block layer does. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 5b5a1e250a65..58bf5e6350ac 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1121,13 +1121,11 @@ static int core_alua_do_transition_tg_pt( unsigned long transition_tmo; transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); + schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, + transition_tmo); } else { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, 0); + schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; } -- cgit v1.2.3 From d7175373f2745ed4abe5b388d5aabd06304f801e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:48 -0600 Subject: target: fix ALUA transition timeout handling The implicit transition time tells initiators the min time to wait before timing out a transition. We currently schedule the transition to occur in tg_pt_gp_implicit_trans_secs seconds so there is no room for delays. If core_alua_do_transition_tg_pt_work->core_alua_update_tpg_primary_metadata needs to write out info to a remote file, then the initiator can easily time out the operation. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 23 ++++++++--------------- include/target/target_core_base.h | 2 +- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 58bf5e6350ac..594807cd92cb 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1013,7 +1013,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) static void core_alua_do_transition_tg_pt_work(struct work_struct *work) { struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); + struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work); struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); @@ -1076,13 +1076,12 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs && - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == + if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == ALUA_ACCESS_STATE_TRANSITION) { /* Just in case */ tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; tg_pt_gp->tg_pt_gp_transition_complete = &wait; - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; return 0; @@ -1117,15 +1116,9 @@ static int core_alua_do_transition_tg_pt( atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) { - unsigned long transition_tmo; - - transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); - } else { + schedule_work(&tg_pt_gp->tg_pt_gp_transition_work); + if (explicit) { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; } @@ -1696,8 +1689,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); - INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work, - core_alua_do_transition_tg_pt_work); + INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work, + core_alua_do_transition_tg_pt_work); tg_pt_gp->tg_pt_gp_dev = dev; atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); @@ -1805,7 +1798,7 @@ void core_alua_free_tg_pt_gp( dev->t10_alua.alua_tg_pt_gps_counter--; spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); /* * Allow a struct t10_alua_tg_pt_gp_member * referenced by diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 37c274e61acc..4b784b6e21c0 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -299,7 +299,7 @@ struct t10_alua_tg_pt_gp { struct list_head tg_pt_gp_lun_list; struct se_lun *tg_pt_gp_alua_lun; struct se_node_acl *tg_pt_gp_alua_nacl; - struct delayed_work tg_pt_gp_transition_work; + struct work_struct tg_pt_gp_transition_work; struct completion *tg_pt_gp_transition_complete; }; -- cgit v1.2.3 From 1ca4d4fa3bfcbe8964f81e5818a9b90436466eb0 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:49 -0600 Subject: target: allow userspace to set state to transitioning Userspace target_core_user handlers like tcmu-runner may want to set the ALUA state to transitioning while it does implicit transitions. This patch allows that state when set from configfs. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 594807cd92cb..252d4e4b7b33 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -43,7 +43,7 @@ #include "target_core_ua.h" static sense_reason_t core_alua_check_transition(int state, int valid, - int *primary); + int *primary, int explicit); static int core_alua_set_tg_pt_secondary_state( struct se_lun *lun, int explicit, int offline); @@ -335,8 +335,8 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) * the state is a primary or secondary target port asymmetric * access state. */ - rc = core_alua_check_transition(alua_access_state, - valid_states, &primary); + rc = core_alua_check_transition(alua_access_state, valid_states, + &primary, 1); if (rc) { /* * If the SET TARGET PORT GROUPS attempts to establish @@ -762,7 +762,7 @@ target_alua_state_check(struct se_cmd *cmd) * Check implicit and explicit ALUA state change request. */ static sense_reason_t -core_alua_check_transition(int state, int valid, int *primary) +core_alua_check_transition(int state, int valid, int *primary, int explicit) { /* * OPTIMIZED, NON-OPTIMIZED, STANDBY and UNAVAILABLE are @@ -804,11 +804,14 @@ core_alua_check_transition(int state, int valid, int *primary) *primary = 0; break; case ALUA_ACCESS_STATE_TRANSITION: - /* - * Transitioning is set internally, and - * cannot be selected manually. - */ - goto not_supported; + if (!(valid & ALUA_T_SUP) || explicit) + /* + * Transitioning is set internally and by tcmu daemon, + * and cannot be selected through a STPG. + */ + goto not_supported; + *primary = 0; + break; default: pr_err("Unknown ALUA access state: 0x%02x\n", state); return TCM_INVALID_PARAMETER_LIST; @@ -1070,7 +1073,7 @@ static int core_alua_do_transition_tg_pt( if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state) return 0; - if (new_state == ALUA_ACCESS_STATE_TRANSITION) + if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) return -EAGAIN; /* @@ -1091,10 +1094,6 @@ static int core_alua_do_transition_tg_pt( * Save the old primary ALUA access state, and set the current state * to ALUA_ACCESS_STATE_TRANSITION. */ - tg_pt_gp->tg_pt_gp_alua_previous_state = - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; - atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_TRANSITION); tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ? @@ -1103,6 +1102,13 @@ static int core_alua_do_transition_tg_pt( core_alua_queue_state_change_ua(tg_pt_gp); + if (new_state == ALUA_ACCESS_STATE_TRANSITION) + return 0; + + tg_pt_gp->tg_pt_gp_alua_previous_state = + atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); + tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; + /* * Check for the optional ALUA primary state transition delay */ @@ -1144,7 +1150,8 @@ int core_alua_do_port_transition( return -ENODEV; valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states; - if (core_alua_check_transition(new_state, valid_states, &primary) != 0) + if (core_alua_check_transition(new_state, valid_states, &primary, + explicit) != 0) return -EINVAL; local_lu_gp_mem = l_dev->dev_alua_lu_gp_mem; -- cgit v1.2.3 From 760bf578edf8122f2503a3a6a3f4b0de3b6ce0bb Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:50 -0600 Subject: target: fix race during implicit transition work flushes This fixes the following races: 1. core_alua_do_transition_tg_pt could have read tg_pt_gp_alua_access_state and gone into this if chunk: if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == ALUA_ACCESS_STATE_TRANSITION) { and then core_alua_do_transition_tg_pt_work could update the state. core_alua_do_transition_tg_pt would then only set tg_pt_gp_alua_pending_state and the tg_pt_gp_alua_access_state would not get updated with the second calls state. 2. core_alua_do_transition_tg_pt could be setting tg_pt_gp_transition_complete while the tg_pt_gp_transition_work is already completing. core_alua_do_transition_tg_pt then waits on the completion that will never be called. To handle these issues, we just call flush_work which will return when core_alua_do_transition_tg_pt_work has completed so there is no need to do the complete/wait. And, if core_alua_do_transition_tg_pt_work was running, instead of trying to sneak in the state change, we just schedule up another core_alua_do_transition_tg_pt_work call. Note that this does not handle a possible race where there are multiple threads call core_alua_do_transition_tg_pt at the same time. I think we need a mutex in target_tg_pt_gp_alua_access_state_store. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 252d4e4b7b33..fd7c16a7ca6e 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1079,16 +1079,8 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == - ALUA_ACCESS_STATE_TRANSITION) { - /* Just in case */ - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; - tg_pt_gp->tg_pt_gp_transition_complete = &wait; + if (!explicit) flush_work(&tg_pt_gp->tg_pt_gp_transition_work); - wait_for_completion(&wait); - tg_pt_gp->tg_pt_gp_transition_complete = NULL; - return 0; - } /* * Save the old primary ALUA access state, and set the current state -- cgit v1.2.3 From 972c7f167974fa41ea8a2eed4b857cc59f59c42c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 9 Mar 2017 02:42:08 -0600 Subject: tcmu: add helper to check if dev was configured This adds a helper to check if the dev was configured. It will be used in the next patch to prevent updates to some config settings after the device has been setup. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 4339ab2133b3..892b311e7874 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -998,6 +998,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p) kfree(udev); } +static bool tcmu_dev_configured(struct tcmu_dev *udev) +{ + return udev->uio_info.uio_dev ? true : false; +} + static void tcmu_free_device(struct se_device *dev) { struct tcmu_dev *udev = TCMU_DEV(dev); @@ -1019,8 +1024,7 @@ static void tcmu_free_device(struct se_device *dev) spin_unlock_irq(&udev->commands_lock); WARN_ON(!all_expired); - /* Device was configured */ - if (udev->uio_info.uio_dev) { + if (tcmu_dev_configured(udev)) { tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name, udev->uio_info.uio_dev->minor); -- cgit v1.2.3 From af980e46a26ac8805685bb70c8572dbc47abb126 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 9 Mar 2017 02:42:09 -0600 Subject: tcmu: make cmd timeout configurable A single daemon could implement multiple types of devices using multuple types of real devices that may not support restarting from crashes and/or handling tcmu timeouts. This makes the cmd timeout configurable, so handlers that do not support it can turn if off for now. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 41 +++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 892b311e7874..10cc15f0b1fa 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -112,6 +112,7 @@ struct tcmu_dev { spinlock_t commands_lock; struct timer_list timeout; + unsigned int cmd_time_out; char dev_config[TCMU_CONFIG_LEN]; }; @@ -172,7 +173,9 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) tcmu_cmd->se_cmd = se_cmd; tcmu_cmd->tcmu_dev = udev; - tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT); + if (udev->cmd_time_out) + tcmu_cmd->deadline = jiffies + + msecs_to_jiffies(udev->cmd_time_out); idr_preload(GFP_KERNEL); spin_lock_irq(&udev->commands_lock); @@ -451,7 +454,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) pr_debug("sleeping for ring space\n"); spin_unlock_irq(&udev->cmdr_lock); - ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT)); + if (udev->cmd_time_out) + ret = schedule_timeout( + msecs_to_jiffies(udev->cmd_time_out)); + else + ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT)); finish_wait(&udev->wait_cmdr, &__wait); if (!ret) { pr_warn("tcmu: command timed out\n"); @@ -526,8 +533,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) /* TODO: only if FLUSH and FUA? */ uio_event_notify(&udev->uio_info); - mod_timer(&udev->timeout, - round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT))); + if (udev->cmd_time_out) + mod_timer(&udev->timeout, round_jiffies_up(jiffies + + msecs_to_jiffies(udev->cmd_time_out))); return TCM_NO_SENSE; } @@ -742,6 +750,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) } udev->hba = hba; + udev->cmd_time_out = TCMU_TIME_OUT; init_waitqueue_head(&udev->wait_cmdr); spin_lock_init(&udev->cmdr_lock); @@ -1037,7 +1046,7 @@ static void tcmu_free_device(struct se_device *dev) enum { Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, - Opt_err, + Opt_cmd_time_out, Opt_err, }; static match_table_t tokens = { @@ -1045,6 +1054,7 @@ static match_table_t tokens = { {Opt_dev_size, "dev_size=%u"}, {Opt_hw_block_size, "hw_block_size=%u"}, {Opt_hw_max_sectors, "hw_max_sectors=%u"}, + {Opt_cmd_time_out, "cmd_time_out=%u"}, {Opt_err, NULL} }; @@ -1111,6 +1121,23 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoul() failed for dev_size=\n"); break; + case Opt_cmd_time_out: + if (tcmu_dev_configured(udev)) { + pr_err("Can not update cmd_time_out after device has been configured.\n"); + ret = -EINVAL; + break; + } + arg_p = match_strdup(&args[0]); + if (!arg_p) { + ret = -ENOMEM; + break; + } + ret = kstrtouint(arg_p, 0, &udev->cmd_time_out); + kfree(arg_p); + if (ret < 0) + pr_err("kstrtouint() failed for cmd_time_out=\n"); + udev->cmd_time_out *= MSEC_PER_SEC; + break; case Opt_hw_block_size: ret = tcmu_set_dev_attrib(&args[0], &(dev->dev_attrib.hw_block_size)); @@ -1138,7 +1165,9 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) bl = sprintf(b + bl, "Config: %s ", udev->dev_config[0] ? udev->dev_config : "NULL"); - bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); + bl += sprintf(b + bl, "Size: %zu ", udev->dev_size); + bl += sprintf(b + bl, "Cmd Time Out: %lu\n", + udev->cmd_time_out / MSEC_PER_SEC); return bl; } -- cgit v1.2.3 From 7d7a743543905a8297dce53b36e793e5307da5d7 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 18 Mar 2017 15:04:13 -0700 Subject: tcmu: Convert cmd_time_out into backend device attribute Instead of putting cmd_time_out under ../target/core/user_0/foo/control, which has historically been used by parameters needed for initial backend device configuration, go ahead and move cmd_time_out into a backend device attribute. In order to do this, tcmu_module_init() has been updated to create a local struct configfs_attribute **tcmu_attrs, that is based upon the existing passthrough_attrib_attrs along with the new cmd_time_out attribute. Once **tcm_attrs has been setup, go ahead and point it at tcmu_ops->tb_dev_attrib_attrs so it's picked up by target-core. Also following MNC's previous change, ->cmd_time_out is stored in milliseconds but exposed via configfs in seconds. Also, note this patch restricts the modification of ->cmd_time_out to before + after the TCMU device has been configured, but not while it has active fabric exports. Cc: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 94 ++++++++++++++++++++++++++++----------- 1 file changed, 68 insertions(+), 26 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 10cc15f0b1fa..c6874c38a10b 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1046,7 +1047,7 @@ static void tcmu_free_device(struct se_device *dev) enum { Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, - Opt_cmd_time_out, Opt_err, + Opt_err, }; static match_table_t tokens = { @@ -1054,7 +1055,6 @@ static match_table_t tokens = { {Opt_dev_size, "dev_size=%u"}, {Opt_hw_block_size, "hw_block_size=%u"}, {Opt_hw_max_sectors, "hw_max_sectors=%u"}, - {Opt_cmd_time_out, "cmd_time_out=%u"}, {Opt_err, NULL} }; @@ -1121,23 +1121,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoul() failed for dev_size=\n"); break; - case Opt_cmd_time_out: - if (tcmu_dev_configured(udev)) { - pr_err("Can not update cmd_time_out after device has been configured.\n"); - ret = -EINVAL; - break; - } - arg_p = match_strdup(&args[0]); - if (!arg_p) { - ret = -ENOMEM; - break; - } - ret = kstrtouint(arg_p, 0, &udev->cmd_time_out); - kfree(arg_p); - if (ret < 0) - pr_err("kstrtouint() failed for cmd_time_out=\n"); - udev->cmd_time_out *= MSEC_PER_SEC; - break; case Opt_hw_block_size: ret = tcmu_set_dev_attrib(&args[0], &(dev->dev_attrib.hw_block_size)); @@ -1165,9 +1148,7 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) bl = sprintf(b + bl, "Config: %s ", udev->dev_config[0] ? udev->dev_config : "NULL"); - bl += sprintf(b + bl, "Size: %zu ", udev->dev_size); - bl += sprintf(b + bl, "Cmd Time Out: %lu\n", - udev->cmd_time_out / MSEC_PER_SEC); + bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); return bl; } @@ -1186,7 +1167,48 @@ tcmu_parse_cdb(struct se_cmd *cmd) return passthrough_parse_cdb(cmd, tcmu_queue_cmd); } -static const struct target_backend_ops tcmu_ops = { +static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = container_of(da->da_dev, + struct tcmu_dev, se_dev); + + return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC); +} + +static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page, + size_t count) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = container_of(da->da_dev, + struct tcmu_dev, se_dev); + u32 val; + int ret; + + if (da->da_dev->export_count) { + pr_err("Unable to set tcmu cmd_time_out while exports exist\n"); + return -EINVAL; + } + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (!val) { + pr_err("Illegal value for cmd_time_out\n"); + return -EINVAL; + } + + udev->cmd_time_out = val * MSEC_PER_SEC; + return count; +} +CONFIGFS_ATTR(tcmu_, cmd_time_out); + +static struct configfs_attribute **tcmu_attrs; + +static struct target_backend_ops tcmu_ops = { .name = "user", .owner = THIS_MODULE, .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, @@ -1200,12 +1222,12 @@ static const struct target_backend_ops tcmu_ops = { .show_configfs_dev_params = tcmu_show_configfs_dev_params, .get_device_type = sbc_get_device_type, .get_blocks = tcmu_get_blocks, - .tb_dev_attrib_attrs = passthrough_attrib_attrs, + .tb_dev_attrib_attrs = NULL, }; static int __init tcmu_module_init(void) { - int ret; + int ret, i, len = 0; BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); @@ -1227,12 +1249,31 @@ static int __init tcmu_module_init(void) goto out_unreg_device; } + for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) { + len += sizeof(struct configfs_attribute *); + } + len += sizeof(struct configfs_attribute *) * 2; + + tcmu_attrs = kzalloc(len, GFP_KERNEL); + if (!tcmu_attrs) { + ret = -ENOMEM; + goto out_unreg_genl; + } + + for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) { + tcmu_attrs[i] = passthrough_attrib_attrs[i]; + } + tcmu_attrs[i] = &tcmu_attr_cmd_time_out; + tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs; + ret = transport_backend_register(&tcmu_ops); if (ret) - goto out_unreg_genl; + goto out_attrs; return 0; +out_attrs: + kfree(tcmu_attrs); out_unreg_genl: genl_unregister_family(&tcmu_genl_family); out_unreg_device: @@ -1246,6 +1287,7 @@ out_free_cache: static void __exit tcmu_module_exit(void) { target_backend_unregister(&tcmu_ops); + kfree(tcmu_attrs); genl_unregister_family(&tcmu_genl_family); root_device_unregister(tcmu_root_device); kmem_cache_destroy(tcmu_cmd_cache); -- cgit v1.2.3 From c4a9b538ab2a109c5f9798bea1f8f4bf93aadfb9 Mon Sep 17 00:00:00 2001 From: Joe Carnuccio Date: Wed, 15 Mar 2017 09:48:43 -0700 Subject: qla2xxx: Allow vref count to timeout on vport delete. Cc: Signed-off-by: Joe Carnuccio Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_attr.c | 4 +--- drivers/scsi/qla2xxx/qla_def.h | 6 +++++- drivers/scsi/qla2xxx/qla_init.c | 1 + drivers/scsi/qla2xxx/qla_mid.c | 14 ++++++++------ drivers/scsi/qla2xxx/qla_os.c | 1 + 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index f610103994af..435ff7fd6384 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2154,8 +2154,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) "Timer for the VP[%d] has stopped\n", vha->vp_idx); } - BUG_ON(atomic_read(&vha->vref_count)); - qla2x00_free_fcports(vha); mutex_lock(&ha->vport_lock); @@ -2166,7 +2164,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l, vha->gnl.ldma); - if (vha->qpair->vp_idx == vha->vp_idx) { + if (vha->qpair && vha->qpair->vp_idx == vha->vp_idx) { if (qla2xxx_delete_qpair(vha, vha->qpair) != QLA_SUCCESS) ql_log(ql_log_warn, vha, 0x7087, "Queue Pair delete failed.\n"); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 625d438e3cce..8662ef4192db 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4076,6 +4076,7 @@ typedef struct scsi_qla_host { /* Count of active session/fcport */ int fcport_count; wait_queue_head_t fcport_waitQ; + wait_queue_head_t vref_waitq; } scsi_qla_host_t; struct qla27xx_image_status { @@ -4131,14 +4132,17 @@ struct qla2_sgx { mb(); \ if (__vha->flags.delete_progress) { \ atomic_dec(&__vha->vref_count); \ + wake_up(&__vha->vref_waitq); \ __bail = 1; \ } else { \ __bail = 0; \ } \ } while (0) -#define QLA_VHA_MARK_NOT_BUSY(__vha) \ +#define QLA_VHA_MARK_NOT_BUSY(__vha) do { \ atomic_dec(&__vha->vref_count); \ + wake_up(&__vha->vref_waitq); \ +} while (0) \ #define QLA_QPAIR_MARK_BUSY(__qpair, __bail) do { \ atomic_inc(&__qpair->ref_count); \ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 32fb9007f137..9f3740c68cc8 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5148,6 +5148,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) } } atomic_dec(&vha->vref_count); + wake_up(&vha->vref_waitq); } spin_unlock_irqrestore(&ha->vport_slock, flags); } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index c6d6f0d912ff..09a490c98763 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) * ensures no active vp_list traversal while the vport is removed * from the queue) */ - spin_lock_irqsave(&ha->vport_slock, flags); - while (atomic_read(&vha->vref_count)) { - spin_unlock_irqrestore(&ha->vport_slock, flags); - - msleep(500); + wait_event_timeout(vha->vref_waitq, atomic_read(&vha->vref_count), + 10*HZ); - spin_lock_irqsave(&ha->vport_slock, flags); + spin_lock_irqsave(&ha->vport_slock, flags); + if (atomic_read(&vha->vref_count)) { + ql_dbg(ql_dbg_vport, vha, 0xfffa, + "vha->vref_count=%u timeout\n", vha->vref_count.counter); + vha->vref_count = (atomic_t)ATOMIC_INIT(0); } list_del(&vha->list); qlt_update_vp_map(vha, RESET_VP_IDX); @@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) spin_lock_irqsave(&ha->vport_slock, flags); atomic_dec(&vha->vref_count); + wake_up(&vha->vref_waitq); } i++; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 1fed235a1b4a..54d4e802bde0 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4268,6 +4268,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, spin_lock_init(&vha->work_lock); spin_lock_init(&vha->cmd_list_lock); init_waitqueue_head(&vha->fcport_waitQ); + init_waitqueue_head(&vha->vref_waitq); vha->gnl.size = sizeof(struct get_name_list_extended) * (ha->max_loop_id + 1); -- cgit v1.2.3 From ae940f2c472a62904dc18234de5cf3ed28f195ee Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:44 -0700 Subject: qla2xxx: Fix memory leak for abts processing Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 45f5077684f0..ecf97c5993e8 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6642,6 +6642,8 @@ qlt_handle_abts_recv_work(struct work_struct *work) spin_lock_irqsave(&ha->hardware_lock, flags); qlt_response_pkt_all_vps(vha, (response_t *)&op->atio); spin_unlock_irqrestore(&ha->hardware_lock, flags); + + kfree(op); } void -- cgit v1.2.3 From 8b666809e10cda9814af3e8be339d35b83909056 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:45 -0700 Subject: qla2xxx: Fix request queue corruption. When FW notify driver or driver detects low FW resource, driver tries to send out Busy SCSI Status to tell Initiator side to back off. During the send process, the lock was not held. Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index ecf97c5993e8..a463bcc57902 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5079,16 +5079,22 @@ qlt_send_busy(struct scsi_qla_host *vha, static int qlt_chk_qfull_thresh_hold(struct scsi_qla_host *vha, - struct atio_from_isp *atio) + struct atio_from_isp *atio, bool ha_locked) { struct qla_hw_data *ha = vha->hw; uint16_t status; + unsigned long flags; if (ha->tgt.num_pend_cmds < Q_FULL_THRESH_HOLD(ha)) return 0; + if (!ha_locked) + spin_lock_irqsave(&ha->hardware_lock, flags); status = temp_sam_status; qlt_send_busy(vha, atio, status); + if (!ha_locked) + spin_unlock_irqrestore(&ha->hardware_lock, flags); + return 1; } @@ -5133,7 +5139,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, if (likely(atio->u.isp24.fcp_cmnd.task_mgmt_flags == 0)) { - rc = qlt_chk_qfull_thresh_hold(vha, atio); + rc = qlt_chk_qfull_thresh_hold(vha, atio, ha_locked); if (rc != 0) { tgt->atio_irq_cmd_count--; return; @@ -5256,7 +5262,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt) break; } - rc = qlt_chk_qfull_thresh_hold(vha, atio); + rc = qlt_chk_qfull_thresh_hold(vha, atio, true); if (rc != 0) { tgt->irq_cmd_count--; return; -- cgit v1.2.3 From 8f6fc8d4e7ae2347d6261d11a7eb2b247d2954d8 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:46 -0700 Subject: qla2xxx: Fix inadequate lock protection for ABTS. Normally, ABTS is sent to Target Core as Task MGMT command. In the case of error, qla2xxx needs to send response, hardware_lock is required to prevent request queue corruption. Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index a463bcc57902..a78c3e9bcb57 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -130,6 +130,9 @@ static void qlt_send_term_imm_notif(struct scsi_qla_host *vha, static struct fc_port *qlt_create_sess(struct scsi_qla_host *vha, fc_port_t *fcport, bool local); void qlt_unreg_sess(struct fc_port *sess); +static void qlt_24xx_handle_abts(struct scsi_qla_host *, + struct abts_recv_from_24xx *); + /* * Global Variables */ @@ -389,6 +392,8 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, (struct abts_recv_from_24xx *)atio; struct scsi_qla_host *host = qlt_find_host_by_vp_idx(vha, entry->vp_index); + unsigned long flags; + if (unlikely(!host)) { ql_dbg(ql_dbg_tgt, vha, 0xffff, "qla_target(%d): Response pkt (ABTS_RECV_24XX) " @@ -396,9 +401,12 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, vha->vp_idx, entry->vp_index); break; } - qlt_response_pkt(host, (response_t *)atio); + if (!ha_locked) + spin_lock_irqsave(&host->hw->hardware_lock, flags); + qlt_24xx_handle_abts(host, (struct abts_recv_from_24xx *)atio); + if (!ha_locked) + spin_unlock_irqrestore(&host->hw->hardware_lock, flags); break; - } /* case PUREX_IOCB_TYPE: ql2xmvasynctoatio */ -- cgit v1.2.3 From f159b3c7cd45c550d0f73806451a10b6b6bc08ae Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:47 -0700 Subject: qla2xxx: Fix sess_lock & hardware_lock lock order problem. The main lock that needs to be held for CMD or TMR submission to upper layer is the sess_lock. The sess_lock is used to serialize cmd submission and session deletion. The addition of hardware_lock being held is not necessary. This patch removes hardware_lock dependency from CMD/TMR submission. Use hardware_lock only for error response in this case. Path1 CPU0 CPU1 ---- ---- lock(&(&ha->tgt.sess_lock)->rlock); lock(&(&ha->hardware_lock)->rlock); lock(&(&ha->tgt.sess_lock)->rlock); lock(&(&ha->hardware_lock)->rlock); Path2/deadlock *** DEADLOCK *** Call Trace: dump_stack+0x85/0xc2 print_circular_bug+0x1e3/0x250 __lock_acquire+0x1425/0x1620 lock_acquire+0xbf/0x210 _raw_spin_lock_irqsave+0x53/0x70 qlt_sess_work_fn+0x21d/0x480 [qla2xxx] process_one_work+0x1f4/0x6e0 Cc: Cc: Bart Van Assche Reported-by: Bart Van Assche Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 41 +++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index a78c3e9bcb57..989f931af156 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5727,30 +5727,23 @@ static void qlt_abort_work(struct qla_tgt *tgt, } } - spin_lock_irqsave(&ha->hardware_lock, flags); - - if (tgt->tgt_stop) - goto out_term; - rc = __qlt_24xx_handle_abts(vha, &prm->abts, sess); + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); + if (rc != 0) goto out_term; - spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (sess) - ha->tgt.tgt_ops->put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); return; out_term2: - spin_lock_irqsave(&ha->hardware_lock, flags); + if (sess) + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); out_term: + spin_lock_irqsave(&ha->hardware_lock, flags); qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); spin_unlock_irqrestore(&ha->hardware_lock, flags); - - if (sess) - ha->tgt.tgt_ops->put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); } static void qlt_tmr_work(struct qla_tgt *tgt, @@ -5770,7 +5763,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, spin_lock_irqsave(&ha->tgt.sess_lock, flags); if (tgt->tgt_stop) - goto out_term; + goto out_term2; s_id = prm->tm_iocb2.u.isp24.fcp_hdr.s_id; sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id); @@ -5782,11 +5775,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt, spin_lock_irqsave(&ha->tgt.sess_lock, flags); if (!sess) - goto out_term; + goto out_term2; } else { if (sess->deleted) { sess = NULL; - goto out_term; + goto out_term2; } if (!kref_get_unless_zero(&sess->sess_kref)) { @@ -5794,7 +5787,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, "%s: kref_get fail %8phC\n", __func__, sess->port_name); sess = NULL; - goto out_term; + goto out_term2; } } @@ -5804,17 +5797,19 @@ static void qlt_tmr_work(struct qla_tgt *tgt, unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); rc = qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0); - if (rc != 0) - goto out_term; - ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + if (rc != 0) + goto out_term; return; +out_term2: + if (sess) + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); out_term: qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); - ha->tgt.tgt_ops->put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } static void qlt_sess_work_fn(struct work_struct *work) -- cgit v1.2.3 From 5b33469a055c77001fd2c62b0f985c991b0e5b65 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:48 -0700 Subject: qla2xxx: Allow relogin to proceed if remote login did not finish If the remote port have started the login process, then the PLOGI and PRLI should be back to back. Driver will allow the remote port to complete the process. For the case where the remote port decide to back off from sending PRLI, this local port sets an expiration timer for the PRLI. Once the expiration time passes, the relogin retry logic is allowed to go through and perform login with the remote port. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 2 ++ drivers/scsi/qla2xxx/qla_init.c | 12 ++++++++++-- drivers/scsi/qla2xxx/qla_isr.c | 25 +++++++++++++++++++------ drivers/scsi/qla2xxx/qla_target.c | 1 + 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 8662ef4192db..56cd45fc600a 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2300,6 +2300,8 @@ typedef struct fc_port { struct ct_sns_desc ct_desc; enum discovery_state disc_state; enum login_state fw_login_state; + unsigned long plogi_nack_done_deadline; + u32 login_gen, last_login_gen; u32 rscn_gen, last_rscn_gen; u32 chip_reset; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 9f3740c68cc8..a7865a5d556d 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -876,10 +876,14 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) fcport->login_retry--; if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || - (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || (fcport->fw_login_state == DSC_LS_PRLI_PEND)) return 0; + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) + return 0; + } + /* for pure Target Mode. Login will not be initiated */ if (vha->host->active_mode == MODE_TARGET) return 0; @@ -1041,10 +1045,14 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha, fcport->flags); if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || - (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || (fcport->fw_login_state == DSC_LS_PRLI_PEND)) return; + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) + return; + } + if (fcport->flags & FCF_ASYNC_SENT) { fcport->login_retry++; set_bit(RELOGIN_NEEDED, &vha->dpc_flags); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 3c66ea29de27..b2c6da752edd 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1620,9 +1620,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, QLA_LOGIO_LOGIN_RETRIED : 0; if (logio->entry_status) { ql_log(ql_log_warn, fcport->vha, 0x5034, - "Async-%s error entry - hdl=%x" + "Async-%s error entry - %8phC hdl=%x" "portid=%02x%02x%02x entry-status=%x.\n", - type, sp->handle, fcport->d_id.b.domain, + type, fcport->port_name, sp->handle, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, logio->entry_status); ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x504d, @@ -1633,8 +1633,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, if (le16_to_cpu(logio->comp_status) == CS_COMPLETE) { ql_dbg(ql_dbg_async, fcport->vha, 0x5036, - "Async-%s complete - hdl=%x portid=%02x%02x%02x " - "iop0=%x.\n", type, sp->handle, fcport->d_id.b.domain, + "Async-%s complete - %8phC hdl=%x portid=%02x%02x%02x " + "iop0=%x.\n", type, fcport->port_name, sp->handle, + fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, le32_to_cpu(logio->io_parameter[0])); @@ -1674,6 +1675,17 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, case LSC_SCODE_NPORT_USED: data[0] = MBS_LOOP_ID_USED; break; + case LSC_SCODE_CMD_FAILED: + if (iop[1] == 0x0606) { + /* + * PLOGI/PRLI Completed. We must have Recv PLOGI/PRLI, + * Target side acked. + */ + data[0] = MBS_COMMAND_COMPLETE; + goto logio_done; + } + data[0] = MBS_COMMAND_ERROR; + break; case LSC_SCODE_NOXCB: vha->hw->exch_starvation++; if (vha->hw->exch_starvation > 5) { @@ -1695,8 +1707,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, } ql_dbg(ql_dbg_async, fcport->vha, 0x5037, - "Async-%s failed - hdl=%x portid=%02x%02x%02x comp=%x " - "iop0=%x iop1=%x.\n", type, sp->handle, fcport->d_id.b.domain, + "Async-%s failed - %8phC hdl=%x portid=%02x%02x%02x comp=%x " + "iop0=%x iop1=%x.\n", type, fcport->port_name, + sp->handle, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, le16_to_cpu(logio->comp_status), le32_to_cpu(logio->io_parameter[0]), diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 989f931af156..925d9b858b24 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -562,6 +562,7 @@ void qla2x00_async_nack_sp_done(void *s, int res) sp->fcport->login_gen++; sp->fcport->fw_login_state = DSC_LS_PLOGI_COMP; sp->fcport->logout_on_delete = 1; + sp->fcport->plogi_nack_done_deadline = jiffies + HZ; break; case SRB_NACK_PRLI: -- cgit v1.2.3 From be25152c0d9e236076323abbe9def9714234b761 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:49 -0700 Subject: qla2xxx: Improve T10-DIF/PI handling in driver. Add routines to support T10 DIF tag. Signed-off-by: Quinn Tran Signed-off-by: Anil Gurumurthy Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_dbg.h | 1 + drivers/scsi/qla2xxx/qla_def.h | 10 + drivers/scsi/qla2xxx/qla_gbl.h | 6 +- drivers/scsi/qla2xxx/qla_iocb.c | 13 +- drivers/scsi/qla2xxx/qla_target.c | 540 ++++++++++++++++++++++--------------- drivers/scsi/qla2xxx/qla_target.h | 38 ++- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 49 ++-- 7 files changed, 406 insertions(+), 251 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index e1fc4e66966a..c6bffe929fe7 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -348,6 +348,7 @@ ql_log_pci(uint32_t, struct pci_dev *pdev, int32_t, const char *fmt, ...); #define ql_dbg_tgt 0x00004000 /* Target mode */ #define ql_dbg_tgt_mgt 0x00002000 /* Target mode management */ #define ql_dbg_tgt_tmr 0x00001000 /* Target mode task management */ +#define ql_dbg_tgt_dif 0x00000800 /* Target mode dif */ extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *, uint32_t, void **); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 56cd45fc600a..9d1d3dcf1c87 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3127,6 +3127,16 @@ struct bidi_statistics { unsigned long long transfer_bytes; }; +struct qla_tc_param { + struct scsi_qla_host *vha; + uint32_t blk_sz; + uint32_t bufflen; + struct scatterlist *sg; + struct scatterlist *prot_sg; + struct crc_context *ctx; + uint8_t *ctx_dsd_alloced; +}; + /* Multi queue support */ #define MBC_INITIALIZE_MULTIQ 0x1f #define QLA_QUE_PAGE 0X1000 diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index b3d6441d1d90..ca6f122e5865 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -256,11 +256,11 @@ extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *); extern void *qla2x00_alloc_iocbs(scsi_qla_host_t *, srb_t *); extern int qla2x00_issue_marker(scsi_qla_host_t *, int); extern int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *, srb_t *, - uint32_t *, uint16_t, struct qla_tgt_cmd *); + uint32_t *, uint16_t, struct qla_tc_param *); extern int qla24xx_walk_and_build_sglist(struct qla_hw_data *, srb_t *, - uint32_t *, uint16_t, struct qla_tgt_cmd *); + uint32_t *, uint16_t, struct qla_tc_param *); extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *, - uint32_t *, uint16_t, struct qla_tgt_cmd *); + uint32_t *, uint16_t, struct qla_tc_param *); extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *); extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *); extern int qla24xx_build_scsi_crc_2_iocbs(srb_t *, diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 535079280288..ea027f6a7fd4 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -889,7 +889,7 @@ qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx, int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, - uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc) + uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc) { void *next_dsd; uint8_t avail_dsds = 0; @@ -898,7 +898,6 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, struct scatterlist *sg_prot; uint32_t *cur_dsd = dsd; uint16_t used_dsds = tot_dsds; - uint32_t prot_int; /* protection interval */ uint32_t partial; struct qla2_sgx sgx; @@ -966,7 +965,7 @@ alloc_and_fill: } else { list_add_tail(&dsd_ptr->list, &(tc->ctx->dsd_list)); - tc->ctx_dsd_alloced = 1; + *tc->ctx_dsd_alloced = 1; } @@ -1005,7 +1004,7 @@ alloc_and_fill: int qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, - uint16_t tot_dsds, struct qla_tgt_cmd *tc) + uint16_t tot_dsds, struct qla_tc_param *tc) { void *next_dsd; uint8_t avail_dsds = 0; @@ -1066,7 +1065,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, } else { list_add_tail(&dsd_ptr->list, &(tc->ctx->dsd_list)); - tc->ctx_dsd_alloced = 1; + *tc->ctx_dsd_alloced = 1; } /* add new list to cmd iocb or last list */ @@ -1092,7 +1091,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp, - uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc) + uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc) { void *next_dsd; uint8_t avail_dsds = 0; @@ -1158,7 +1157,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp, } else { list_add_tail(&dsd_ptr->list, &(tc->ctx->dsd_list)); - tc->ctx_dsd_alloced = 1; + *tc->ctx_dsd_alloced = 1; } /* add new list to cmd iocb or last list */ diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 925d9b858b24..532004981dbd 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -143,6 +143,20 @@ static struct workqueue_struct *qla_tgt_wq; static DEFINE_MUTEX(qla_tgt_mutex); static LIST_HEAD(qla_tgt_glist); +static const char *prot_op_str(u32 prot_op) +{ + switch (prot_op) { + case TARGET_PROT_NORMAL: return "NORMAL"; + case TARGET_PROT_DIN_INSERT: return "DIN_INSERT"; + case TARGET_PROT_DOUT_INSERT: return "DOUT_INSERT"; + case TARGET_PROT_DIN_STRIP: return "DIN_STRIP"; + case TARGET_PROT_DOUT_STRIP: return "DOUT_STRIP"; + case TARGET_PROT_DIN_PASS: return "DIN_PASS"; + case TARGET_PROT_DOUT_PASS: return "DOUT_PASS"; + default: return "UNKNOWN"; + } +} + /* This API intentionally takes dest as a parameter, rather than returning * int value to avoid caller forgetting to issue wmb() after the store */ void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest) @@ -2022,6 +2036,70 @@ void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd) } EXPORT_SYMBOL(qlt_free_mcmd); +/* + * ha->hardware_lock supposed to be held on entry. Might drop it, then + * reacquire + */ +void qlt_send_resp_ctio(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, + uint8_t scsi_status, uint8_t sense_key, uint8_t asc, uint8_t ascq) +{ + struct atio_from_isp *atio = &cmd->atio; + struct ctio7_to_24xx *ctio; + uint16_t temp; + + ql_dbg(ql_dbg_tgt_dif, vha, 0x3066, + "Sending response CTIO7 (vha=%p, atio=%p, scsi_status=%02x, " + "sense_key=%02x, asc=%02x, ascq=%02x", + vha, atio, scsi_status, sense_key, asc, ascq); + + ctio = (struct ctio7_to_24xx *)qla2x00_alloc_iocbs(vha, NULL); + if (!ctio) { + ql_dbg(ql_dbg_async, vha, 0x3067, + "qla2x00t(%ld): %s failed: unable to allocate request packet", + vha->host_no, __func__); + goto out; + } + + ctio->entry_type = CTIO_TYPE7; + ctio->entry_count = 1; + ctio->handle = QLA_TGT_SKIP_HANDLE; + ctio->nport_handle = cmd->sess->loop_id; + ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); + ctio->vp_index = vha->vp_idx; + ctio->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2]; + ctio->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1]; + ctio->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0]; + ctio->exchange_addr = atio->u.isp24.exchange_addr; + ctio->u.status1.flags = (atio->u.isp24.attr << 9) | + cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS); + temp = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id); + ctio->u.status1.ox_id = cpu_to_le16(temp); + ctio->u.status1.scsi_status = + cpu_to_le16(SS_RESPONSE_INFO_LEN_VALID | scsi_status); + ctio->u.status1.response_len = cpu_to_le16(18); + ctio->u.status1.residual = cpu_to_le32(get_datalen_for_atio(atio)); + + if (ctio->u.status1.residual != 0) + ctio->u.status1.scsi_status |= + cpu_to_le16(SS_RESIDUAL_UNDER); + + /* Response code and sense key */ + put_unaligned_le32(((0x70 << 24) | (sense_key << 8)), + (&ctio->u.status1.sense_data)[0]); + /* Additional sense length */ + put_unaligned_le32(0x0a, (&ctio->u.status1.sense_data)[1]); + /* ASC and ASCQ */ + put_unaligned_le32(((asc << 24) | (ascq << 16)), + (&ctio->u.status1.sense_data)[3]); + + /* Memory Barrier */ + wmb(); + + qla2x00_start_iocbs(vha, vha->req); +out: + return; +} + /* callback from target fabric module code */ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) { @@ -2270,7 +2348,7 @@ static int qlt_24xx_build_ctio_pkt(struct qla_tgt_prm *prm, */ return -EAGAIN; } else - ha->tgt.cmds[h-1] = prm->cmd; + ha->tgt.cmds[h - 1] = prm->cmd; pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK; pkt->nport_handle = prm->cmd->loop_id; @@ -2400,6 +2478,50 @@ static inline int qlt_has_data(struct qla_tgt_cmd *cmd) return cmd->bufflen > 0; } +static void qlt_print_dif_err(struct qla_tgt_prm *prm) +{ + struct qla_tgt_cmd *cmd; + struct scsi_qla_host *vha; + + /* asc 0x10=dif error */ + if (prm->sense_buffer && (prm->sense_buffer[12] == 0x10)) { + cmd = prm->cmd; + vha = cmd->vha; + /* ASCQ */ + switch (prm->sense_buffer[13]) { + case 1: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected Guard TAG ERR: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + case 2: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected APP TAG ERR: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + case 3: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected REF TAG ERR: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + default: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected Dif ERR: lba[%llx|%lld] len[%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + } + ql_dump_buffer(ql_dbg_tgt_dif, vha, 0xffff, cmd->cdb, 16); + } +} + /* * Called without ha->hardware_lock held */ @@ -2521,18 +2643,9 @@ skip_explict_conf: for (i = 0; i < prm->sense_buffer_len/4; i++) ((uint32_t *)ctio->u.status1.sense_data)[i] = cpu_to_be32(((uint32_t *)prm->sense_buffer)[i]); -#if 0 - if (unlikely((prm->sense_buffer_len % 4) != 0)) { - static int q; - if (q < 10) { - ql_dbg(ql_dbg_tgt, vha, 0xe04f, - "qla_target(%d): %d bytes of sense " - "lost", prm->tgt->ha->vp_idx, - prm->sense_buffer_len % 4); - q++; - } - } -#endif + + qlt_print_dif_err(prm); + } else { ctio->u.status1.flags &= ~cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_0); @@ -2546,19 +2659,9 @@ skip_explict_conf: /* Sense with len > 24, is it possible ??? */ } - - -/* diff */ static inline int qlt_hba_err_chk_enabled(struct se_cmd *se_cmd) { - /* - * Uncomment when corresponding SCSI changes are done. - * - if (!sp->cmd->prot_chk) - return 0; - * - */ switch (se_cmd->prot_op) { case TARGET_PROT_DOUT_INSERT: case TARGET_PROT_DIN_STRIP: @@ -2579,16 +2682,38 @@ qlt_hba_err_chk_enabled(struct se_cmd *se_cmd) return 0; } +static inline int +qla_tgt_ref_mask_check(struct se_cmd *se_cmd) +{ + switch (se_cmd->prot_op) { + case TARGET_PROT_DIN_INSERT: + case TARGET_PROT_DOUT_INSERT: + case TARGET_PROT_DIN_STRIP: + case TARGET_PROT_DOUT_STRIP: + case TARGET_PROT_DIN_PASS: + case TARGET_PROT_DOUT_PASS: + return 1; + default: + return 0; + } + return 0; +} + /* - * qla24xx_set_t10dif_tags_from_cmd - Extract Ref and App tags from SCSI command - * + * qla_tgt_set_dif_tags - Extract Ref and App tags from SCSI command */ -static inline void -qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx) +static void +qla_tgt_set_dif_tags(struct qla_tgt_cmd *cmd, struct crc_context *ctx, + uint16_t *pfw_prot_opts) { + struct se_cmd *se_cmd = &cmd->se_cmd; uint32_t lba = 0xffffffff & se_cmd->t_task_lba; + scsi_qla_host_t *vha = cmd->tgt->vha; + struct qla_hw_data *ha = vha->hw; + uint32_t t32 = 0; - /* wait til Mode Sense/Select cmd, modepage Ah, subpage 2 + /* + * wait till Mode Sense/Select cmd, modepage Ah, subpage 2 * have been immplemented by TCM, before AppTag is avail. * Look for modesense_handlers[] */ @@ -2596,65 +2721,73 @@ qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx) ctx->app_tag_mask[0] = 0x0; ctx->app_tag_mask[1] = 0x0; + if (IS_PI_UNINIT_CAPABLE(ha)) { + if ((se_cmd->prot_type == TARGET_DIF_TYPE1_PROT) || + (se_cmd->prot_type == TARGET_DIF_TYPE2_PROT)) + *pfw_prot_opts |= PO_DIS_VALD_APP_ESC; + else if (se_cmd->prot_type == TARGET_DIF_TYPE3_PROT) + *pfw_prot_opts |= PO_DIS_VALD_APP_REF_ESC; + } + + t32 = ha->tgt.tgt_ops->get_dif_tags(cmd, pfw_prot_opts); + switch (se_cmd->prot_type) { case TARGET_DIF_TYPE0_PROT: /* - * No check for ql2xenablehba_err_chk, as it would be an - * I/O error if hba tag generation is not done. + * No check for ql2xenablehba_err_chk, as it + * would be an I/O error if hba tag generation + * is not done. */ ctx->ref_tag = cpu_to_le32(lba); - - if (!qlt_hba_err_chk_enabled(se_cmd)) - break; - /* enable ALL bytes of the ref tag */ ctx->ref_tag_mask[0] = 0xff; ctx->ref_tag_mask[1] = 0xff; ctx->ref_tag_mask[2] = 0xff; ctx->ref_tag_mask[3] = 0xff; break; - /* - * For TYpe 1 protection: 16 bit GUARD tag, 32 bit REF tag, and - * 16 bit app tag. - */ case TARGET_DIF_TYPE1_PROT: - ctx->ref_tag = cpu_to_le32(lba); - - if (!qlt_hba_err_chk_enabled(se_cmd)) - break; - - /* enable ALL bytes of the ref tag */ - ctx->ref_tag_mask[0] = 0xff; - ctx->ref_tag_mask[1] = 0xff; - ctx->ref_tag_mask[2] = 0xff; - ctx->ref_tag_mask[3] = 0xff; - break; - /* - * For TYPE 2 protection: 16 bit GUARD + 32 bit REF tag has to - * match LBA in CDB + N - */ + /* + * For TYPE 1 protection: 16 bit GUARD tag, 32 bit + * REF tag, and 16 bit app tag. + */ + ctx->ref_tag = cpu_to_le32(lba); + if (!qla_tgt_ref_mask_check(se_cmd) || + !(ha->tgt.tgt_ops->chk_dif_tags(t32))) { + *pfw_prot_opts |= PO_DIS_REF_TAG_VALD; + break; + } + /* enable ALL bytes of the ref tag */ + ctx->ref_tag_mask[0] = 0xff; + ctx->ref_tag_mask[1] = 0xff; + ctx->ref_tag_mask[2] = 0xff; + ctx->ref_tag_mask[3] = 0xff; + break; case TARGET_DIF_TYPE2_PROT: - ctx->ref_tag = cpu_to_le32(lba); - - if (!qlt_hba_err_chk_enabled(se_cmd)) - break; - - /* enable ALL bytes of the ref tag */ - ctx->ref_tag_mask[0] = 0xff; - ctx->ref_tag_mask[1] = 0xff; - ctx->ref_tag_mask[2] = 0xff; - ctx->ref_tag_mask[3] = 0xff; - break; - - /* For Type 3 protection: 16 bit GUARD only */ + /* + * For TYPE 2 protection: 16 bit GUARD + 32 bit REF + * tag has to match LBA in CDB + N + */ + ctx->ref_tag = cpu_to_le32(lba); + if (!qla_tgt_ref_mask_check(se_cmd) || + !(ha->tgt.tgt_ops->chk_dif_tags(t32))) { + *pfw_prot_opts |= PO_DIS_REF_TAG_VALD; + break; + } + /* enable ALL bytes of the ref tag */ + ctx->ref_tag_mask[0] = 0xff; + ctx->ref_tag_mask[1] = 0xff; + ctx->ref_tag_mask[2] = 0xff; + ctx->ref_tag_mask[3] = 0xff; + break; case TARGET_DIF_TYPE3_PROT: - ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] = - ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00; - break; + /* For TYPE 3 protection: 16 bit GUARD only */ + *pfw_prot_opts |= PO_DIS_REF_TAG_VALD; + ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] = + ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00; + break; } } - static inline int qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) { @@ -2673,6 +2806,7 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) struct se_cmd *se_cmd = &cmd->se_cmd; uint32_t h; struct atio_from_isp *atio = &prm->cmd->atio; + struct qla_tc_param tc; uint16_t t16; ha = vha->hw; @@ -2698,16 +2832,15 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) case TARGET_PROT_DIN_INSERT: case TARGET_PROT_DOUT_STRIP: transfer_length = data_bytes; - data_bytes += dif_bytes; + if (cmd->prot_sg_cnt) + data_bytes += dif_bytes; break; - case TARGET_PROT_DIN_STRIP: case TARGET_PROT_DOUT_INSERT: case TARGET_PROT_DIN_PASS: case TARGET_PROT_DOUT_PASS: transfer_length = data_bytes + dif_bytes; break; - default: BUG(); break; @@ -2743,7 +2876,6 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) break; } - /* ---- PKT ---- */ /* Update entry type to indicate Command Type CRC_2 IOCB */ pkt->entry_type = CTIO_CRC2; @@ -2761,9 +2893,8 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) } else ha->tgt.cmds[h-1] = prm->cmd; - pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK; - pkt->nport_handle = prm->cmd->loop_id; + pkt->nport_handle = cpu_to_le16(prm->cmd->loop_id); pkt->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2]; pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1]; @@ -2784,12 +2915,10 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) else if (cmd->dma_data_direction == DMA_FROM_DEVICE) pkt->flags = cpu_to_le16(CTIO7_FLAGS_DATA_OUT); - pkt->dseg_count = prm->tot_dsds; /* Fibre channel byte count */ pkt->transfer_length = cpu_to_le32(transfer_length); - /* ----- CRC context -------- */ /* Allocate CRC context from global pool */ @@ -2809,13 +2938,12 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) /* Set handle */ crc_ctx_pkt->handle = pkt->handle; - qlt_set_t10dif_tags(se_cmd, crc_ctx_pkt); + qla_tgt_set_dif_tags(cmd, crc_ctx_pkt, &fw_prot_opts); pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma)); pkt->crc_context_address[1] = cpu_to_le32(MSD(crc_ctx_dma)); pkt->crc_context_len = CRC_CONTEXT_LEN_FW; - if (!bundling) { cur_dsd = (uint32_t *) &crc_ctx_pkt->u.nobundling.data_address; } else { @@ -2836,16 +2964,24 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) crc_ctx_pkt->byte_count = cpu_to_le32(data_bytes); crc_ctx_pkt->guard_seed = cpu_to_le16(0); + memset((uint8_t *)&tc, 0 , sizeof(tc)); + tc.vha = vha; + tc.blk_sz = cmd->blk_sz; + tc.bufflen = cmd->bufflen; + tc.sg = cmd->sg; + tc.prot_sg = cmd->prot_sg; + tc.ctx = crc_ctx_pkt; + tc.ctx_dsd_alloced = &cmd->ctx_dsd_alloced; /* Walks data segments */ pkt->flags |= cpu_to_le16(CTIO7_FLAGS_DSD_PTR); if (!bundling && prm->prot_seg_cnt) { if (qla24xx_walk_and_build_sglist_no_difb(ha, NULL, cur_dsd, - prm->tot_dsds, cmd)) + prm->tot_dsds, &tc)) goto crc_queuing_error; } else if (qla24xx_walk_and_build_sglist(ha, NULL, cur_dsd, - (prm->tot_dsds - prm->prot_seg_cnt), cmd)) + (prm->tot_dsds - prm->prot_seg_cnt), &tc)) goto crc_queuing_error; if (bundling && prm->prot_seg_cnt) { @@ -2854,18 +2990,18 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address; if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd, - prm->prot_seg_cnt, cmd)) + prm->prot_seg_cnt, &tc)) goto crc_queuing_error; } return QLA_SUCCESS; crc_queuing_error: /* Cleanup will be performed by the caller */ + vha->hw->tgt.cmds[h - 1] = NULL; return QLA_FUNCTION_FAILED; } - /* * Callback to setup response of xmit_type of QLA_TGT_XMIT_DATA and * * QLA_TGT_XMIT_STATUS for >= 24xx silicon @@ -3113,139 +3249,113 @@ EXPORT_SYMBOL(qlt_rdy_to_xfer); /* - * Checks the guard or meta-data for the type of error - * detected by the HBA. + * it is assumed either hardware_lock or qpair lock is held. */ -static inline int +static void qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd, - struct ctio_crc_from_fw *sts) + struct ctio_crc_from_fw *sts) { uint8_t *ap = &sts->actual_dif[0]; uint8_t *ep = &sts->expected_dif[0]; - uint32_t e_ref_tag, a_ref_tag; - uint16_t e_app_tag, a_app_tag; - uint16_t e_guard, a_guard; uint64_t lba = cmd->se_cmd.t_task_lba; + uint8_t scsi_status, sense_key, asc, ascq; + unsigned long flags; - a_guard = be16_to_cpu(*(uint16_t *)(ap + 0)); - a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2)); - a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4)); - - e_guard = be16_to_cpu(*(uint16_t *)(ep + 0)); - e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2)); - e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4)); - - ql_dbg(ql_dbg_tgt, vha, 0xe075, - "iocb(s) %p Returned STATUS.\n", sts); - - ql_dbg(ql_dbg_tgt, vha, 0xf075, - "dif check TGT cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x]\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, a_guard, e_guard); - - /* - * Ignore sector if: - * For type 3: ref & app tag is all 'f's - * For type 0,1,2: app tag is all 'f's - */ - if ((a_app_tag == 0xffff) && - ((cmd->se_cmd.prot_type != TARGET_DIF_TYPE3_PROT) || - (a_ref_tag == 0xffffffff))) { - uint32_t blocks_done; - - /* 2TB boundary case covered automatically with this */ - blocks_done = e_ref_tag - (uint32_t)lba + 1; - cmd->se_cmd.bad_sector = e_ref_tag; - cmd->se_cmd.pi_err = 0; - ql_dbg(ql_dbg_tgt, vha, 0xf074, - "need to return scsi good\n"); - - /* Update protection tag */ - if (cmd->prot_sg_cnt) { - uint32_t i, k = 0, num_ent; - struct scatterlist *sg, *sgl; - + cmd->trc_flags |= TRC_DIF_ERR; - sgl = cmd->prot_sg; + cmd->a_guard = be16_to_cpu(*(uint16_t *)(ap + 0)); + cmd->a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2)); + cmd->a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4)); - /* Patch the corresponding protection tags */ - for_each_sg(sgl, sg, cmd->prot_sg_cnt, i) { - num_ent = sg_dma_len(sg) / 8; - if (k + num_ent < blocks_done) { - k += num_ent; - continue; - } - k = blocks_done; - break; - } + cmd->e_guard = be16_to_cpu(*(uint16_t *)(ep + 0)); + cmd->e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2)); + cmd->e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4)); - if (k != blocks_done) { - ql_log(ql_log_warn, vha, 0xf076, - "unexpected tag values tag:lba=%u:%llu)\n", - e_ref_tag, (unsigned long long)lba); - goto out; - } + ql_dbg(ql_dbg_tgt_dif, vha, 0xf075, + "%s: aborted %d state %d\n", __func__, cmd->aborted, cmd->state); -#if 0 - struct sd_dif_tuple *spt; - /* TODO: - * This section came from initiator. Is it valid here? - * should ulp be override with actual val??? - */ - spt = page_address(sg_page(sg)) + sg->offset; - spt += j; + scsi_status = sense_key = asc = ascq = 0; - spt->app_tag = 0xffff; - if (cmd->se_cmd.prot_type == SCSI_PROT_DIF_TYPE3) - spt->ref_tag = 0xffffffff; -#endif - } - - return 0; - } - - /* check guard */ - if (e_guard != a_guard) { - cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED; - cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba; - - ql_log(ql_log_warn, vha, 0xe076, - "Guard ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, - a_guard, e_guard, cmd); - goto out; + /* check appl tag */ + if (cmd->e_app_tag != cmd->a_app_tag) { + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "App Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] " + "Ref[%x|%x], App[%x|%x], " + "Guard [%x|%x] cmd=%p ox_id[%04x]", + cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks, + cmd->a_ref_tag, cmd->e_ref_tag, + cmd->a_app_tag, cmd->e_app_tag, + cmd->a_guard, cmd->e_guard, + cmd, cmd->atio.u.isp24.fcp_hdr.ox_id); + + cmd->dif_err_code = DIF_ERR_APP; + scsi_status = SAM_STAT_CHECK_CONDITION; + sense_key = ABORTED_COMMAND; + asc = 0x10; + ascq = 0x2; } /* check ref tag */ - if (e_ref_tag != a_ref_tag) { - cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; - cmd->se_cmd.bad_sector = e_ref_tag; - - ql_log(ql_log_warn, vha, 0xe077, - "Ref Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, - a_guard, e_guard, cmd); + if (cmd->e_ref_tag != cmd->a_ref_tag) { + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "Ref Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] " + "Ref[%x|%x], App[%x|%x], " + "Guard[%x|%x] cmd=%p ox_id[%04x] ", + cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks, + cmd->a_ref_tag, cmd->e_ref_tag, + cmd->a_app_tag, cmd->e_app_tag, + cmd->a_guard, cmd->e_guard, + cmd, cmd->atio.u.isp24.fcp_hdr.ox_id); + + cmd->dif_err_code = DIF_ERR_REF; + scsi_status = SAM_STAT_CHECK_CONDITION; + sense_key = ABORTED_COMMAND; + asc = 0x10; + ascq = 0x3; goto out; } - /* check appl tag */ - if (e_app_tag != a_app_tag) { - cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; - cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba; - - ql_log(ql_log_warn, vha, 0xe078, - "App Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, - a_guard, e_guard, cmd); - goto out; + /* check guard */ + if (cmd->e_guard != cmd->a_guard) { + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "Guard ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] " + "Ref[%x|%x], App[%x|%x], " + "Guard [%x|%x] cmd=%p ox_id[%04x]", + cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks, + cmd->a_ref_tag, cmd->e_ref_tag, + cmd->a_app_tag, cmd->e_app_tag, + cmd->a_guard, cmd->e_guard, + cmd, cmd->atio.u.isp24.fcp_hdr.ox_id); + cmd->dif_err_code = DIF_ERR_GRD; + scsi_status = SAM_STAT_CHECK_CONDITION; + sense_key = ABORTED_COMMAND; + asc = 0x10; + ascq = 0x1; } out: - return 1; -} + switch (cmd->state) { + case QLA_TGT_STATE_NEED_DATA: + /* handle_data will load DIF error code */ + cmd->state = QLA_TGT_STATE_DATA_IN; + vha->hw->tgt.tgt_ops->handle_data(cmd); + break; + default: + spin_lock_irqsave(&cmd->cmd_lock, flags); + if (cmd->aborted) { + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + vha->hw->tgt.tgt_ops->free_cmd(cmd); + break; + } + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + qlt_send_resp_ctio(vha, cmd, scsi_status, sense_key, asc, ascq); + /* assume scsi status gets out on the wire. + * Will not wait for completion. + */ + vha->hw->tgt.tgt_ops->free_cmd(cmd); + break; + } +} /* If hardware_lock held on entry, might drop it, then reaquire */ /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */ @@ -3552,6 +3662,16 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio, { int term = 0; + if (cmd->se_cmd.prot_op) + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "Term DIF cmd: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x] op %#x/%s", + cmd->lba, cmd->lba, + cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr, + cmd->se_cmd.prot_op, + prot_op_str(cmd->se_cmd.prot_op)); + if (ctio != NULL) { struct ctio7_from_24xx *c = (struct ctio7_from_24xx *)ctio; term = !(c->flags & @@ -3769,32 +3889,15 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, struct ctio_crc_from_fw *crc = (struct ctio_crc_from_fw *)ctio; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf073, - "qla_target(%d): CTIO with DIF_ERROR status %x received (state %x, se_cmd %p) actual_dif[0x%llx] expect_dif[0x%llx]\n", + "qla_target(%d): CTIO with DIF_ERROR status %x " + "received (state %x, ulp_cmd %p) actual_dif[0x%llx] " + "expect_dif[0x%llx]\n", vha->vp_idx, status, cmd->state, se_cmd, *((u64 *)&crc->actual_dif[0]), *((u64 *)&crc->expected_dif[0])); - if (qlt_handle_dif_error(vha, cmd, ctio)) { - if (cmd->state == QLA_TGT_STATE_NEED_DATA) { - /* scsi Write/xfer rdy complete */ - goto skip_term; - } else { - /* scsi read/xmit respond complete - * call handle dif to send scsi status - * rather than terminate exchange. - */ - cmd->state = QLA_TGT_STATE_PROCESSED; - ha->tgt.tgt_ops->handle_dif_err(cmd); - return; - } - } else { - /* Need to generate a SCSI good completion. - * because FW did not send scsi status. - */ - status = 0; - goto skip_term; - } - break; + qlt_handle_dif_error(vha, cmd, ctio); + return; } default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b, @@ -3817,7 +3920,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, return; } } -skip_term: if (cmd->state == QLA_TGT_STATE_PROCESSED) { cmd->trc_flags |= TRC_CTIO_DONE; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index a7f90dcaae37..c35f889b94a6 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -378,6 +378,14 @@ static inline void adjust_corrupted_atio(struct atio_from_isp *atio) atio->u.isp24.fcp_cmnd.add_cdb_len = 0; } +static inline int get_datalen_for_atio(struct atio_from_isp *atio) +{ + int len = atio->u.isp24.fcp_cmnd.add_cdb_len; + + return (be32_to_cpu(get_unaligned((uint32_t *) + &atio->u.isp24.fcp_cmnd.add_cdb[len * 4]))); +} + #define CTIO_TYPE7 0x12 /* Continue target I/O entry (for 24xx) */ /* @@ -667,7 +675,6 @@ struct qla_tgt_func_tmpl { int (*handle_cmd)(struct scsi_qla_host *, struct qla_tgt_cmd *, unsigned char *, uint32_t, int, int, int); void (*handle_data)(struct qla_tgt_cmd *); - void (*handle_dif_err)(struct qla_tgt_cmd *); int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint16_t, uint32_t); void (*free_cmd)(struct qla_tgt_cmd *); @@ -684,6 +691,8 @@ struct qla_tgt_func_tmpl { void (*clear_nacl_from_fcport_map)(struct fc_port *); void (*put_sess)(struct fc_port *); void (*shutdown_sess)(struct fc_port *); + int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts); + int (*chk_dif_tags)(uint32_t tag); }; int qla2x00_wait_for_hba_online(struct scsi_qla_host *); @@ -720,8 +729,8 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *); #define QLA_TGT_ABORT_ALL 0xFFFE #define QLA_TGT_NEXUS_LOSS_SESS 0xFFFD #define QLA_TGT_NEXUS_LOSS 0xFFFC -#define QLA_TGT_ABTS 0xFFFB -#define QLA_TGT_2G_ABORT_TASK 0xFFFA +#define QLA_TGT_ABTS 0xFFFB +#define QLA_TGT_2G_ABORT_TASK 0xFFFA /* Notify Acknowledge flags */ #define NOTIFY_ACK_RES_COUNT BIT_8 @@ -845,6 +854,7 @@ enum trace_flags { TRC_CMD_FREE = BIT_17, TRC_DATA_IN = BIT_18, TRC_ABORT = BIT_19, + TRC_DIF_ERR = BIT_20, }; struct qla_tgt_cmd { @@ -862,7 +872,6 @@ struct qla_tgt_cmd { unsigned int sg_mapped:1; unsigned int free_sg:1; unsigned int write_data_transferred:1; - unsigned int ctx_dsd_alloced:1; unsigned int q_full:1; unsigned int term_exchg:1; unsigned int cmd_sent_to_fw:1; @@ -885,11 +894,25 @@ struct qla_tgt_cmd { struct list_head cmd_list; struct atio_from_isp atio; - /* t10dif */ + + uint8_t ctx_dsd_alloced; + + /* T10-DIF */ +#define DIF_ERR_NONE 0 +#define DIF_ERR_GRD 1 +#define DIF_ERR_REF 2 +#define DIF_ERR_APP 3 + int8_t dif_err_code; struct scatterlist *prot_sg; uint32_t prot_sg_cnt; - uint32_t blk_sz; + uint32_t blk_sz, num_blks; + uint8_t scsi_status, sense_key, asc, ascq; + struct crc_context *ctx; + uint8_t *cdb; + uint64_t lba; + uint16_t a_guard, e_guard, a_app_tag, e_app_tag; + uint32_t a_ref_tag, e_ref_tag; uint64_t jiffies_at_alloc; uint64_t jiffies_at_free; @@ -1053,4 +1076,7 @@ extern int qlt_free_qfull_cmds(struct scsi_qla_host *); extern void qlt_logo_completion_handler(fc_port_t *, int); extern void qlt_do_generation_tick(struct scsi_qla_host *, int *); +void qlt_send_resp_ctio(scsi_qla_host_t *, struct qla_tgt_cmd *, uint8_t, + uint8_t, uint8_t, uint8_t); + #endif /* __QLA_TARGET_H */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 8e8ab0fa9672..7443e4efa3ae 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -531,6 +531,24 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work) return; } + switch (cmd->dif_err_code) { + case DIF_ERR_GRD: + cmd->se_cmd.pi_err = + TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED; + break; + case DIF_ERR_REF: + cmd->se_cmd.pi_err = + TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; + break; + case DIF_ERR_APP: + cmd->se_cmd.pi_err = + TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; + break; + case DIF_ERR_NONE: + default: + break; + } + if (cmd->se_cmd.pi_err) transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err); @@ -555,25 +573,23 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); } -static void tcm_qla2xxx_handle_dif_work(struct work_struct *work) +static int tcm_qla2xxx_chk_dif_tags(uint32_t tag) { - struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); - - /* take an extra kref to prevent cmd free too early. - * need to wait for SCSI status/check condition to - * finish responding generate by transport_generic_request_failure. - */ - kref_get(&cmd->se_cmd.cmd_kref); - transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err); + return 0; } -/* - * Called from qla_target.c:qlt_do_ctio_completion() - */ -static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd) +static int tcm_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd, + uint16_t *pfw_prot_opts) { - INIT_WORK(&cmd->work, tcm_qla2xxx_handle_dif_work); - queue_work(tcm_qla2xxx_free_wq, &cmd->work); + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD)) + *pfw_prot_opts |= PO_DISABLE_GUARD_CHECK; + + if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG)) + *pfw_prot_opts |= PO_DIS_APP_TAG_VALD; + + return 0; } /* @@ -1610,7 +1626,6 @@ static void tcm_qla2xxx_update_sess(struct fc_port *sess, port_id_t s_id, static struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .handle_cmd = tcm_qla2xxx_handle_cmd, .handle_data = tcm_qla2xxx_handle_data, - .handle_dif_err = tcm_qla2xxx_handle_dif_err, .handle_tmr = tcm_qla2xxx_handle_tmr, .free_cmd = tcm_qla2xxx_free_cmd, .free_mcmd = tcm_qla2xxx_free_mcmd, @@ -1622,6 +1637,8 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map, .put_sess = tcm_qla2xxx_put_sess, .shutdown_sess = tcm_qla2xxx_shutdown_sess, + .get_dif_tags = tcm_qla2xxx_dif_tags, + .chk_dif_tags = tcm_qla2xxx_chk_dif_tags, }; static int tcm_qla2xxx_init_lport(struct tcm_qla2xxx_lport *lport) -- cgit v1.2.3 From 54b9993c8cf2d77c0f23be828a22e0817f742442 Mon Sep 17 00:00:00 2001 From: Anil Gurumurthy Date: Wed, 15 Mar 2017 09:48:50 -0700 Subject: qla2xxx: Export DIF stats via debugfs Signed-off-by: Anil Gurumurthy Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 12 ++++++++++++ drivers/scsi/qla2xxx/qla_dfs.c | 15 +++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 9d1d3dcf1c87..8228dfac6a31 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3108,6 +3108,16 @@ struct qla_chip_state_84xx { uint32_t gold_fw_version; }; +struct qla_dif_statistics { + uint64_t dif_input_bytes; + uint64_t dif_output_bytes; + uint64_t dif_input_requests; + uint64_t dif_output_requests; + uint32_t dif_guard_err; + uint32_t dif_ref_tag_err; + uint32_t dif_app_tag_err; +}; + struct qla_statistics { uint32_t total_isp_aborts; uint64_t input_bytes; @@ -3120,6 +3130,8 @@ struct qla_statistics { uint32_t stat_max_pend_cmds; uint32_t stat_max_qfull_cmds_alloc; uint32_t stat_max_qfull_cmds_dropped; + + struct qla_dif_statistics qla_dif_stats; }; struct bidi_statistics { diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index b48cce696bac..3b35905619b0 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -114,6 +114,21 @@ qla_dfs_tgt_counters_show(struct seq_file *s, void *unused) seq_printf(s, "num Q full sent = %lld\n", vha->tgt_counters.num_q_full_sent); + /* DIF stats */ + seq_printf(s, "DIF Inp Bytes = %lld\n", + vha->qla_stats.qla_dif_stats.dif_input_bytes); + seq_printf(s, "DIF Outp Bytes = %lld\n", + vha->qla_stats.qla_dif_stats.dif_output_bytes); + seq_printf(s, "DIF Inp Req = %lld\n", + vha->qla_stats.qla_dif_stats.dif_input_requests); + seq_printf(s, "DIF Outp Req = %lld\n", + vha->qla_stats.qla_dif_stats.dif_output_requests); + seq_printf(s, "DIF Guard err = %d\n", + vha->qla_stats.qla_dif_stats.dif_guard_err); + seq_printf(s, "DIF Ref tag err = %d\n", + vha->qla_stats.qla_dif_stats.dif_ref_tag_err); + seq_printf(s, "DIF App tag err = %d\n", + vha->qla_stats.qla_dif_stats.dif_app_tag_err); return 0; } -- cgit v1.2.3 From f1443eebca7792b3b8b41b27652d67ddc5d31fa2 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:51 -0700 Subject: qla2xxx: Add async new target notification Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 6 +++--- drivers/scsi/qla2xxx/qla_target.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 532004981dbd..563116188c43 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6005,13 +6005,13 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) tgt->datasegs_per_cmd = QLA_TGT_DATASEGS_PER_CMD_24XX; tgt->datasegs_per_cont = QLA_TGT_DATASEGS_PER_CONT_24XX; - if (base_vha->fc_vport) - return 0; - mutex_lock(&qla_tgt_mutex); list_add_tail(&tgt->tgt_list_entry, &qla_tgt_glist); mutex_unlock(&qla_tgt_mutex); + if (ha->tgt.tgt_ops && ha->tgt.tgt_ops->add_target) + ha->tgt.tgt_ops->add_target(base_vha); + return 0; } diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index c35f889b94a6..d64420251194 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -693,6 +693,7 @@ struct qla_tgt_func_tmpl { void (*shutdown_sess)(struct fc_port *); int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts); int (*chk_dif_tags)(uint32_t tag); + void (*add_target)(struct scsi_qla_host *); }; int qla2x00_wait_for_hba_online(struct scsi_qla_host *); -- cgit v1.2.3 From 15f30a5752287f20c7de428423c34bc51cfbe465 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:52 -0700 Subject: qla2xxx: Use IOCB interface to submit non-critical MBX. The Mailbox interface is currently over subscribed. We like to reserve the Mailbox interface for the chip managment and link initialization. Any non essential Mailbox command will be routed through the IOCB interface. The IOCB interface is able to absorb more commands. Following commands are being routed through IOCB interface - Get ID List (007Ch) - Get Port DB (0064h) - Get Link Priv Stats (006Dh) Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 12 +- drivers/scsi/qla2xxx/qla_gbl.h | 10 +- drivers/scsi/qla2xxx/qla_init.c | 46 +------ drivers/scsi/qla2xxx/qla_isr.c | 2 +- drivers/scsi/qla2xxx/qla_mbx.c | 270 ++++++++++++++++++++++++++++++++++++-- drivers/scsi/qla2xxx/qla_target.c | 4 +- 6 files changed, 279 insertions(+), 65 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 8228dfac6a31..ae38b7a789b1 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -395,11 +395,15 @@ struct srb_iocb { struct completion comp; } abt; struct ct_arg ctarg; +#define MAX_IOCB_MB_REG 28 +#define SIZEOF_IOCB_MB_REG (MAX_IOCB_MB_REG * sizeof(uint16_t)) struct { - __le16 in_mb[28]; /* fr fw */ - __le16 out_mb[28]; /* to fw */ + __le16 in_mb[MAX_IOCB_MB_REG]; /* from FW */ + __le16 out_mb[MAX_IOCB_MB_REG]; /* to FW */ void *out, *in; dma_addr_t out_dma, in_dma; + struct completion comp; + int rc; } mbx; struct { struct imm_ntfy_from_isp *ntfy; @@ -437,7 +441,7 @@ typedef struct srb { uint32_t handle; uint16_t flags; uint16_t type; - char *name; + const char *name; int iocbs; struct qla_qpair *qpair; u32 gen1; /* scratch */ @@ -3364,6 +3368,8 @@ struct qla_hw_data { uint32_t exlogins_enabled:1; uint32_t exchoffld_enabled:1; /* 35 bits */ + + uint32_t fw_started:1; } flags; /* This spinlock is used to protect "io transactions", you must diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index ca6f122e5865..323b912b47f7 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -193,6 +193,7 @@ extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *); void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, uint16_t *); int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *); +int qla24xx_async_abort_cmd(srb_t *); /* * Global Functions in qla_mid.c source file. @@ -368,7 +369,7 @@ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *, extern int qla24xx_get_isp_stats(scsi_qla_host_t *, struct link_statistics *, - dma_addr_t, uint); + dma_addr_t, uint16_t); extern int qla24xx_abort_command(srb_t *); extern int qla24xx_async_abort_command(srb_t *); @@ -472,6 +473,13 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *, dma_addr_t, uint32_t, uint32_t); extern int qla26xx_dport_diagnostics(scsi_qla_host_t *, void *, uint, uint); +int qla24xx_send_mb_cmd(struct scsi_qla_host *, mbx_cmd_t *); +int qla24xx_gpdb_wait(struct scsi_qla_host *, fc_port_t *, u8); +int qla24xx_gidlist_wait(struct scsi_qla_host *, void *, dma_addr_t, + uint16_t *); +int __qla24xx_parse_gpdb(struct scsi_qla_host *, fc_port_t *, + struct port_database_24xx *); + /* * Global Function Prototypes in qla_isr.c source file. */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index a7865a5d556d..b1bfa63f7d4e 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -629,7 +629,6 @@ void qla24xx_async_gpdb_sp_done(void *s, int res) struct srb *sp = s; struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; - uint64_t zero = 0; struct port_database_24xx *pd; fc_port_t *fcport = sp->fcport; u16 *mb = sp->u.iocb_cmd.u.mbx.in_mb; @@ -649,48 +648,7 @@ void qla24xx_async_gpdb_sp_done(void *s, int res) pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in; - /* Check for logged in state. */ - if (pd->current_login_state != PDS_PRLI_COMPLETE && - pd->last_login_state != PDS_PRLI_COMPLETE) { - ql_dbg(ql_dbg_mbx, vha, 0xffff, - "Unable to verify login-state (%x/%x) for " - "loop_id %x.\n", pd->current_login_state, - pd->last_login_state, fcport->loop_id); - rval = QLA_FUNCTION_FAILED; - goto gpd_error_out; - } - - if (fcport->loop_id == FC_NO_LOOP_ID || - (memcmp(fcport->port_name, (uint8_t *)&zero, 8) && - memcmp(fcport->port_name, pd->port_name, 8))) { - /* We lost the device mid way. */ - rval = QLA_NOT_LOGGED_IN; - goto gpd_error_out; - } - - /* Names are little-endian. */ - memcpy(fcport->node_name, pd->node_name, WWN_SIZE); - - /* Get port_id of device. */ - fcport->d_id.b.domain = pd->port_id[0]; - fcport->d_id.b.area = pd->port_id[1]; - fcport->d_id.b.al_pa = pd->port_id[2]; - fcport->d_id.b.rsvd_1 = 0; - - /* If not target must be initiator or unknown type. */ - if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0) - fcport->port_type = FCT_INITIATOR; - else - fcport->port_type = FCT_TARGET; - - /* Passback COS information. */ - fcport->supported_classes = (pd->flags & PDF_CLASS_2) ? - FC_COS_CLASS2 : FC_COS_CLASS3; - - if (pd->prli_svc_param_word_3[0] & BIT_7) { - fcport->flags |= FCF_CONF_COMP_SUPPORTED; - fcport->conf_compl_supported = 1; - } + rval = __qla24xx_parse_gpdb(vha, fcport, pd); gpd_error_out: memset(&ea, 0, sizeof(ea)); @@ -1266,7 +1224,7 @@ qla24xx_abort_sp_done(void *ptr, int res) complete(&abt->u.abt.comp); } -static int +int qla24xx_async_abort_cmd(srb_t *cmd_sp) { scsi_qla_host_t *vha = cmd_sp->vha; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index b2c6da752edd..3953c8d6af69 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2692,7 +2692,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, return; abt = &sp->u.iocb_cmd; - abt->u.abt.comp_status = le32_to_cpu(pkt->nport_handle); + abt->u.abt.comp_status = le16_to_cpu(pkt->nport_handle); sp->done(sp, 0); } diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 35079f417417..e40ed570d3c1 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -10,6 +10,28 @@ #include #include +static struct mb_cmd_name { + uint16_t cmd; + const char *str; +} mb_str[] = { + {MBC_GET_PORT_DATABASE, "GPDB"}, + {MBC_GET_ID_LIST, "GIDList"}, + {MBC_GET_LINK_PRIV_STATS, "Stats"}, +}; + +static const char *mb_to_str(uint16_t cmd) +{ + int i; + struct mb_cmd_name *e; + + for (i = 0; i < ARRAY_SIZE(mb_str); i++) { + e = mb_str + i; + if (cmd == e->cmd) + return e->str; + } + return "unknown"; +} + static struct rom_cmd { uint16_t cmd; } rom_cmds[] = { @@ -2818,7 +2840,7 @@ qla2x00_get_link_status(scsi_qla_host_t *vha, uint16_t loop_id, int qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats, - dma_addr_t stats_dma, uint options) + dma_addr_t stats_dma, uint16_t options) { int rval; mbx_cmd_t mc; @@ -2828,19 +2850,17 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats, ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1088, "Entered %s.\n", __func__); - mcp->mb[0] = MBC_GET_LINK_PRIV_STATS; - mcp->mb[2] = MSW(stats_dma); - mcp->mb[3] = LSW(stats_dma); - mcp->mb[6] = MSW(MSD(stats_dma)); - mcp->mb[7] = LSW(MSD(stats_dma)); - mcp->mb[8] = sizeof(struct link_statistics) / 4; - mcp->mb[9] = vha->vp_idx; - mcp->mb[10] = options; - mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_0; - mcp->in_mb = MBX_2|MBX_1|MBX_0; - mcp->tov = MBX_TOV_SECONDS; - mcp->flags = IOCTL_CMD; - rval = qla2x00_mailbox_command(vha, mcp); + memset(&mc, 0, sizeof(mc)); + mc.mb[0] = MBC_GET_LINK_PRIV_STATS; + mc.mb[2] = MSW(stats_dma); + mc.mb[3] = LSW(stats_dma); + mc.mb[6] = MSW(MSD(stats_dma)); + mc.mb[7] = LSW(MSD(stats_dma)); + mc.mb[8] = sizeof(struct link_statistics) / 4; + mc.mb[9] = cpu_to_le16(vha->vp_idx); + mc.mb[10] = cpu_to_le16(options); + + rval = qla24xx_send_mb_cmd(vha, &mc); if (rval == QLA_SUCCESS) { if (mcp->mb[0] != MBS_COMMAND_COMPLETE) { @@ -5827,3 +5847,225 @@ qla26xx_dport_diagnostics(scsi_qla_host_t *vha, return rval; } + +static void qla2x00_async_mb_sp_done(void *s, int res) +{ + struct srb *sp = s; + + sp->u.iocb_cmd.u.mbx.rc = res; + + complete(&sp->u.iocb_cmd.u.mbx.comp); + /* don't free sp here. Let the caller do the free */ +} + +/* + * This mailbox uses the iocb interface to send MB command. + * This allows non-critial (non chip setup) command to go + * out in parrallel. + */ +int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp) +{ + int rval = QLA_FUNCTION_FAILED; + srb_t *sp; + struct srb_iocb *c; + + if (!vha->hw->flags.fw_started) + goto done; + + sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); + if (!sp) + goto done; + + sp->type = SRB_MB_IOCB; + sp->name = mb_to_str(mcp->mb[0]); + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG); + + c = &sp->u.iocb_cmd; + c->timeout = qla2x00_async_iocb_timeout; + init_completion(&c->u.mbx.comp); + + sp->done = qla2x00_async_mb_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: %s Failed submission. %x.\n", + __func__, sp->name, rval); + goto done_free_sp; + } + + ql_dbg(ql_dbg_mbx, vha, 0xffff, "MB:%s hndl %x submitted\n", + sp->name, sp->handle); + + wait_for_completion(&c->u.mbx.comp); + memcpy(mcp->mb, sp->u.iocb_cmd.u.mbx.in_mb, SIZEOF_IOCB_MB_REG); + + rval = c->u.mbx.rc; + switch (rval) { + case QLA_FUNCTION_TIMEOUT: + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Timeout. %x.\n", + __func__, sp->name, rval); + break; + case QLA_SUCCESS: + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s done.\n", + __func__, sp->name); + sp->free(sp); + break; + default: + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Failed. %x.\n", + __func__, sp->name, rval); + sp->free(sp); + break; + } + + return rval; + +done_free_sp: + sp->free(sp); +done: + return rval; +} + +/* + * qla24xx_gpdb_wait + * NOTE: Do not call this routine from DPC thread + */ +int qla24xx_gpdb_wait(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) +{ + int rval = QLA_FUNCTION_FAILED; + dma_addr_t pd_dma; + struct port_database_24xx *pd; + struct qla_hw_data *ha = vha->hw; + mbx_cmd_t mc; + + if (!vha->hw->flags.fw_started) + goto done; + + pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma); + if (pd == NULL) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate port database structure.\n"); + goto done_free_sp; + } + memset(pd, 0, max(PORT_DATABASE_SIZE, PORT_DATABASE_24XX_SIZE)); + + memset(&mc, 0, sizeof(mc)); + mc.mb[0] = MBC_GET_PORT_DATABASE; + mc.mb[1] = cpu_to_le16(fcport->loop_id); + mc.mb[2] = MSW(pd_dma); + mc.mb[3] = LSW(pd_dma); + mc.mb[6] = MSW(MSD(pd_dma)); + mc.mb[7] = LSW(MSD(pd_dma)); + mc.mb[9] = cpu_to_le16(vha->vp_idx); + mc.mb[10] = cpu_to_le16((uint16_t)opt); + + rval = qla24xx_send_mb_cmd(vha, &mc); + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: %8phC fail\n", __func__, fcport->port_name); + goto done_free_sp; + } + + rval = __qla24xx_parse_gpdb(vha, fcport, pd); + + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %8phC done\n", + __func__, fcport->port_name); + +done_free_sp: + if (pd) + dma_pool_free(ha->s_dma_pool, pd, pd_dma); +done: + return rval; +} + +int __qla24xx_parse_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, + struct port_database_24xx *pd) +{ + int rval = QLA_SUCCESS; + uint64_t zero = 0; + + /* Check for logged in state. */ + if (pd->current_login_state != PDS_PRLI_COMPLETE && + pd->last_login_state != PDS_PRLI_COMPLETE) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "Unable to verify login-state (%x/%x) for " + "loop_id %x.\n", pd->current_login_state, + pd->last_login_state, fcport->loop_id); + rval = QLA_FUNCTION_FAILED; + goto gpd_error_out; + } + + if (fcport->loop_id == FC_NO_LOOP_ID || + (memcmp(fcport->port_name, (uint8_t *)&zero, 8) && + memcmp(fcport->port_name, pd->port_name, 8))) { + /* We lost the device mid way. */ + rval = QLA_NOT_LOGGED_IN; + goto gpd_error_out; + } + + /* Names are little-endian. */ + memcpy(fcport->node_name, pd->node_name, WWN_SIZE); + memcpy(fcport->port_name, pd->port_name, WWN_SIZE); + + /* Get port_id of device. */ + fcport->d_id.b.domain = pd->port_id[0]; + fcport->d_id.b.area = pd->port_id[1]; + fcport->d_id.b.al_pa = pd->port_id[2]; + fcport->d_id.b.rsvd_1 = 0; + + /* If not target must be initiator or unknown type. */ + if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0) + fcport->port_type = FCT_INITIATOR; + else + fcport->port_type = FCT_TARGET; + + /* Passback COS information. */ + fcport->supported_classes = (pd->flags & PDF_CLASS_2) ? + FC_COS_CLASS2 : FC_COS_CLASS3; + + if (pd->prli_svc_param_word_3[0] & BIT_7) { + fcport->flags |= FCF_CONF_COMP_SUPPORTED; + fcport->conf_compl_supported = 1; + } + +gpd_error_out: + return rval; +} + +/* + * qla24xx_gidlist__wait + * NOTE: don't call this routine from DPC thread. + */ +int qla24xx_gidlist_wait(struct scsi_qla_host *vha, + void *id_list, dma_addr_t id_list_dma, uint16_t *entries) +{ + int rval = QLA_FUNCTION_FAILED; + mbx_cmd_t mc; + + if (!vha->hw->flags.fw_started) + goto done; + + memset(&mc, 0, sizeof(mc)); + mc.mb[0] = MBC_GET_ID_LIST; + mc.mb[2] = MSW(id_list_dma); + mc.mb[3] = LSW(id_list_dma); + mc.mb[6] = MSW(MSD(id_list_dma)); + mc.mb[7] = LSW(MSD(id_list_dma)); + mc.mb[8] = 0; + mc.mb[9] = cpu_to_le16(vha->vp_idx); + + rval = qla24xx_send_mb_cmd(vha, &mc); + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: fail\n", __func__); + } else { + *entries = mc.mb[1]; + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: done\n", __func__); + } +done: + return rval; +} diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 563116188c43..38bdcd325fa4 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1238,7 +1238,7 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id, } /* Get list of logged in devices */ - rc = qla2x00_get_id_list(vha, gid_list, gid_list_dma, &entries); + rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, &entries); if (rc != QLA_SUCCESS) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf045, "qla_target(%d): get_id_list() failed: %x\n", @@ -5648,7 +5648,7 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha, fcport->loop_id = loop_id; - rc = qla2x00_get_port_database(vha, fcport, 0); + rc = qla24xx_gpdb_wait(vha, fcport, 0); if (rc != QLA_SUCCESS) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf070, "qla_target(%d): Failed to retrieve fcport " -- cgit v1.2.3 From c423437e3ff41b8ca551ab6621baf11538dbfe9d Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Wed, 15 Mar 2017 09:48:53 -0700 Subject: qla2xxx: Add DebugFS node to display Port Database Signed-off-by: Himanshu Madhani Signed-off-by: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 2 + drivers/scsi/qla2xxx/qla_dfs.c | 92 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index ae38b7a789b1..089480280558 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3300,6 +3300,8 @@ struct qlt_hw_data { uint8_t tgt_node_name[WWN_SIZE]; struct dentry *dfs_tgt_sess; + struct dentry *dfs_tgt_port_database; + struct list_head q_full_list; uint32_t num_pend_cmds; uint32_t num_qfull_cmds_alloc; diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index 3b35905619b0..989e17b0758c 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -19,11 +19,11 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused) struct qla_hw_data *ha = vha->hw; unsigned long flags; struct fc_port *sess = NULL; - struct qla_tgt *tgt= vha->vha_tgt.qla_tgt; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - seq_printf(s, "%s\n",vha->host_str); + seq_printf(s, "%s\n", vha->host_str); if (tgt) { - seq_printf(s, "Port ID Port Name Handle\n"); + seq_puts(s, "Port ID Port Name Handle\n"); spin_lock_irqsave(&ha->tgt.sess_lock, flags); list_for_each_entry(sess, &vha->vp_fcports, list) @@ -44,7 +44,6 @@ qla2x00_dfs_tgt_sess_open(struct inode *inode, struct file *file) return single_open(file, qla2x00_dfs_tgt_sess_show, vha); } - static const struct file_operations dfs_tgt_sess_ops = { .open = qla2x00_dfs_tgt_sess_open, .read = seq_read, @@ -52,6 +51,78 @@ static const struct file_operations dfs_tgt_sess_ops = { .release = single_release, }; +static int +qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused) +{ + scsi_qla_host_t *vha = s->private; + struct qla_hw_data *ha = vha->hw; + struct gid_list_info *gid_list; + dma_addr_t gid_list_dma; + fc_port_t fc_port; + char *id_iter; + int rc, i; + uint16_t entries, loop_id; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + + seq_printf(s, "%s\n", vha->host_str); + if (tgt) { + gid_list = dma_alloc_coherent(&ha->pdev->dev, + qla2x00_gid_list_size(ha), + &gid_list_dma, GFP_KERNEL); + if (!gid_list) { + ql_dbg(ql_dbg_user, vha, 0x705c, + "DMA allocation failed for %u\n", + qla2x00_gid_list_size(ha)); + return 0; + } + + rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, + &entries); + if (rc != QLA_SUCCESS) + goto out_free_id_list; + + id_iter = (char *)gid_list; + + seq_puts(s, "Port Name Port ID Loop ID\n"); + + for (i = 0; i < entries; i++) { + struct gid_list_info *gid = + (struct gid_list_info *)id_iter; + loop_id = le16_to_cpu(gid->loop_id); + memset(&fc_port, 0, sizeof(fc_port_t)); + + fc_port.loop_id = loop_id; + + rc = qla24xx_gpdb_wait(vha, &fc_port, 0); + seq_printf(s, "%8phC %02x%02x%02x %d\n", + fc_port.port_name, fc_port.d_id.b.domain, + fc_port.d_id.b.area, fc_port.d_id.b.al_pa, + fc_port.loop_id); + id_iter += ha->gid_list_info_size; + } +out_free_id_list: + dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha), + gid_list, gid_list_dma); + } + + return 0; +} + +static int +qla2x00_dfs_tgt_port_database_open(struct inode *inode, struct file *file) +{ + scsi_qla_host_t *vha = inode->i_private; + + return single_open(file, qla2x00_dfs_tgt_port_database_show, vha); +} + +static const struct file_operations dfs_tgt_port_database_ops = { + .open = qla2x00_dfs_tgt_port_database_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused) { @@ -296,6 +367,14 @@ create_nodes: goto out; } + ha->tgt.dfs_tgt_port_database = debugfs_create_file("tgt_port_database", + S_IRUSR, ha->dfs_dir, vha, &dfs_tgt_port_database_ops); + if (!ha->tgt.dfs_tgt_port_database) { + ql_log(ql_log_warn, vha, 0xffff, + "Unable to create debugFS tgt_port_database node.\n"); + goto out; + } + ha->dfs_fce = debugfs_create_file("fce", S_IRUSR, ha->dfs_dir, vha, &dfs_fce_ops); if (!ha->dfs_fce) { @@ -326,6 +405,11 @@ qla2x00_dfs_remove(scsi_qla_host_t *vha) ha->tgt.dfs_tgt_sess = NULL; } + if (ha->tgt.dfs_tgt_port_database) { + debugfs_remove(ha->tgt.dfs_tgt_port_database); + ha->tgt.dfs_tgt_port_database = NULL; + } + if (ha->dfs_fw_resource_cnt) { debugfs_remove(ha->dfs_fw_resource_cnt); ha->dfs_fw_resource_cnt = NULL; -- cgit v1.2.3 From 482c9dc79204bb83c3433a59680c787a0b98c000 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:54 -0700 Subject: qla2xxx: Change scsi host lookup method. For target mode, when new scsi command arrive, driver first performs a look up of the SCSI Host. The current look up method is based on the ALPA portion of the NPort ID. For Cisco switch, the ALPA can not be used as the index. Instead, the new search method is based on the full value of the Nport_ID via btree lib. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/Kconfig | 1 + drivers/scsi/qla2xxx/qla_def.h | 2 + drivers/scsi/qla2xxx/qla_gbl.h | 2 + drivers/scsi/qla2xxx/qla_init.c | 14 +++--- drivers/scsi/qla2xxx/qla_mbx.c | 28 ++++-------- drivers/scsi/qla2xxx/qla_os.c | 1 + drivers/scsi/qla2xxx/qla_target.c | 92 +++++++++++++++++++++++++++++++++------ 7 files changed, 100 insertions(+), 40 deletions(-) diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig index 67c0d5aa3212..de952935b5d2 100644 --- a/drivers/scsi/qla2xxx/Kconfig +++ b/drivers/scsi/qla2xxx/Kconfig @@ -3,6 +3,7 @@ config SCSI_QLA_FC depends on PCI && SCSI depends on SCSI_FC_ATTRS select FW_LOADER + select BTREE ---help--- This qla2xxx driver supports all QLogic Fibre Channel PCI and PCIe host adapters. diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 089480280558..9251918773b1 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -3311,6 +3312,7 @@ struct qlt_hw_data { spinlock_t sess_lock; int rspq_vector_cpuid; spinlock_t atio_lock ____cacheline_aligned; + struct btree_head32 host_map; }; #define MAX_QFULL_CMDS_ALLOC 8192 diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 323b912b47f7..5b2451745e9f 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -854,5 +854,7 @@ extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *, uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **); void qla24xx_delete_sess_fn(struct work_struct *); void qlt_unknown_atio_work_fn(struct work_struct *); +void qlt_update_host_map(struct scsi_qla_host *, port_id_t); +void qlt_remove_target_resources(struct qla_hw_data *); #endif /* _QLA_GBL_H */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b1bfa63f7d4e..b0f6ad3020d3 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3340,8 +3340,8 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) uint8_t domain; char connect_type[22]; struct qla_hw_data *ha = vha->hw; - unsigned long flags; scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); + port_id_t id; /* Get host addresses. */ rval = qla2x00_get_adapter_id(vha, @@ -3419,13 +3419,11 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) /* Save Host port and loop ID. */ /* byte order - Big Endian */ - vha->d_id.b.domain = domain; - vha->d_id.b.area = area; - vha->d_id.b.al_pa = al_pa; - - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vha, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + id.b.domain = domain; + id.b.area = area; + id.b.al_pa = al_pa; + id.b.rsvd_1 = 0; + qlt_update_host_map(vha, id); if (!vha->flags.init_done) ql_log(ql_log_info, vha, 0x2010, diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index e40ed570d3c1..53d9579acc74 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3623,6 +3623,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, scsi_qla_host_t *vp = NULL; unsigned long flags; int found; + port_id_t id; ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6, "Entered %s.\n", __func__); @@ -3630,6 +3631,11 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, if (rptid_entry->entry_status != 0) return; + id.b.domain = rptid_entry->port_id[2]; + id.b.area = rptid_entry->port_id[1]; + id.b.al_pa = rptid_entry->port_id[0]; + id.b.rsvd_1 = 0; + if (rptid_entry->format == 0) { /* loop */ ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7, @@ -3641,13 +3647,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); - vha->d_id.b.domain = rptid_entry->port_id[2]; - vha->d_id.b.area = rptid_entry->port_id[1]; - vha->d_id.b.al_pa = rptid_entry->port_id[0]; - - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vha, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + qlt_update_host_map(vha, id); } else if (rptid_entry->format == 1) { /* fabric */ @@ -3673,12 +3673,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, WWN_SIZE); } - vha->d_id.b.domain = rptid_entry->port_id[2]; - vha->d_id.b.area = rptid_entry->port_id[1]; - vha->d_id.b.al_pa = rptid_entry->port_id[0]; - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vha, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + qlt_update_host_map(vha, id); } fc_host_port_name(vha->host) = @@ -3714,12 +3709,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, if (!found) return; - vp->d_id.b.domain = rptid_entry->port_id[2]; - vp->d_id.b.area = rptid_entry->port_id[1]; - vp->d_id.b.al_pa = rptid_entry->port_id[0]; - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vp, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + qlt_update_host_map(vp, id); /* * Cannot configure here as we are still sitting on the diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 54d4e802bde0..344faf59783d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3469,6 +3469,7 @@ qla2x00_remove_one(struct pci_dev *pdev) qla2x00_free_sysfs_attr(base_vha, true); fc_remove_host(base_vha->host); + qlt_remove_target_resources(ha); scsi_remove_host(base_vha->host); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 38bdcd325fa4..7278e046bf87 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -187,21 +187,23 @@ static inline struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha, uint8_t *d_id) { - struct qla_hw_data *ha = vha->hw; - uint8_t vp_idx; - - if ((vha->d_id.b.area != d_id[1]) || (vha->d_id.b.domain != d_id[0])) - return NULL; + struct scsi_qla_host *host; + uint32_t key = 0; - if (vha->d_id.b.al_pa == d_id[2]) + if ((vha->d_id.b.area == d_id[1]) && (vha->d_id.b.domain == d_id[0]) && + (vha->d_id.b.al_pa == d_id[2])) return vha; - BUG_ON(ha->tgt.tgt_vp_map == NULL); - vp_idx = ha->tgt.tgt_vp_map[d_id[2]].idx; - if (likely(test_bit(vp_idx, ha->vp_idx_map))) - return ha->tgt.tgt_vp_map[vp_idx].vha; + key = (uint32_t)d_id[0] << 16; + key |= (uint32_t)d_id[1] << 8; + key |= (uint32_t)d_id[2]; - return NULL; + host = btree_lookup32(&vha->hw->tgt.host_map, key); + if (!host) + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "Unable to find host %06x\n", key); + + return host; } static inline @@ -6040,6 +6042,17 @@ int qlt_remove_target(struct qla_hw_data *ha, struct scsi_qla_host *vha) return 0; } +void qlt_remove_target_resources(struct qla_hw_data *ha) +{ + struct scsi_qla_host *node; + u32 key = 0; + + btree_for_each_safe32(&ha->tgt.host_map, key, node) + btree_remove32(&ha->tgt.host_map, key); + + btree_destroy32(&ha->tgt.host_map); +} + static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn, unsigned char *b) { @@ -6693,6 +6706,8 @@ qlt_modify_vp_config(struct scsi_qla_host *vha, void qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) { + int rc; + if (!QLA_TGT_MODE_ENABLED()) return; @@ -6712,6 +6727,13 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) qlt_unknown_atio_work_fn); qlt_clear_mode(base_vha); + + rc = btree_init32(&ha->tgt.host_map); + if (rc) + ql_log(ql_log_info, base_vha, 0xffff, + "Unable to initialize ha->host_map btree\n"); + + qlt_update_vp_map(base_vha, SET_VP_IDX); } irqreturn_t @@ -6820,25 +6842,69 @@ qlt_mem_free(struct qla_hw_data *ha) void qlt_update_vp_map(struct scsi_qla_host *vha, int cmd) { + void *slot; + u32 key; + int rc; + if (!QLA_TGT_MODE_ENABLED()) return; + key = vha->d_id.b24; + switch (cmd) { case SET_VP_IDX: vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = vha; break; case SET_AL_PA: - vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = vha->vp_idx; + slot = btree_lookup32(&vha->hw->tgt.host_map, key); + if (!slot) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "Save vha in host_map %p %06x\n", vha, key); + rc = btree_insert32(&vha->hw->tgt.host_map, + key, vha, GFP_ATOMIC); + if (rc) + ql_log(ql_log_info, vha, 0xffff, + "Unable to insert s_id into host_map: %06x\n", + key); + return; + } + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "replace existing vha in host_map %p %06x\n", vha, key); + btree_update32(&vha->hw->tgt.host_map, key, vha); break; case RESET_VP_IDX: vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = NULL; break; case RESET_AL_PA: - vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = 0; + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "clear vha in host_map %p %06x\n", vha, key); + slot = btree_lookup32(&vha->hw->tgt.host_map, key); + if (slot) + btree_remove32(&vha->hw->tgt.host_map, key); + vha->d_id.b24 = 0; break; } } +void qlt_update_host_map(struct scsi_qla_host *vha, port_id_t id) +{ + unsigned long flags; + struct qla_hw_data *ha = vha->hw; + + if (!vha->d_id.b24) { + spin_lock_irqsave(&ha->vport_slock, flags); + vha->d_id = id; + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); + } else if (vha->d_id.b24 != id.b24) { + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vha, RESET_AL_PA); + vha->d_id = id; + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); + } +} + static int __init qlt_parse_ini_mode(void) { if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_EXCLUSIVE) == 0) -- cgit v1.2.3 From ec7193e26055112bc824929fd943035f9a30b06f Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:55 -0700 Subject: qla2xxx: Fix delayed response to command for loop mode/direct connect. Current driver wait for FW to be in the ready state before processing in-coming commands. For Arbitrated Loop or Point-to- Point (not switch), FW Ready state can take a while. FW will transition to ready state after all Nports have been logged in. In the mean time, certain initiators have completed the login and starts IO. Driver needs to start processing all queues if FW is already started. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 10 ++++++++-- drivers/scsi/qla2xxx/qla_init.c | 12 ++++++++++++ drivers/scsi/qla2xxx/qla_isr.c | 14 +++++++++++++- drivers/scsi/qla2xxx/qla_mbx.c | 6 +++--- drivers/scsi/qla2xxx/qla_os.c | 21 ++++++++++++++++++++- drivers/scsi/qla2xxx/qla_target.c | 38 +++++++++++++++++++++++++------------- 6 files changed, 81 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 9251918773b1..ae119018dfaa 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3322,6 +3322,10 @@ struct qlt_hw_data { #define LEAK_EXCHG_THRESH_HOLD_PERCENT 75 /* 75 percent */ +#define QLA_EARLY_LINKUP(_ha) \ + ((_ha->flags.n2n_ae || _ha->flags.lip_ae) && \ + _ha->flags.fw_started && !_ha->flags.fw_init_done) + /* * Qlogic host adapter specific data structure. */ @@ -3371,9 +3375,11 @@ struct qla_hw_data { uint32_t fawwpn_enabled:1; uint32_t exlogins_enabled:1; uint32_t exchoffld_enabled:1; - /* 35 bits */ + uint32_t lip_ae:1; + uint32_t n2n_ae:1; uint32_t fw_started:1; + uint32_t fw_init_done:1; } flags; /* This spinlock is used to protect "io transactions", you must @@ -3466,7 +3472,6 @@ struct qla_hw_data { #define P2P_LOOP 3 uint8_t interrupts_on; uint32_t isp_abort_cnt; - #define PCI_DEVICE_ID_QLOGIC_ISP2532 0x2532 #define PCI_DEVICE_ID_QLOGIC_ISP8432 0x8432 #define PCI_DEVICE_ID_QLOGIC_ISP8001 0x8001 @@ -3947,6 +3952,7 @@ typedef struct scsi_qla_host { struct list_head vp_fcports; /* list of fcports */ struct list_head work_list; spinlock_t work_lock; + struct work_struct iocb_work; /* Commonly used flags and state information. */ struct Scsi_Host *host; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b0f6ad3020d3..f9d2fe7b1ade 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3178,6 +3178,7 @@ next_check: } else { ql_dbg(ql_dbg_init, vha, 0x00d3, "Init Firmware -- success.\n"); + ha->flags.fw_started = 1; } return (rval); @@ -4000,6 +4001,7 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) atomic_set(&vha->loop_state, LOOP_READY); ql_dbg(ql_dbg_disc, vha, 0x2069, "LOOP READY.\n"); + ha->flags.fw_init_done = 1; /* * Process any ATIO queue entries that came in @@ -5491,6 +5493,11 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) if (!(IS_P3P_TYPE(ha))) ha->isp_ops->reset_chip(vha); + ha->flags.n2n_ae = 0; + ha->flags.lip_ae = 0; + ha->current_topology = 0; + ha->flags.fw_started = 0; + ha->flags.fw_init_done = 0; ha->chip_reset++; atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); @@ -6767,6 +6774,8 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha) return; if (!ha->fw_major_version) return; + if (!ha->flags.fw_started) + return; ret = qla2x00_stop_firmware(vha); for (retries = 5; ret != QLA_SUCCESS && ret != QLA_FUNCTION_TIMEOUT && @@ -6780,6 +6789,9 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha) "Attempting retry of stop-firmware command.\n"); ret = qla2x00_stop_firmware(vha); } + + ha->flags.fw_started = 0; + ha->flags.fw_init_done = 0; } int diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 3953c8d6af69..3203367a4f42 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -708,6 +708,8 @@ skip_rio: "mbx7=%xh.\n", mb[1], mb[2], mb[3], mbx); ha->isp_ops->fw_dump(vha, 1); + ha->flags.fw_init_done = 0; + ha->flags.fw_started = 0; if (IS_FWI2_CAPABLE(ha)) { if (mb[1] == 0 && mb[2] == 0) { @@ -761,6 +763,9 @@ skip_rio: break; case MBA_LIP_OCCURRED: /* Loop Initialization Procedure */ + ha->flags.lip_ae = 1; + ha->flags.n2n_ae = 0; + ql_dbg(ql_dbg_async, vha, 0x5009, "LIP occurred (%x).\n", mb[1]); @@ -797,6 +802,10 @@ skip_rio: break; case MBA_LOOP_DOWN: /* Loop Down Event */ + ha->flags.n2n_ae = 0; + ha->flags.lip_ae = 0; + ha->current_topology = 0; + mbx = (IS_QLA81XX(ha) || IS_QLA8031(ha)) ? RD_REG_WORD(®24->mailbox4) : 0; mbx = (IS_P3P_TYPE(ha)) ? RD_REG_WORD(®82->mailbox_out[4]) @@ -866,6 +875,9 @@ skip_rio: /* case MBA_DCBX_COMPLETE: */ case MBA_POINT_TO_POINT: /* Point-to-Point */ + ha->flags.lip_ae = 0; + ha->flags.n2n_ae = 1; + if (IS_QLA2100(ha)) break; @@ -2706,7 +2718,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, struct sts_entry_24xx *pkt; struct qla_hw_data *ha = vha->hw; - if (!vha->flags.online) + if (!ha->flags.fw_started) return; while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) { diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 53d9579acc74..a113ab3592a7 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3638,11 +3638,11 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, if (rptid_entry->format == 0) { /* loop */ - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7, + ql_dbg(ql_dbg_async, vha, 0x10b7, "Format 0 : Number of VPs setup %d, number of " "VPs acquired %d.\n", rptid_entry->vp_setup, rptid_entry->vp_acquired); - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b8, + ql_dbg(ql_dbg_async, vha, 0x10b8, "Primary port id %02x%02x%02x.\n", rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); @@ -3651,7 +3651,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, } else if (rptid_entry->format == 1) { /* fabric */ - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b9, + ql_dbg(ql_dbg_async, vha, 0x10b9, "Format 1: VP[%d] enabled - status %d - with " "port id %02x%02x%02x.\n", rptid_entry->vp_idx, rptid_entry->vp_status, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 344faf59783d..41d5b09f7326 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2560,6 +2560,20 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time) return atomic_read(&vha->loop_state) == LOOP_READY; } +static void qla2x00_iocb_work_fn(struct work_struct *work) +{ + struct scsi_qla_host *vha = container_of(work, + struct scsi_qla_host, iocb_work); + int cnt = 0; + + while (!list_empty(&vha->work_list)) { + qla2x00_do_work(vha); + cnt++; + if (cnt > 10) + break; + } +} + /* * PCI driver interface */ @@ -3078,6 +3092,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ qla2xxx_wake_dpc(base_vha); + INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); INIT_WORK(&ha->board_disable, qla2x00_disable_board_on_pci_error); if (IS_QLA8031(ha) || IS_MCTP_CAPABLE(ha)) { @@ -4321,7 +4336,11 @@ qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e) spin_lock_irqsave(&vha->work_lock, flags); list_add_tail(&e->list, &vha->work_list); spin_unlock_irqrestore(&vha->work_lock, flags); - qla2xxx_wake_dpc(vha); + + if (QLA_EARLY_LINKUP(vha->hw)) + schedule_work(&vha->iocb_work); + else + qla2xxx_wake_dpc(vha); return QLA_SUCCESS; } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 7278e046bf87..0e03ca2ab3e5 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -638,6 +638,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, break; case SRB_NACK_PRLI: fcport->fw_login_state = DSC_LS_PRLI_PEND; + fcport->deleted = 0; c = "PRLI"; break; case SRB_NACK_LOGO: @@ -1576,6 +1577,9 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha, request_t *pkt; struct nack_to_isp *nack; + if (!ha->flags.fw_started) + return; + ql_dbg(ql_dbg_tgt, vha, 0xe004, "Sending NOTIFY_ACK (ha=%p)\n", ha); /* Send marker if required */ @@ -3053,7 +3057,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, else vha->tgt_counters.core_qla_que_buf++; - if (!vha->flags.online || cmd->reset_count != ha->chip_reset) { + if (!ha->flags.fw_started || cmd->reset_count != ha->chip_reset) { /* * Either the port is not online or this request was from * previous life, just abort the processing. @@ -3194,7 +3198,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&ha->hardware_lock, flags); - if (!vha->flags.online || (cmd->reset_count != ha->chip_reset) || + if (!ha->flags.fw_started || (cmd->reset_count != ha->chip_reset) || (cmd->sess && cmd->sess->deleted)) { /* * Either the port is not online or this request was from @@ -3372,7 +3376,7 @@ static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha, ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c, "Sending TERM ELS CTIO (ha=%p)\n", ha); - pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL); + pkt = (request_t *)qla2x00_alloc_iocbs(vha, NULL); if (pkt == NULL) { ql_dbg(ql_dbg_tgt, vha, 0xe080, "qla_target(%d): %s failed: unable to allocate " @@ -4697,7 +4701,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } if (sess != NULL) { - if (sess->fw_login_state == DSC_LS_PLOGI_PEND) { + if (sess->fw_login_state != DSC_LS_PLOGI_PEND && + sess->fw_login_state != DSC_LS_PLOGI_COMP) { /* * Impatient initiator sent PRLI before last * PLOGI could finish. Will force him to re-try, @@ -4736,15 +4741,23 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, /* Make session global (not used in fabric mode) */ if (ha->current_topology != ISP_CFG_F) { - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); - set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); - qla2xxx_wake_dpc(vha); + if (sess) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post nack\n", + __func__, __LINE__, sess->port_name); + qla24xx_post_nack_work(vha, sess, iocb, + SRB_NACK_PRLI); + res = 0; + } else { + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } } else { if (sess) { ql_dbg(ql_dbg_disc, vha, 0xffff, - "%s %d %8phC post nack\n", - __func__, __LINE__, sess->port_name); - + "%s %d %8phC post nack\n", + __func__, __LINE__, sess->port_name); qla24xx_post_nack_work(vha, sess, iocb, SRB_NACK_PRLI); res = 0; @@ -4752,7 +4765,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } break; - case ELS_TPRLO: if (le16_to_cpu(iocb->u.isp24.flags) & NOTIFY24XX_FLAGS_GLOBAL_TPRLO) { @@ -5222,7 +5234,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, unsigned long flags; if (unlikely(tgt == NULL)) { - ql_dbg(ql_dbg_io, vha, 0x3064, + ql_dbg(ql_dbg_tgt, vha, 0x3064, "ATIO pkt, but no tgt (ha %p)", ha); return; } @@ -6359,7 +6371,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked) struct atio_from_isp *pkt; int cnt, i; - if (!vha->flags.online) + if (!ha->flags.fw_started) return; while ((ha->tgt.atio_ring_ptr->signature != ATIO_PROCESSED) || -- cgit v1.2.3 From 6c611d18f386d37cce3afbd921568e2a895bd86e Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Wed, 15 Mar 2017 09:48:56 -0700 Subject: qla2xxx: Update driver version to 9.00.00.00-k Signed-off-by: Himanshu Madhani signed-off-by: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_version.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 3cb1964b7786..45bc84e8e3bf 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,9 +7,9 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.07.00.38-k" +#define QLA2XXX_VERSION "9.00.00.00-k" -#define QLA_DRIVER_MAJOR_VER 8 -#define QLA_DRIVER_MINOR_VER 7 +#define QLA_DRIVER_MAJOR_VER 9 +#define QLA_DRIVER_MINOR_VER 0 #define QLA_DRIVER_PATCH_VER 0 #define QLA_DRIVER_BETA_VER 0 -- cgit v1.2.3 From 6985bd5e21901c2d2bfc924b114887e20c002901 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 14 Mar 2017 22:05:20 +0100 Subject: iio: imu: st_lsm6dsx: fix FIFO_CTRL2 overwrite during watermark configuration Fixes: 290a6ce11d93 (iio: imu: add support to lsm6dsx driver) Signed-off-by: Lorenzo Bianconi Signed-off-by: Jonathan Cameron --- drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c index 78532ce07449..81b572d7699a 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c @@ -193,8 +193,8 @@ int st_lsm6dsx_update_watermark(struct st_lsm6dsx_sensor *sensor, u16 watermark) if (err < 0) goto out; - fifo_watermark = ((data & ~ST_LSM6DSX_FIFO_TH_MASK) << 8) | - (fifo_watermark & ST_LSM6DSX_FIFO_TH_MASK); + fifo_watermark = ((data << 8) & ~ST_LSM6DSX_FIFO_TH_MASK) | + (fifo_watermark & ST_LSM6DSX_FIFO_TH_MASK); wdata = cpu_to_le16(fifo_watermark); err = hw->tf->write(hw->dev, ST_LSM6DSX_REG_FIFO_THL_ADDR, -- cgit v1.2.3 From 625fe857e4fac6518716f3c0ff5e5deb8ec6d238 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 17 Mar 2017 17:02:01 -0700 Subject: scsi: scsi_dh_alua: Check scsi_device_get() return value Do not queue ALUA work nor call scsi_device_put() if the scsi_device_get() call fails. This patch fixes the following crash: general protection fault: 0000 [#1] SMP RIP: 0010:scsi_device_put+0xb/0x30 Call Trace: scsi_disk_put+0x2d/0x40 sd_release+0x3d/0xb0 __blkdev_put+0x29e/0x360 blkdev_put+0x49/0x170 dm_put_table_device+0x58/0xc0 [dm_mod] dm_put_device+0x70/0xc0 [dm_mod] free_priority_group+0x92/0xc0 [dm_multipath] free_multipath+0x70/0xc0 [dm_multipath] multipath_dtr+0x19/0x20 [dm_multipath] dm_table_destroy+0x67/0x120 [dm_mod] dev_suspend+0xde/0x240 [dm_mod] ctl_ioctl+0x1f5/0x520 [dm_mod] dm_ctl_ioctl+0xe/0x20 [dm_mod] do_vfs_ioctl+0x8f/0x700 SyS_ioctl+0x3c/0x70 entry_SYSCALL_64_fastpath+0x18/0xad Fixes: commit 03197b61c5ec ("scsi_dh_alua: Use workqueue for RTPG") Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Tang Junhui Cc: Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 48e200102221..e0b15f3dd303 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -870,7 +870,7 @@ static void alua_rtpg_queue(struct alua_port_group *pg, unsigned long flags; struct workqueue_struct *alua_wq = kaluad_wq; - if (!pg) + if (!pg || scsi_device_get(sdev)) return; spin_lock_irqsave(&pg->lock, flags); @@ -884,14 +884,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg, pg->flags |= ALUA_PG_RUN_RTPG; kref_get(&pg->kref); pg->rtpg_sdev = sdev; - scsi_device_get(sdev); start_queue = 1; } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { pg->flags |= ALUA_PG_RUN_RTPG; /* Do not queue if the worker is already running */ if (!(pg->flags & ALUA_PG_RUNNING)) { kref_get(&pg->kref); - sdev = NULL; start_queue = 1; } } @@ -900,13 +898,15 @@ static void alua_rtpg_queue(struct alua_port_group *pg, alua_wq = kaluad_sync_wq; spin_unlock_irqrestore(&pg->lock, flags); - if (start_queue && - !queue_delayed_work(alua_wq, &pg->rtpg_work, - msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { - if (sdev) - scsi_device_put(sdev); - kref_put(&pg->kref, release_port_group); + if (start_queue) { + if (queue_delayed_work(alua_wq, &pg->rtpg_work, + msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) + sdev = NULL; + else + kref_put(&pg->kref, release_port_group); } + if (sdev) + scsi_device_put(sdev); } /* -- cgit v1.2.3 From 7cb689fe42927281b8d98606ae5450173fcc66a9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 17 Mar 2017 17:02:02 -0700 Subject: scsi: scsi_dh_alua: Ensure that alua_activate() calls the completion function Callers of scsi_dh_activate(), e.g. dm-mpath, assume that this function either returns an error code or calls the completion function. Make alua_activate() call the completion function even if scsi_device_get() fails. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Tang Junhui Cc: Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index e0b15f3dd303..b6849d3ecefe 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -113,7 +113,7 @@ struct alua_queue_data { #define ALUA_POLICY_SWITCH_ALL 1 static void alua_rtpg_work(struct work_struct *work); -static void alua_rtpg_queue(struct alua_port_group *pg, +static bool alua_rtpg_queue(struct alua_port_group *pg, struct scsi_device *sdev, struct alua_queue_data *qdata, bool force); static void alua_check(struct scsi_device *sdev, bool force); @@ -862,7 +862,13 @@ static void alua_rtpg_work(struct work_struct *work) kref_put(&pg->kref, release_port_group); } -static void alua_rtpg_queue(struct alua_port_group *pg, +/** + * alua_rtpg_queue() - cause RTPG to be submitted asynchronously + * + * Returns true if and only if alua_rtpg_work() will be called asynchronously. + * That function is responsible for calling @qdata->fn(). + */ +static bool alua_rtpg_queue(struct alua_port_group *pg, struct scsi_device *sdev, struct alua_queue_data *qdata, bool force) { @@ -871,7 +877,7 @@ static void alua_rtpg_queue(struct alua_port_group *pg, struct workqueue_struct *alua_wq = kaluad_wq; if (!pg || scsi_device_get(sdev)) - return; + return false; spin_lock_irqsave(&pg->lock, flags); if (qdata) { @@ -907,6 +913,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg, } if (sdev) scsi_device_put(sdev); + + return true; } /* @@ -1007,11 +1015,13 @@ static int alua_activate(struct scsi_device *sdev, mutex_unlock(&h->init_mutex); goto out; } - fn = NULL; rcu_read_unlock(); mutex_unlock(&h->init_mutex); - alua_rtpg_queue(pg, sdev, qdata, true); + if (alua_rtpg_queue(pg, sdev, qdata, true)) + fn = NULL; + else + err = SCSI_DH_DEV_OFFLINED; kref_put(&pg->kref, release_port_group); out: if (fn) -- cgit v1.2.3 From 0aeccdfe220c360ab888816da06b8eb67d910ff6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 17 Mar 2017 17:02:03 -0700 Subject: scsi: scsi_dh_alua: Warn if the first argument of alua_rtpg_queue() is NULL Callers must provide a valid port group to alua_rtpg_queue(). Issue a kernel warning if that is not the case. Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Tang Junhui Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_alua.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index b6849d3ecefe..c01b47e5b55a 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -876,7 +876,7 @@ static bool alua_rtpg_queue(struct alua_port_group *pg, unsigned long flags; struct workqueue_struct *alua_wq = kaluad_wq; - if (!pg || scsi_device_get(sdev)) + if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev)) return false; spin_lock_irqsave(&pg->lock, flags); -- cgit v1.2.3 From 452b94b8c8c7eb7dd0d0fa9a9776e0d02cd73b97 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Mar 2017 19:00:47 -0700 Subject: mm/swap: don't BUG_ON() due to uninitialized swap slot cache This BUG_ON() triggered for me once at shutdown, and I don't see a reason for the check. The code correctly checks whether the swap slot cache is usable or not, so an uninitialized swap slot cache is not actually problematic afaik. I've temporarily just switched the BUG_ON() to a WARN_ON_ONCE(), since I'm not sure why that seemingly pointless check was there. I suspect the real fix is to just remove it entirely, but for now we'll warn about it but not bring the machine down. Cc: "Huang, Ying" Cc: Tim Chen Cc: Michal Hocko Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_slots.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 9b5bc86f96ad..7ebb23836f68 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -267,7 +267,7 @@ int free_swap_slot(swp_entry_t entry) { struct swap_slots_cache *cache; - BUG_ON(!swap_slot_cache_initialized); + WARN_ON_ONCE(!swap_slot_cache_initialized); cache = &get_cpu_var(swp_slots); if (use_swap_slot_cache && cache->slots_ret) { -- cgit v1.2.3 From 97da3854c526d3a6ee05c849c96e48d21527606c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Mar 2017 19:09:39 -0700 Subject: Linux 4.11-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b841fb36beb2..b2faa9319372 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 93a15b58cfb8a24e666ffca432f19fe65c1cd7d1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Sun, 19 Mar 2017 20:38:40 -0600 Subject: drm/i915/kvmgt: Hold struct kvm reference The kvmgt code keeps a pointer to the struct kvm associated with the device, but doesn't actually hold a reference to it. If we do unclean shutdown testing (ie. killing the user process), then we can see the kvm association to the device unset, which causes kvmgt to trigger a device release via a work queue. Naturally we cannot guarantee that the cached struct kvm pointer is still valid at this point without holding a reference. The observed failure in this case is a stuck cpu trying to acquire the spinlock from the invalid reference, but other failure modes are clearly possible. Hold a reference to avoid this. Signed-off-by: Alex Williamson Cc: stable@vger.kernel.org #v4.10 Cc: Jike Song Cc: Paolo Bonzini Cc: Zhenyu Wang Cc: Zhi Wang Reviewed-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 1ea3eb270de8..d641214578a7 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1326,6 +1326,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev) vgpu->handle = (unsigned long)info; info->vgpu = vgpu; info->kvm = kvm; + kvm_get_kvm(info->kvm); kvmgt_protect_table_init(info); gvt_cache_init(vgpu); @@ -1347,6 +1348,7 @@ static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) } kvm_page_track_unregister_notifier(info->kvm, &info->track_node); + kvm_put_kvm(info->kvm); kvmgt_protect_table_destroy(info); gvt_cache_destroy(info->vgpu); vfree(info); -- cgit v1.2.3 From 0cdefd5b5485ee6eb3512a75739d09a4090176ed Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 18 Mar 2017 21:53:20 -0700 Subject: ARM: dts: sun7i: lamobo-r1: Fix CPU port RGMII settings The CPU port of the BCM53125 is configured with RGMII (no delays) but this should actually be RGMII with transmit delay (rgmii-txid) because STMMAC takes care of inserting the transmitter delay. This fixes occasional packet loss encountered. Fixes: d7b9eaff5f0c ("ARM: dts: sun7i: Add BCM53125 switch nodes to the lamobo-r1 board") Reported-by: Hartmut Knaack Signed-off-by: Florian Fainelli Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts b/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts index 72ec0d5ae052..bbf1c8cbaac6 100644 --- a/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts +++ b/arch/arm/boot/dts/sun7i-a20-lamobo-r1.dts @@ -167,7 +167,7 @@ reg = <8>; label = "cpu"; ethernet = <&gmac>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; fixed-link { speed = <1000>; full-duplex; -- cgit v1.2.3 From 9693219aa61dc7a75ac015e5c011e889cb821eec Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 18 Mar 2017 05:23:15 +0800 Subject: ARM: sun8i: a23/a33: drop bl_en_pin GPIO pinmux in reference design DTSI The bl_en_pin GPIO pinmux is configured as "gpio_in", which makes it conflicts with the real GPIO usage (out), and makes the backlight not usable. Drop the GPIO pinmux for it, thus this GPIO can be correctly used. Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi b/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi index 7097c18ff487..d6bd15898db6 100644 --- a/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi +++ b/arch/arm/boot/dts/sun8i-reference-design-tablet.dtsi @@ -50,8 +50,6 @@ backlight: backlight { compatible = "pwm-backlight"; - pinctrl-names = "default"; - pinctrl-0 = <&bl_en_pin>; pwms = <&pwm 0 50000 PWM_POLARITY_INVERTED>; brightness-levels = <0 10 20 30 40 50 60 70 80 90 100>; default-brightness-level = <8>; @@ -93,11 +91,6 @@ }; &pio { - bl_en_pin: bl_en_pin@0 { - pins = "PH6"; - function = "gpio_in"; - }; - mmc0_cd_pin: mmc0_cd_pin@0 { pins = "PB4"; function = "gpio_in"; -- cgit v1.2.3 From 975629c3f76ce3d86e1f943db9847e0312a98daf Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Mon, 20 Mar 2017 23:49:19 +0800 Subject: drm/i915/gvt: add write handler for mmio mbctl Guest will write mmio mbctl which need a special handler in gvt to clear the bit 4 to inidcate the write operation success. V2: use bit definition macro to make code readable. Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index eaff45d417e8..6da9ae1618e3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -970,6 +970,14 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu, return 0; } +static int mbctl_write(struct intel_vgpu *vgpu, unsigned int offset, + void *p_data, unsigned int bytes) +{ + *(u32 *)p_data &= (~GEN6_MBCTL_ENABLE_BOOT_FETCH); + write_vreg(vgpu, offset, p_data, bytes); + return 0; +} + static int vga_control_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2238,7 +2246,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x7180, D_ALL); MMIO_D(0x7408, D_ALL); MMIO_D(0x7c00, D_ALL); - MMIO_D(GEN6_MBCTL, D_ALL); + MMIO_DH(GEN6_MBCTL, D_ALL, NULL, mbctl_write); MMIO_D(0x911c, D_ALL); MMIO_D(0x9120, D_ALL); MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL); -- cgit v1.2.3 From fe686babf4cf62b9b214b99847c735baa35a9fa2 Mon Sep 17 00:00:00 2001 From: Philipp Tomsich Date: Wed, 15 Mar 2017 12:23:58 +0100 Subject: clk: sunxi-ng: Fix div/mult settings for osc12M on A64 The mult/div for osc12M was previously backwards (giving a 48M rate for osc12M). Fix it. Signed-off-by: Philipp Tomsich Tested-by: Christoph Muellner Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu-sun50i-a64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index e3c084cc6da5..f54114c607df 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -566,7 +566,7 @@ static SUNXI_CCU_M_WITH_GATE(gpu_clk, "gpu", "pll-gpu", 0x1a0, 0, 3, BIT(31), CLK_SET_RATE_PARENT); /* Fixed Factor clocks */ -static CLK_FIXED_FACTOR(osc12M_clk, "osc12M", "osc24M", 1, 2, 0); +static CLK_FIXED_FACTOR(osc12M_clk, "osc12M", "osc24M", 2, 1, 0); /* We hardcode the divider to 4 for now */ static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio", -- cgit v1.2.3 From f363a06642f28caaa78cb6446bbad90c73fe183c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Mar 2017 10:08:19 +0100 Subject: ALSA: ctxfi: Fix the incorrect check of dma_set_mask() call In the commit [15c75b09f8d1: ALSA: ctxfi: Fallback DMA mask to 32bit], I forgot to put "!" at dam_set_mask() call check in cthw20k1.c (while cthw20k2.c is OK). This patch fixes that obvious bug. (As a side note: although the original commit was completely wrong, it's still working for most of machines, as it sets to 32bit DMA mask in the end. So the bug severity is low.) Fixes: 15c75b09f8d1 ("ALSA: ctxfi: Fallback DMA mask to 32bit") Cc: Signed-off-by: Takashi Iwai --- sound/pci/ctxfi/cthw20k1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ctxfi/cthw20k1.c b/sound/pci/ctxfi/cthw20k1.c index ab4cdab5cfa5..79edd88d5cd0 100644 --- a/sound/pci/ctxfi/cthw20k1.c +++ b/sound/pci/ctxfi/cthw20k1.c @@ -1905,7 +1905,7 @@ static int hw_card_start(struct hw *hw) return err; /* Set DMA transfer mask */ - if (dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { + if (!dma_set_mask(&pci->dev, DMA_BIT_MASK(dma_bits))) { dma_set_coherent_mask(&pci->dev, DMA_BIT_MASK(dma_bits)); } else { dma_set_mask(&pci->dev, DMA_BIT_MASK(32)); -- cgit v1.2.3 From 07f5ab6002a4f0b633f3495157166f9f6180871b Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Thu, 23 Feb 2017 08:57:26 +0530 Subject: cxl: Route eeh events to all slices for pci_channel_io_perm_failure state Fix a boundary condition where in some cases an eeh event with state == pci_channel_io_perm_failure wont be passed on to a driver attached to the virtual PCI device associated with a slice. This will happen in case the slice just before (n-1) doesn't have any vPHB bus associated with it, that results in an early return from cxl_pci_error_detected() callback. With state == pci_channel_io_perm_failure, the adapter will be removed irrespective of the return value of cxl_vphb_error_detected(). So we now always return PCI_ERS_RESULT_DISCONNECTED for this case i.e even if the AFU isn't using a vPHB (currently returns PCI_ERS_RESULT_NONE). Fixes: e4f5fc001a6("cxl: Do not create vPHB if there are no AFU configuration records") Signed-off-by: Vaibhav Jain Reviewed-by: Matthew R. Ochs Reviewed-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 91f645992c94..b27ea98b781f 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1792,15 +1792,14 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, /* If we're permanently dead, give up. */ if (state == pci_channel_io_perm_failure) { - /* Tell the AFU drivers; but we don't care what they - * say, we're going away. - */ for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; - /* Only participate in EEH if we are on a virtual PHB */ - if (afu->phb == NULL) - return PCI_ERS_RESULT_NONE; - cxl_vphb_error_detected(afu, state); + /* + * Tell the AFU drivers; but we don't care what they + * say, we're going away. + */ + if (afu->phb != NULL) + cxl_vphb_error_detected(afu, state); } return PCI_ERS_RESULT_DISCONNECT; } -- cgit v1.2.3 From b467e08a15563dede0d37d3233baa24fb97a7310 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 18 Mar 2017 04:19:43 +0800 Subject: clk: sunxi-ng: fix recalc_rate formula of NKMP clocks In commit e66f81bbd746 ("clk: sunxi-ng: Implement factors offsets"), the final formula of NKMP clocks' recalc_rate is refactored; however, the refactored formula broke the calculation due to some C language operand priority problem -- the priority of operand >> is lower than * and /, makes the formula being parsed as "(parent_rate * n * k) >> (p / m)", but it should be "(parent_rate * n * k >> p) / m". Add the pair of parentheses to fix up this issue. This pair of parentheses used to exist in the old formula. Fixes: e66f81bbd746 ("clk: sunxi-ng: Implement factors offsets") Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard --- drivers/clk/sunxi-ng/ccu_nkmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c index a2b40a000157..488055ed944f 100644 --- a/drivers/clk/sunxi-ng/ccu_nkmp.c +++ b/drivers/clk/sunxi-ng/ccu_nkmp.c @@ -107,7 +107,7 @@ static unsigned long ccu_nkmp_recalc_rate(struct clk_hw *hw, p = reg >> nkmp->p.shift; p &= (1 << nkmp->p.width) - 1; - return parent_rate * n * k >> p / m; + return (parent_rate * n * k >> p) / m; } static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate, -- cgit v1.2.3 From 6d98ce0be541d4a3cfbb52cd75072c0339ebb500 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 17 Mar 2017 15:13:20 +1000 Subject: powerpc/64s: Fix idle wakeup potential to clobber registers We concluded there may be a window where the idle wakeup code could get to pnv_wakeup_tb_loss() (which clobbers non-volatile GPRs), but the hardware may set SRR1[46:47] to 01b (no state loss) which would result in the wakeup code failing to restore non-volatile GPRs. I was not able to trigger this condition with trivial tests on real hardware or simulator, but the ISA (at least 2.07) seems to allow for it, and Gautham says that it can happen if there is an exception pending when the sleep/winkle instruction is executed. Fixes: 1706567117ba ("powerpc/kvm: make hypervisor state restore a function") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Nicholas Piggin Acked-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 995728736677..6fd08219248d 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -449,9 +449,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) _GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) /* - * Before entering any idle state, the NVGPRs are saved in the stack - * and they are restored before switching to the process context. Hence - * until they are restored, they are free to be used. + * Before entering any idle state, the NVGPRs are saved in the stack. + * If there was a state loss, or PACA_NAPSTATELOST was set, then the + * NVGPRs are restored. If we are here, it is likely that state is lost, + * but not guaranteed -- neither ISA207 nor ISA300 tests to reach + * here are the same as the test to restore NVGPRS: + * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300, + * and SRR1 test for restoring NVGPRs. + * + * We are about to clobber NVGPRs now, so set NAPSTATELOST to + * guarantee they will always be restored. This might be tightened + * with careful reading of specs (particularly for ISA300) but this + * is already a slow wakeup path and it's simpler to be safe. + */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) + + /* * * Save SRR1 and LR in NVGPRs as they might be clobbered in * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required -- cgit v1.2.3 From 066def56dc712329561abadcea15be9cad7393b6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 2 Jan 2017 13:51:43 +0100 Subject: m68k/bitops: Correct signature of test_bit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mm/filemap.c: In function ‘clear_bit_unlock_is_negative_byte’: mm/filemap.c:933: warning: passing argument 2 of ‘test_bit’ discards qualifiers from pointer target type Make the bitmask pointed to by the "vaddr" parameter volatile to fix this, like is done on other architectures. Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index b4a9b0d5928d..dda58cfe8c22 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -148,7 +148,7 @@ static inline void bfchg_mem_change_bit(int nr, volatile unsigned long *vaddr) #define __change_bit(nr, vaddr) change_bit(nr, vaddr) -static inline int test_bit(int nr, const unsigned long *vaddr) +static inline int test_bit(int nr, const volatile unsigned long *vaddr) { return (vaddr[nr >> 5] & (1UL << (nr & 31))) != 0; } -- cgit v1.2.3 From 3820ed470e71d7aa3d355495f7fd1cd8457e7c96 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 6 Mar 2017 10:40:02 +0100 Subject: m68k/defconfig: Update defconfigs for v4.11-rc1 Signed-off-by: Geert Uytterhoeven --- arch/m68k/configs/amiga_defconfig | 14 +++++++++++++- arch/m68k/configs/apollo_defconfig | 14 +++++++++++++- arch/m68k/configs/atari_defconfig | 14 +++++++++++++- arch/m68k/configs/bvme6000_defconfig | 14 +++++++++++++- arch/m68k/configs/hp300_defconfig | 14 +++++++++++++- arch/m68k/configs/mac_defconfig | 14 +++++++++++++- arch/m68k/configs/multi_defconfig | 14 +++++++++++++- arch/m68k/configs/mvme147_defconfig | 14 +++++++++++++- arch/m68k/configs/mvme16x_defconfig | 14 +++++++++++++- arch/m68k/configs/q40_defconfig | 14 +++++++++++++- arch/m68k/configs/sun3_defconfig | 14 +++++++++++++- arch/m68k/configs/sun3x_defconfig | 14 +++++++++++++- 12 files changed, 156 insertions(+), 12 deletions(-) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 048bf076f7df..531cb9eb3319 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -25,6 +25,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -60,6 +61,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -71,6 +73,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -101,6 +104,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -298,6 +302,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -371,6 +377,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -383,6 +390,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_A2065=y CONFIG_ARIADNE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -404,7 +412,6 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -564,6 +571,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -594,6 +603,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -605,6 +615,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -629,4 +640,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index d4de24963f5f..ca91d39555da 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -26,6 +26,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -58,6 +59,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -69,6 +71,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -99,6 +102,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -296,6 +300,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -353,6 +359,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -362,6 +369,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -378,7 +386,6 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -523,6 +530,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -553,6 +562,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -564,6 +574,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -588,4 +599,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index fc0fd3f871f3..23a3d8a691e2 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -25,6 +25,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -58,6 +59,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -69,6 +71,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -99,6 +102,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -296,6 +300,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -362,6 +368,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -372,6 +379,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_ATARILANCE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -389,7 +397,6 @@ CONFIG_NE2000=y # CONFIG_NET_VENDOR_SOLARFLARE is not set CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -544,6 +551,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -574,6 +583,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -585,6 +595,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -609,4 +620,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 52e984a0aa69..95deb95140fe 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y @@ -56,6 +57,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -67,6 +69,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -97,6 +100,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -294,6 +298,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -352,6 +358,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -361,6 +368,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -377,7 +385,6 @@ CONFIG_BVME6000_NET=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -515,6 +522,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -545,6 +554,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -556,6 +566,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -580,4 +591,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index aaeed4422cc9..afae6958db2d 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -26,6 +26,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -58,6 +59,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -69,6 +71,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -99,6 +102,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -296,6 +300,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -353,6 +359,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -363,6 +370,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_HPLANCE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -379,7 +387,6 @@ CONFIG_HPLANCE=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -525,6 +532,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -555,6 +564,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -566,6 +576,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -590,4 +601,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 3bbc9b2f0dac..b010734729a7 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -25,6 +25,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -57,6 +58,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -68,6 +70,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -98,6 +101,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -298,6 +302,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -369,6 +375,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -379,6 +386,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_MACMACE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -398,7 +406,6 @@ CONFIG_MAC8390=y # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -547,6 +554,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -577,6 +586,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -588,6 +598,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -612,4 +623,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 8f2c0decb2f8..0e414549b235 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -21,6 +21,7 @@ CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y @@ -67,6 +68,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -78,6 +80,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -108,6 +111,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -308,6 +312,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -402,6 +408,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -419,6 +426,7 @@ CONFIG_HPLANCE=y CONFIG_MVME147_NET=y CONFIG_SUN3LANCE=y CONFIG_MACMACE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -444,7 +452,6 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_SOLARFLARE is not set CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PLIP=m @@ -627,6 +634,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -657,6 +666,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -668,6 +678,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -692,4 +703,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index c743dd22e96f..b2e687a0ec3d 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68030=y @@ -55,6 +56,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -66,6 +68,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -96,6 +99,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -293,6 +297,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -351,6 +357,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -361,6 +368,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_MVME147_NET=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -377,7 +385,6 @@ CONFIG_MVME147_NET=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -515,6 +522,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -545,6 +554,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -556,6 +566,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -580,4 +591,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 2ccaca858f05..cbd8ee24d1bc 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y @@ -56,6 +57,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -67,6 +69,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -97,6 +100,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -294,6 +298,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -352,6 +358,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -361,6 +368,7 @@ CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -377,7 +385,6 @@ CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -515,6 +522,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -545,6 +554,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -556,6 +566,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -580,4 +591,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 5599f3fd5fcd..1e82cc944339 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -26,6 +26,7 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y @@ -56,6 +57,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -67,6 +69,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -97,6 +100,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -294,6 +298,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -358,6 +364,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -369,6 +376,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set # CONFIG_NET_VENDOR_AMD is not set +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -388,7 +396,6 @@ CONFIG_NE2000=y # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PLIP=m @@ -538,6 +545,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -568,6 +577,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -579,6 +589,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -603,4 +614,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 313bf0a562ad..f9e77f57a972 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_SUN3=y @@ -53,6 +54,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -64,6 +66,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -94,6 +97,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -291,6 +295,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -349,6 +355,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -359,6 +366,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_SUN3LANCE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_EZCHIP is not set @@ -375,7 +383,6 @@ CONFIG_SUN3_82586=y # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_SUN is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -517,6 +524,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -546,6 +555,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -557,6 +567,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -581,4 +592,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 38b61365f769..3c394fcfb368 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -25,6 +25,7 @@ CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_MQ_IOSCHED_DEADLINE=m CONFIG_KEXEC=y CONFIG_BOOTINFO_PROC=y CONFIG_SUN3X=y @@ -53,6 +54,7 @@ CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_INET_AH=m CONFIG_INET_ESP=m +CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m CONFIG_INET_XFRM_MODE_TUNNEL=m @@ -64,6 +66,7 @@ CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m +CONFIG_INET6_ESP_OFFLOAD=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m @@ -94,6 +97,7 @@ CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_SET_RBTREE=m CONFIG_NFT_SET_HASH=m +CONFIG_NFT_SET_BITMAP=m CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -291,6 +295,8 @@ CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set +CONFIG_PSAMPLE=m +CONFIG_NET_IFE=m CONFIG_NET_DEVLINK=m # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -349,6 +355,7 @@ CONFIG_NET_TEAM_MODE_LOADBALANCE=m CONFIG_MACVLAN=m CONFIG_MACVTAP=m CONFIG_IPVLAN=m +CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m CONFIG_GTP=m @@ -359,6 +366,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_ALACRITECH is not set # CONFIG_NET_VENDOR_AMAZON is not set CONFIG_SUN3LANCE=y +# CONFIG_NET_VENDOR_AQUANTIA is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -375,7 +383,6 @@ CONFIG_SUN3LANCE=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SOLARFLARE is not set # CONFIG_NET_VENDOR_STMICRO is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -517,6 +524,8 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_WW_MUTEX_SELFTEST=m +CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m @@ -547,6 +556,7 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m +CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_MICHAEL_MIC=m @@ -558,6 +568,7 @@ CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m @@ -582,4 +593,5 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m # CONFIG_CRYPTO_HW is not set +CONFIG_CRC32_SELFTEST=m CONFIG_XZ_DEC_TEST=m -- cgit v1.2.3 From e3b1ebd673876f2cdb0b84205e52a33b94a9860f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 6 Mar 2017 11:04:22 +0100 Subject: m68k: Wire up statx Signed-off-by: Geert Uytterhoeven --- arch/m68k/include/asm/unistd.h | 2 +- arch/m68k/include/uapi/asm/unistd.h | 1 + arch/m68k/kernel/syscalltable.S | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index a857d82ec509..aab1edd0d4ba 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include -#define NR_syscalls 379 +#define NR_syscalls 380 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index 9fe674bf911f..25589f5b8669 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -384,5 +384,6 @@ #define __NR_copy_file_range 376 #define __NR_preadv2 377 #define __NR_pwritev2 378 +#define __NR_statx 379 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index d6fd6d9ced24..8c9fcfafe0dd 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -399,3 +399,4 @@ ENTRY(sys_call_table) .long sys_copy_file_range .long sys_preadv2 .long sys_pwritev2 + .long sys_statx -- cgit v1.2.3 From 90f6e150e44a0dc3883110eeb3ab35d1be42b6bb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Mar 2017 18:20:49 +0000 Subject: arm/arm64: KVM: Take mmap_sem in stage2_unmap_vm We don't hold the mmap_sem while searching for the VMAs when we try to unmap each memslot for a VM. Fix this properly to avoid unexpected results. Fixes: commit 957db105c997 ("arm/arm64: KVM: Introduce stage2_unmap_vm") Cc: stable@vger.kernel.org # v3.19+ Reviewed-by: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/kvm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 962616fd4ddd..f2e2e0c6d6fd 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -803,6 +803,7 @@ void stage2_unmap_vm(struct kvm *kvm) int idx; idx = srcu_read_lock(&kvm->srcu); + down_read(¤t->mm->mmap_sem); spin_lock(&kvm->mmu_lock); slots = kvm_memslots(kvm); @@ -810,6 +811,7 @@ void stage2_unmap_vm(struct kvm *kvm) stage2_unmap_memslot(kvm, memslot); spin_unlock(&kvm->mmu_lock); + up_read(¤t->mm->mmap_sem); srcu_read_unlock(&kvm->srcu, idx); } -- cgit v1.2.3 From 72f310481a08db821b614e7b5d00febcc9064b36 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 16 Mar 2017 18:20:50 +0000 Subject: arm/arm64: KVM: Take mmap_sem in kvm_arch_prepare_memory_region We don't hold the mmap_sem while searching for VMAs (via find_vma), in kvm_arch_prepare_memory_region, which can end up in expected failures. Fixes: commit 8eef91239e57 ("arm/arm64: KVM: map MMIO regions at creation time") Cc: Ard Biesheuvel Cc: Eric Auger Cc: stable@vger.kernel.org # v3.18+ Reviewed-by: Christoffer Dall [ Handle dirty page logging failure case ] Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/kvm/mmu.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index f2e2e0c6d6fd..13b9c1fa8961 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1803,6 +1803,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, (KVM_PHYS_SIZE >> PAGE_SHIFT)) return -EFAULT; + down_read(¤t->mm->mmap_sem); /* * A memory region could potentially cover multiple VMAs, and any holes * between them, so iterate over all of them to find out if we can map @@ -1846,8 +1847,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, pa += vm_start - vma->vm_start; /* IO region dirty page logging not allowed */ - if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) - return -EINVAL; + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { + ret = -EINVAL; + goto out; + } ret = kvm_phys_addr_ioremap(kvm, gpa, pa, vm_end - vm_start, @@ -1859,7 +1862,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } while (hva < reg_end); if (change == KVM_MR_FLAGS_ONLY) - return ret; + goto out; spin_lock(&kvm->mmu_lock); if (ret) @@ -1867,6 +1870,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, else stage2_flush_memslot(kvm, memslot); spin_unlock(&kvm->mmu_lock); +out: + up_read(¤t->mm->mmap_sem); return ret; } -- cgit v1.2.3 From 04d5466a976b096364a39a63ac264c1b3a5f8fa1 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 9 Mar 2017 13:29:13 +0100 Subject: ALSA: hda - add support for docking station for HP 820 G2 This tested patch adds missing initialization for Line-In/Out PINs for the docking station for HP 820 G2. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4e112221d825..8d6b3703d0a2 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4847,6 +4847,7 @@ enum { ALC286_FIXUP_HP_GPIO_LED, ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY, ALC280_FIXUP_HP_DOCK_PINS, + ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, ALC280_FIXUP_HP_9480M, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -5388,6 +5389,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC280_FIXUP_HP_GPIO4 }, + [ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x21011020 }, /* line-out */ + { 0x18, 0x2181103f }, /* line-in */ + { }, + }, + .chained = true, + .chain_id = ALC269_FIXUP_HP_GPIO_MIC1_LED + }, [ALC280_FIXUP_HP_9480M] = { .type = HDA_FIXUP_FUNC, .v.func = alc280_fixup_hp_9480m, @@ -5647,7 +5658,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2256, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2257, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2259, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), - SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_GPIO_MIC1_LED), + SND_PCI_QUIRK(0x103c, 0x225a, "HP", ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED), SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), @@ -5816,6 +5827,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, .name = "headset-mode-no-hp-mic"}, {.id = ALC269_FIXUP_LENOVO_DOCK, .name = "lenovo-dock"}, {.id = ALC269_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"}, + {.id = ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, .name = "hp-dock-gpio-mic1-led"}, {.id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, .name = "dell-headset-multi"}, {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"}, {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"}, -- cgit v1.2.3 From cc3a47a248d7791ef0d2c81a35c46769e55e4c6c Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Thu, 9 Mar 2017 13:30:09 +0100 Subject: ALSA: hda - add support for docking station for HP 840 G3 This tested patch adds missing initialization for Line-In/Out PINs for the docking station for HP 840 G3. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index c15c51bea26d..69266b8ea2ad 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -261,6 +261,7 @@ enum { CXT_FIXUP_HP_530, CXT_FIXUP_CAP_MIX_AMP_5047, CXT_FIXUP_MUTE_LED_EAPD, + CXT_FIXUP_HP_DOCK, CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, }; @@ -778,6 +779,14 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_eapd, }, + [CXT_FIXUP_HP_DOCK] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x16, 0x21011020 }, /* line-out */ + { 0x18, 0x2181103f }, /* line-in */ + { } + } + }, [CXT_FIXUP_HP_SPECTRE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -839,6 +848,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x1025, 0x0543, "Acer Aspire One 522", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1025, 0x054c, "Acer Aspire 3830TG", CXT_FIXUP_ASPIRE_DMIC), SND_PCI_QUIRK(0x1025, 0x054f, "Acer Aspire 4830T", CXT_FIXUP_ASPIRE_DMIC), + SND_PCI_QUIRK(0x103c, 0x8079, "HP EliteBook 840 G3", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), @@ -871,6 +881,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_PINCFG_LEMOTE_A1205, .name = "lemote-a1205" }, { .id = CXT_FIXUP_OLPC_XO, .name = "olpc-xo" }, { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" }, + { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" }, {} }; -- cgit v1.2.3 From 83749abaafed348cdac6a63b5f76aa9b1b42409b Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 20 Mar 2017 10:20:53 +0800 Subject: ASoC: rt5665: fix wrong shift rt5665_if2_1_adc_in_enum The shift is RT5665_IF2_1_ADC_IN_SFT not RT5665_IF3_ADC_IN_SFT. Signed-off-by: Bard Liao Signed-off-by: Mark Brown --- sound/soc/codecs/rt5665.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c index 61137160c116..476135ec5726 100644 --- a/sound/soc/codecs/rt5665.c +++ b/sound/soc/codecs/rt5665.c @@ -2252,7 +2252,7 @@ static const char * const rt5665_if2_1_adc_in_src[] = { static const SOC_ENUM_SINGLE_DECL( rt5665_if2_1_adc_in_enum, RT5665_DIG_INF2_DATA, - RT5665_IF3_ADC_IN_SFT, rt5665_if2_1_adc_in_src); + RT5665_IF2_1_ADC_IN_SFT, rt5665_if2_1_adc_in_src); static const struct snd_kcontrol_new rt5665_if2_1_adc_in_mux = SOC_DAPM_ENUM("IF2_1 ADC IN Source", rt5665_if2_1_adc_in_enum); -- cgit v1.2.3 From fdfe4a393e9cd8c92f4489ca207d410f44d05043 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 23:45:21 +0900 Subject: generic syscalls: Wire up statx syscall The new syscall statx is implemented as generic code, so enable it for architectures like openrisc which use the generic syscall table. Fixes: a528d35e8bfcc ("statx: Add a system call to make enhanced file info available") Cc: Thomas Gleixner Cc: Al Viro Cc: David Howells Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Stafford Horne Signed-off-by: Will Deacon --- include/uapi/asm-generic/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 9b1462e38b82..a076cf1a3a23 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -730,9 +730,11 @@ __SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect) __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) #define __NR_pkey_free 290 __SYSCALL(__NR_pkey_free, sys_pkey_free) +#define __NR_statx 291 +__SYSCALL(__NR_statx, sys_statx) #undef __NR_syscalls -#define __NR_syscalls 291 +#define __NR_syscalls 292 /* * All syscalls below here should go away really, -- cgit v1.2.3 From 9702c67c6066f583b629cf037d2056245bb7a8e6 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Mar 2017 23:07:28 +0800 Subject: scsi: libsas: fix ata xfer length The total ata xfer length may not be calculated properly, in that we do not use the proper method to get an sg element dma length. According to the code comment, sg_dma_len() should be used after dma_map_sg() is called. This issue was found by turning on the SMMUv3 in front of the hisi_sas controller in hip07. Multiple sg elements were being combined into a single element, but the original first element length was being use as the total xfer length. Cc: Fixes: ff2aeb1eb64c8a4770a6 ("libata: convert to chained sg") Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/libsas/sas_ata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 763f012fdeca..87f5e694dbed 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) task->num_scatter = qc->n_elem; } else { for_each_sg(qc->sg, sg, qc->n_elem, si) - xfer += sg->length; + xfer += sg_dma_len(sg); task->total_xfer_len = xfer; task->num_scatter = si; -- cgit v1.2.3 From 720037f939fa50fc3531035ae61b4cf4b0ff35e5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Mar 2017 11:59:56 -0800 Subject: f2fs: don't overwrite node block by SSR This patch fixes that SSR can overwrite previous warm node block consisting of a node chain since the last checkpoint. Fixes: 5b6c6be2d878 ("f2fs: use SSR for warm node as well") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4bd7a8b19332..29ef7088c558 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; + + /* don't overwrite by SSR to keep node chain */ + if (se->type == CURSEG_WARM_NODE) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks++; + } } else { if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { #ifdef CONFIG_F2FS_CHECK_FS -- cgit v1.2.3 From 9f7e4a2c49fd166f17cf4125766a68dce8716764 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 10 Mar 2017 09:39:57 -0800 Subject: f2fs: declare static functions This is to avoid build warning reported by kbuild test robot. Signed-off-by: Fengguang Wu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 94967171dee8..a0a060c2979b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1823,7 +1823,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set) +static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, + bool set) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -2383,7 +2384,7 @@ add_out: list_add_tail(&nes->set_list, head); } -void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, +static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, struct page *page) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -2638,7 +2639,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) return 0; } -int init_free_nid_cache(struct f2fs_sb_info *sbi) +static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); -- cgit v1.2.3 From 23380b8568b85cd4b7a056891f4dbf131f7b871d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Mar 2017 14:11:06 -0800 Subject: f2fs: use __set{__clear}_bit_le This patch uses __set{__clear}_bit_le for highter speed. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/node.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4650c9b85de7..8d5c62b07b28 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) - clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a0a060c2979b..8c81ff614d1a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -339,7 +339,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, __set_nat_cache_dirty(nm_i, e); if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR) - clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); + __clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) @@ -1834,9 +1834,9 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, return; if (set) - set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); else - clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1848,7 +1848,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; - set_bit_le(nat_ofs, nm_i->nat_block_bitmap); + __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); i = start_nid % NAT_ENTRY_PER_BLOCK; @@ -2403,16 +2403,16 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, valid++; } if (valid == 0) { - set_bit_le(nat_index, nm_i->empty_nat_bits); - clear_bit_le(nat_index, nm_i->full_nat_bits); + __set_bit_le(nat_index, nm_i->empty_nat_bits); + __clear_bit_le(nat_index, nm_i->full_nat_bits); return; } - clear_bit_le(nat_index, nm_i->empty_nat_bits); + __clear_bit_le(nat_index, nm_i->empty_nat_bits); if (valid == NAT_ENTRY_PER_BLOCK) - set_bit_le(nat_index, nm_i->full_nat_bits); + __set_bit_le(nat_index, nm_i->full_nat_bits); else - clear_bit_le(nat_index, nm_i->full_nat_bits); + __clear_bit_le(nat_index, nm_i->full_nat_bits); } static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From 586d1492f301982e349797cfb05d9f343002ffa2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Mar 2017 17:09:07 +0800 Subject: f2fs: skip scanning free nid bitmap of full NAT blocks This patch adds to account free nids for each NAT blocks, and while scanning all free nid bitmap, do check count and skip lookuping in full NAT block. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 1 + fs/f2fs/f2fs.h | 2 ++ fs/f2fs/node.c | 33 +++++++++++++++++++++++++++------ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a77df377e2e8..ee2d0a485fc3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; si->base_mem += NM_I(sbi)->nat_blocks / 8; + si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short); get_cache: si->cache_mem = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e849f83d6114..0a6e115562f6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -561,6 +561,8 @@ struct f2fs_nm_info { struct mutex build_lock; /* lock for build free nids */ unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; unsigned char *nat_block_bitmap; + unsigned short *free_nid_count; /* free nid count of NAT block */ + spinlock_t free_nid_lock; /* protect updating of nid count */ /* for checkpoint */ char *nat_bitmap; /* NAT bitmap pointer */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8c81ff614d1a..87a2b1f740cc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1824,7 +1824,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set) + bool set, bool build) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -1837,6 +1837,13 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); else __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + + spin_lock(&nm_i->free_nid_lock); + if (set) + nm_i->free_nid_count[nat_ofs]++; + else if (!build) + nm_i->free_nid_count[nat_ofs]--; + spin_unlock(&nm_i->free_nid_lock); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1848,6 +1855,9 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; + if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) + return; + __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); i = start_nid % NAT_ENTRY_PER_BLOCK; @@ -1862,7 +1872,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) freed = add_free_nid(sbi, start_nid, true); - update_free_nid_bitmap(sbi, start_nid, freed); + update_free_nid_bitmap(sbi, start_nid, freed, true); } } @@ -1878,6 +1888,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) continue; + if (!nm_i->free_nid_count[i]) + continue; for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { nid_t nid; @@ -2082,7 +2094,7 @@ retry: __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); nm_i->available_nids--; - update_free_nid_bitmap(sbi, *nid, false); + update_free_nid_bitmap(sbi, *nid, false, false); spin_unlock(&nm_i->nid_list_lock); return true; @@ -2138,7 +2150,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) nm_i->available_nids++; - update_free_nid_bitmap(sbi, nid, true); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&nm_i->nid_list_lock); @@ -2468,11 +2480,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, add_free_nid(sbi, nid, false); spin_lock(&NM_I(sbi)->nid_list_lock); NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } else { spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, nid, false); + update_free_nid_bitmap(sbi, nid, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2652,6 +2664,14 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) GFP_KERNEL); if (!nm_i->nat_block_bitmap) return -ENOMEM; + + nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks * + sizeof(unsigned short), GFP_KERNEL); + if (!nm_i->free_nid_count) + return -ENOMEM; + + spin_lock_init(&nm_i->free_nid_lock); + return 0; } @@ -2731,6 +2751,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) kvfree(nm_i->nat_block_bitmap); kvfree(nm_i->free_nid_bitmap); + kvfree(nm_i->free_nid_count); kfree(nm_i->nat_bitmap); kfree(nm_i->nat_bits); -- cgit v1.2.3 From 7041d5d286fb54635f540c1bb3b43980ed65513a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Mar 2017 20:07:49 +0800 Subject: f2fs: combine nat_bits and free_nid_bitmap cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both nat_bits cache and free_nid_bitmap cache provide same functionality as a intermediate cache between free nid cache and disk, but with different granularity of indicating free nid range, and different persistence policy. nat_bits cache provides better persistence ability, and free_nid_bitmap provides better granularity. In this patch we combine advantage of both caches, so finally policy of the intermediate cache would be: - init: load free nid status from nat_bits into free_nid_bitmap - lookup: scan free_nid_bitmap before load NAT blocks - update: update free_nid_bitmap in real-time - persistence: udpate and persist nat_bits in checkpoint This patch also resolves performance regression reported by lkp-robot. commit: 4ac912427c4214d8031d9ad6fbc3bc75e71512df ("f2fs: introduce free nid bitmap") d00030cf9cd0bb96fdccc41e33d3c91dcbb672ba ("f2fs: use __set{__clear}_bit_le") 1382c0f3f9d3f936c8bc42ed1591cf7a593ef9f7 ("f2fs: combine nat_bits and free_nid_bitmap cache") 4ac912427c4214d8 d00030cf9cd0bb96fdccc41e33 1382c0f3f9d3f936c8bc42ed15 ---------------- -------------------------- -------------------------- %stddev %change %stddev %change %stddev \ | \ | \ 77863 ± 0% +2.1% 79485 ± 1% +50.8% 117404 ± 0% aim7.jobs-per-min 231.63 ± 0% -2.0% 227.01 ± 1% -33.6% 153.80 ± 0% aim7.time.elapsed_time 231.63 ± 0% -2.0% 227.01 ± 1% -33.6% 153.80 ± 0% aim7.time.elapsed_time.max 896604 ± 0% -0.8% 889221 ± 3% -20.2% 715260 ± 1% aim7.time.involuntary_context_switches 2394 ± 1% +4.6% 2503 ± 1% +3.7% 2481 ± 2% aim7.time.maximum_resident_set_size 6240 ± 0% -1.5% 6145 ± 1% -14.1% 5360 ± 1% aim7.time.system_time 1111357 ± 3% +1.9% 1132509 ± 2% -6.2% 1041932 ± 2% aim7.time.voluntary_context_switches ... Signed-off-by: Chao Yu Tested-by: Xiaolong Ye Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 125 ++++++++++++++++++++++----------------------------------- 1 file changed, 47 insertions(+), 78 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 87a2b1f740cc..481aa8dc79f4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); - if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR) - __clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); - /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) e = __lookup_nat_cache(nm_i, ni->ino); @@ -1824,7 +1821,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set, bool build) + bool set, bool build, bool locked) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -1838,12 +1835,14 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, else __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - spin_lock(&nm_i->free_nid_lock); + if (!locked) + spin_lock(&nm_i->free_nid_lock); if (set) nm_i->free_nid_count[nat_ofs]++; else if (!build) nm_i->free_nid_count[nat_ofs]--; - spin_unlock(&nm_i->free_nid_lock); + if (!locked) + spin_unlock(&nm_i->free_nid_lock); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1872,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) freed = add_free_nid(sbi, start_nid, true); - update_free_nid_bitmap(sbi, start_nid, freed, true); + update_free_nid_bitmap(sbi, start_nid, freed, true, false); } } @@ -1920,58 +1919,6 @@ out: up_read(&nm_i->nat_tree_lock); } -static int scan_nat_bits(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct page *page; - unsigned int i = 0; - nid_t nid; - - if (!enabled_nat_bits(sbi, NULL)) - return -EAGAIN; - - down_read(&nm_i->nat_tree_lock); -check_empty: - i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); - if (i >= nm_i->nat_blocks) { - i = 0; - goto check_partial; - } - - for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK; - nid++) { - if (unlikely(nid >= nm_i->max_nid)) - break; - add_free_nid(sbi, nid, true); - } - - if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) - goto out; - i++; - goto check_empty; - -check_partial: - i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); - if (i >= nm_i->nat_blocks) { - disable_nat_bits(sbi, true); - up_read(&nm_i->nat_tree_lock); - return -EINVAL; - } - - nid = i * NAT_ENTRY_PER_BLOCK; - page = get_current_nat_page(sbi, nid); - scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); - - if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) { - i++; - goto check_partial; - } -out: - up_read(&nm_i->nat_tree_lock); - return 0; -} - static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -1993,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) if (nm_i->nid_cnt[FREE_NID_LIST]) return; - - /* try to find free nids with nat_bits */ - if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST]) - return; - } - - /* find next valid candidate */ - if (enabled_nat_bits(sbi, NULL)) { - int idx = find_next_zero_bit_le(nm_i->full_nat_bits, - nm_i->nat_blocks, 0); - - if (idx >= nm_i->nat_blocks) - set_sbi_flag(sbi, SBI_NEED_FSCK); - else - nid = idx * NAT_ENTRY_PER_BLOCK; } /* readahead nat pages to be scanned */ @@ -2094,7 +2026,7 @@ retry: __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); nm_i->available_nids--; - update_free_nid_bitmap(sbi, *nid, false, false); + update_free_nid_bitmap(sbi, *nid, false, false, false); spin_unlock(&nm_i->nid_list_lock); return true; @@ -2150,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) nm_i->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false); + update_free_nid_bitmap(sbi, nid, true, false, false); spin_unlock(&nm_i->nid_list_lock); @@ -2480,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, add_free_nid(sbi, nid, false); spin_lock(&NM_I(sbi)->nid_list_lock); NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false); + update_free_nid_bitmap(sbi, nid, true, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } else { spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, nid, false, false); + update_free_nid_bitmap(sbi, nid, false, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2590,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) return 0; } +inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int i = 0; + nid_t nid, last_nid; + + if (!enabled_nat_bits(sbi, NULL)) + return; + + for (i = 0; i < nm_i->nat_blocks; i++) { + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) + break; + + __set_bit_le(i, nm_i->nat_block_bitmap); + + nid = i * NAT_ENTRY_PER_BLOCK; + last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; + + spin_lock(&nm_i->free_nid_lock); + for (; nid < last_nid; nid++) + update_free_nid_bitmap(sbi, nid, true, true, true); + spin_unlock(&nm_i->free_nid_lock); + } + + for (i = 0; i < nm_i->nat_blocks; i++) { + i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) + break; + + __set_bit_le(i, nm_i->nat_block_bitmap); + } +} + static int init_node_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); @@ -2691,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi) if (err) return err; + /* load free nid status from nat_bits table */ + load_free_nid_bitmap(sbi); + build_free_nids(sbi, true, true); return 0; } -- cgit v1.2.3 From 6be3b6cce1e225f189b68b4e84fc711d19b4277b Mon Sep 17 00:00:00 2001 From: Ryan Hsu Date: Mon, 13 Mar 2017 15:49:03 -0700 Subject: ath10k: fix incorrect wlan_mac_base in qca6174_regs In the 'commit ebee76f7fa46 ("ath10k: allow setting coverage class")', it inherits the design and the address offset from ath9k, but the address is not applicable to QCA6174, which leads to a random crash while doing the resume() operation, since the set_coverage_class.ops will be called from ieee80211_reconfig() when resume() (if the wow is not configured). Fix the incorrect address offset here to avoid the random crash. Verified on QCA6174/hw3.0 with firmware WLAN.RM.4.4-00022-QCARMSWPZ-2. kvalo: this also seems to fix a regression with firmware restart. Fixes: ebee76f7fa46 ("ath10k: allow setting coverage class") Cc: # v4.10 Signed-off-by: Ryan Hsu Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index 33fb26833cd0..d9f37ee4bfdd 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -51,7 +51,7 @@ const struct ath10k_hw_regs qca6174_regs = { .rtc_soc_base_address = 0x00000800, .rtc_wmac_base_address = 0x00001000, .soc_core_base_address = 0x0003a000, - .wlan_mac_base_address = 0x00020000, + .wlan_mac_base_address = 0x00010000, .ce_wrapper_base_address = 0x00034000, .ce0_base_address = 0x00034400, .ce1_base_address = 0x00034800, -- cgit v1.2.3 From 6c6c5e0311c83ffe75e14260fb83e05e21e1d488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 13 Jan 2017 18:59:04 +0100 Subject: KVM: VMX: downgrade warning on unexpected exit code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We never needed the call trace and we better rate-limit if it can be triggered by a guest. Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98e82ee1e699..e7ec88961b1a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8501,7 +8501,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu); else { - WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason); + vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", + exit_reason); kvm_queue_exception(vcpu, UD_VECTOR); return 1; } -- cgit v1.2.3 From 3863dff0c3dd72984395c93b12383b393c5c3989 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 24 Jan 2017 14:06:48 +0100 Subject: kvm: fix usage of uninit spinlock in avic_vm_destroy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If avic is not enabled, avic_vm_init() does nothing and returns early. However, avic_vm_destroy() still tries to destroy what hasn't been created. The only bad consequence of this now is that avic_vm_destroy() uses svm_vm_data_hash_lock that hasn't been initialized (and is not meant to be used at all if avic is not enabled). Return early from avic_vm_destroy() if avic is not enabled. It has nothing to destroy. Signed-off-by: Dmitry Vyukov Cc: Joerg Roedel Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: David Hildenbrand Cc: kvm@vger.kernel.org Cc: syzkaller@googlegroups.com Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1efe2c62b3f..5fba70646c32 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1379,6 +1379,9 @@ static void avic_vm_destroy(struct kvm *kvm) unsigned long flags; struct kvm_arch *vm_data = &kvm->arch; + if (!avic) + return; + avic_free_vm_id(vm_data->avic_vm_id); if (vm_data->avic_logical_id_table_page) -- cgit v1.2.3 From 49e190ec332e96ba28f24b86f7a92f614707819b Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Sat, 28 Jan 2017 05:01:51 +0800 Subject: PTP: fix ptr_ret.cocci warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/ptp/ptp_kvm.c:229:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci CC: Marcelo Tosatti Signed-off-by: Fengguang Wu Signed-off-by: Radim Krčmář --- drivers/ptp/ptp_kvm.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c index 09b4df74291e..bb865695d7a6 100644 --- a/drivers/ptp/ptp_kvm.c +++ b/drivers/ptp/ptp_kvm.c @@ -193,10 +193,7 @@ static int __init ptp_kvm_init(void) kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL); - if (IS_ERR(kvm_ptp_clock.ptp_clock)) - return PTR_ERR(kvm_ptp_clock.ptp_clock); - - return 0; + return PTR_ERR_OR_ZERO(kvm_ptp_clock.ptp_clock); } module_init(ptp_kvm_init); -- cgit v1.2.3 From 6d1b3ad2cd87150fc89bad2331beab173a8ad24d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 12 Mar 2017 00:53:52 -0800 Subject: KVM: nVMX: don't reset kvm mmu twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm mmu is reset once successfully loading CR3 as part of emulating vmentry in nested_vmx_load_cr3(). We should not reset kvm mmu twice. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e7ec88961b1a..c66436530a93 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10287,8 +10287,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, entry_failure_code)) return 1; - kvm_mmu_reset_context(vcpu); - if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; -- cgit v1.2.3 From 3aa53859d23e1b9cf1a60f82a9008e35ef10bd6a Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Mon, 13 Mar 2017 09:08:20 -0400 Subject: KVM: Documentation: document MCE ioctls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luiz Capitulino Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 63 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 3c248f772ae6..fd106899afd1 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3377,6 +3377,69 @@ struct kvm_ppc_resize_hpt { __u32 pad; }; +4.104 KVM_X86_GET_MCE_CAP_SUPPORTED + +Capability: KVM_CAP_MCE +Architectures: x86 +Type: system ioctl +Parameters: u64 mce_cap (out) +Returns: 0 on success, -1 on error + +Returns supported MCE capabilities. The u64 mce_cap parameter +has the same format as the MSR_IA32_MCG_CAP register. Supported +capabilities will have the corresponding bits set. + +4.105 KVM_X86_SETUP_MCE + +Capability: KVM_CAP_MCE +Architectures: x86 +Type: vcpu ioctl +Parameters: u64 mcg_cap (in) +Returns: 0 on success, + -EFAULT if u64 mcg_cap cannot be read, + -EINVAL if the requested number of banks is invalid, + -EINVAL if requested MCE capability is not supported. + +Initializes MCE support for use. The u64 mcg_cap parameter +has the same format as the MSR_IA32_MCG_CAP register and +specifies which capabilities should be enabled. The maximum +supported number of error-reporting banks can be retrieved when +checking for KVM_CAP_MCE. The supported capabilities can be +retrieved with KVM_X86_GET_MCE_CAP_SUPPORTED. + +4.106 KVM_X86_SET_MCE + +Capability: KVM_CAP_MCE +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_x86_mce (in) +Returns: 0 on success, + -EFAULT if struct kvm_x86_mce cannot be read, + -EINVAL if the bank number is invalid, + -EINVAL if VAL bit is not set in status field. + +Inject a machine check error (MCE) into the guest. The input +parameter is: + +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; + +If the MCE being reported is an uncorrected error, KVM will +inject it as an MCE exception into the guest. If the guest +MCG_STATUS register reports that an MCE is in progress, KVM +causes an KVM_EXIT_SHUTDOWN vmexit. + +Otherwise, if the MCE is a corrected error, KVM will just +store it in the corresponding bank (provided this bank is +not holding a previously reported uncorrected error). + 5. The kvm_run structure ------------------------ -- cgit v1.2.3 From 0d963b6e650d9d5533223f3dbcde7dda466df65c Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 20 Mar 2017 11:54:11 -0400 Subject: dm cache metadata: fix metadata2 format's blocks_are_clean_separate_dirty The dm_bitset_cursor_begin() call was using the incorrect nr_entries. Also, the last dm_bitset_cursor_next() must be avoided if we're at the end of the cursor. Fixes: 7f1b21591a6 ("dm cache metadata: use cursor api in blocks_are_clean_separate_dirty()") Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-metadata.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index e4c2c1a1e993..6735c8d6a445 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -932,7 +932,7 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, *result = true; r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root, - from_cblock(begin), &cmd->dirty_cursor); + from_cblock(cmd->cache_blocks), &cmd->dirty_cursor); if (r) { DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__); return r; @@ -959,14 +959,16 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd, return 0; } + begin = to_cblock(from_cblock(begin) + 1); + if (begin == end) + break; + r = dm_bitset_cursor_next(&cmd->dirty_cursor); if (r) { DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__); dm_bitset_cursor_end(&cmd->dirty_cursor); return r; } - - begin = to_cblock(from_cblock(begin) + 1); } dm_bitset_cursor_end(&cmd->dirty_cursor); -- cgit v1.2.3 From c3104aae5d8cc443556f8613466e16737326e215 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Mar 2017 17:36:25 +0100 Subject: remoteproc: qcom: fix QCOM_SMD dependencies qcom_smd_register_edge() is provided by either QCOM_SMD or RPMSG_QCOM_SMD, and if both of them are disabled, it does nothing. The check for the PIL drivers however only checks for QCOM_SMD, so it breaks with QCOM_SMD=n && RPMSG_QCOM_SMD=m: drivers/remoteproc/built-in.o: In function `smd_subdev_remove': qcom_wcnss_iris.c:(.text+0x231c): undefined reference to `qcom_smd_unregister_edge' drivers/remoteproc/built-in.o: In function `smd_subdev_probe': qcom_wcnss_iris.c:(.text+0x2344): undefined reference to `qcom_smd_register_edge' drivers/remoteproc/built-in.o: In function `smd_subdev_probe': qcom_q6v5_pil.c:(.text+0x3538): undefined reference to `qcom_smd_register_edge' qcom_q6v5_pil.c:(.text+0x3538): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `qcom_smd_register_edge' This clarifies the Kconfig dependency. Fixes: 4b48921a8f74 ("remoteproc: qcom: Use common SMD edge handler") Signed-off-by: Arnd Bergmann Signed-off-by: Bjorn Andersson --- drivers/remoteproc/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 65f86bc24c07..1dc43fc5f65f 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -76,7 +76,7 @@ config QCOM_ADSP_PIL depends on OF && ARCH_QCOM depends on REMOTEPROC depends on QCOM_SMEM - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_RPROC_COMMON @@ -93,7 +93,7 @@ config QCOM_Q6V5_PIL depends on OF && ARCH_QCOM depends on QCOM_SMEM depends on REMOTEPROC - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) select MFD_SYSCON select QCOM_RPROC_COMMON select QCOM_SCM @@ -104,7 +104,7 @@ config QCOM_Q6V5_PIL config QCOM_WCNSS_PIL tristate "Qualcomm WCNSS Peripheral Image Loader" depends on OF && ARCH_QCOM - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) depends on QCOM_SMEM depends on REMOTEPROC select QCOM_MDT_LOADER -- cgit v1.2.3 From 4296f23ed49a15d36949458adcc66ff993dee2a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 19 Mar 2017 14:30:02 +0100 Subject: cpufreq: schedutil: Fix per-CPU structure initialization in sugov_start() sugov_start() only initializes struct sugov_cpu per-CPU structures for shared policies, but it should do that for single-CPU policies too. That in particular makes the IO-wait boost mechanism work in the cases when cpufreq policies correspond to individual CPUs. Fixes: 21ca6d2c52f8 (cpufreq: schedutil: Add iowait boosting) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Cc: 4.9+ # 4.9+ --- kernel/sched/cpufreq_schedutil.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index cd7cd489f739..54c577578da6 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -584,20 +584,14 @@ static int sugov_start(struct cpufreq_policy *policy) for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->sg_policy = sg_policy; - if (policy_is_shared(policy)) { - sg_cpu->util = 0; - sg_cpu->max = 0; - sg_cpu->flags = SCHED_CPUFREQ_RT; - sg_cpu->last_update = 0; - sg_cpu->iowait_boost = 0; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_shared); - } else { - cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, - sugov_update_single); - } + sg_cpu->flags = SCHED_CPUFREQ_RT; + sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, + policy_is_shared(policy) ? + sugov_update_shared : + sugov_update_single); } return 0; } -- cgit v1.2.3 From 814a585038e36cd158bee4ef964e579136cf24c6 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 20 Mar 2017 18:46:15 -0700 Subject: ARCv2: make unimplemented vectors as no-ops rather than halt core Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry-arcv2.S | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index 2585632eaa68..cc558a25b8fa 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -100,15 +100,21 @@ END(handle_interrupt) ;################### Non TLB Exception Handling ############################# ENTRY(EV_SWI) - flag 1 + ; TODO: implement this + EXCEPTION_PROLOGUE + b ret_from_exception END(EV_SWI) ENTRY(EV_DivZero) - flag 1 + ; TODO: implement this + EXCEPTION_PROLOGUE + b ret_from_exception END(EV_DivZero) ENTRY(EV_DCError) - flag 1 + ; TODO: implement this + EXCEPTION_PROLOGUE + b ret_from_exception END(EV_DCError) ; --------------------------------------------- -- cgit v1.2.3 From 4a53148868493bd5e0b18a9814aaa20bf74e3b26 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Tue, 21 Mar 2017 09:32:19 +0800 Subject: drm/i915/gvt: fix wrong offset when loading RCS mocs Fix the wrong offset of the RCS specific mocs Fixes: 178657139307 ("drm/i915/gvt: vGPU context switch") Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 95ee091ce085..0beb83563b08 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -207,7 +207,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id) l3_offset.reg = 0xb020; for (i = 0; i < 32; i++) { gen9_render_mocs_L3[i] = I915_READ(l3_offset); - I915_WRITE(l3_offset, vgpu_vreg(vgpu, offset)); + I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset)); POSTING_READ(l3_offset); l3_offset.reg += 4; } -- cgit v1.2.3 From 14f5ba26aa7060b4cad9c4c288b9a785bd0cd1a8 Mon Sep 17 00:00:00 2001 From: Xu Han Date: Tue, 21 Mar 2017 10:00:59 +0800 Subject: drm/i915/gvt: Fix guest fail to read EDID leading to black guest console issue. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It appears missing slaves on the i2c should cause 0xff to be returned rather than 0. So, when the Windows driver tried to address a slave at 0x40 and got 0’s back rather than 0xff’s it must have confused it. Signed-off-by: Paul Durrant Signed-off-by: Xu Han Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/edid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index f1648fe5e5ea..42cd09ec63fa 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -495,7 +495,8 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu, unsigned char val = edid_get_byte(vgpu); aux_data_for_write = (val << 16); - } + } else + aux_data_for_write = (0xff << 16); } /* write the return value in AUX_CH_DATA reg which includes: * ACK of I2C_WRITE -- cgit v1.2.3 From 359b69310014511901bd61cc5f7680cb5de1faef Mon Sep 17 00:00:00 2001 From: Xiaoguang Chen Date: Tue, 21 Mar 2017 10:54:21 +0800 Subject: drm/i915/gvt: set shadow entry to scratch page while p2m failed Sometimes guest driver will only update partial of the GGTT entry then access it. In this situation a failure will happen while translating the gpa to hpa. Now in this situation we let the corresponding shadow entry pointing to a scratch page. Signed-off-by: Zhi Wang Signed-off-by: Xiaoguang Chen Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index da7312715824..b832bea64e03 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1837,11 +1837,15 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, ret = gtt_entry_p2m(vgpu, &e, &m); if (ret) { gvt_vgpu_err("fail to translate guest gtt entry\n"); - return ret; + /* guest driver may read/write the entry when partial + * update the entry in this situation p2m will fail + * settting the shadow entry to point to a scratch page + */ + ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn); } } else { m = e; - m.val64 = 0; + ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn); } ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index); -- cgit v1.2.3 From ac7ce78ba036c0f9952d9706b1941c7d80c78680 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 14 Feb 2017 10:46:20 +0300 Subject: drm/exynos/decon5433: & vs | typo "&" was obviously intended instead of "|". The original condition is always true. Fixes: b93c2e8b5d9d ("drm/exynos/decon5433: configure sysreg in case of hardware trigger") Signed-off-by: Dan Carpenter Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 0fd6f7a18364..cca32a4fdab3 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -678,7 +678,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev) ctx->out_type |= IFTYPE_I80; } - if (ctx->out_type | I80_HW_TRG) { + if (ctx->out_type & I80_HW_TRG) { ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, "samsung,disp-sysreg"); if (IS_ERR(ctx->sysreg)) { -- cgit v1.2.3 From 6bdc92ee4980ca10171a8de338fad612f00bb48f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 11 Mar 2017 20:04:16 +0200 Subject: drm/exynos: Remove support for Exynos4415 (SoC not supported anymore) Support for Exynos4415 is going away because there are no internal nor external users. Since commit 46dcf0ff0de3 ("ARM: dts: exynos: Remove exynos4415.dtsi"), the platform cannot be instantiated so remove also the drivers. Signed-off-by: Krzysztof Kozlowski Acked-by: Kukjin Kim Acked-by: Rob Herring Signed-off-by: Inki Dae --- .../devicetree/bindings/display/exynos/exynos_dsim.txt | 1 - .../bindings/display/exynos/samsung-fimd.txt | 1 - drivers/gpu/drm/exynos/exynos_drm_dsi.c | 15 +-------------- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 18 ++---------------- 4 files changed, 3 insertions(+), 32 deletions(-) diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt index a78265993665..ca5204b3bc21 100644 --- a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt +++ b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt @@ -4,7 +4,6 @@ Required properties: - compatible: value should be one of the following "samsung,exynos3250-mipi-dsi" /* for Exynos3250/3472 SoCs */ "samsung,exynos4210-mipi-dsi" /* for Exynos4 SoCs */ - "samsung,exynos4415-mipi-dsi" /* for Exynos4415 SoC */ "samsung,exynos5410-mipi-dsi" /* for Exynos5410/5420/5440 SoCs */ "samsung,exynos5422-mipi-dsi" /* for Exynos5422/5800 SoCs */ "samsung,exynos5433-mipi-dsi" /* for Exynos5433 SoCs */ diff --git a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt index 18645e0228b0..5837402c3ade 100644 --- a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt +++ b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt @@ -11,7 +11,6 @@ Required properties: "samsung,s5pv210-fimd"; /* for S5PV210 SoC */ "samsung,exynos3250-fimd"; /* for Exynos3250/3472 SoCs */ "samsung,exynos4210-fimd"; /* for Exynos4 SoCs */ - "samsung,exynos4415-fimd"; /* for Exynos4415 SoC */ "samsung,exynos5250-fimd"; /* for Exynos5250 SoCs */ "samsung,exynos5420-fimd"; /* for Exynos5420/5422/5800 SoCs */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 812e2ec0761d..ef6f9c6de098 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -86,7 +86,7 @@ #define DSIM_SYNC_INFORM (1 << 27) #define DSIM_EOT_DISABLE (1 << 28) #define DSIM_MFLUSH_VS (1 << 29) -/* This flag is valid only for exynos3250/3472/4415/5260/5430 */ +/* This flag is valid only for exynos3250/3472/5260/5430 */ #define DSIM_CLKLANE_STOP (1 << 30) /* DSIM_ESCMODE */ @@ -473,17 +473,6 @@ static const struct exynos_dsi_driver_data exynos4_dsi_driver_data = { .reg_values = reg_values, }; -static const struct exynos_dsi_driver_data exynos4415_dsi_driver_data = { - .reg_ofs = exynos_reg_ofs, - .plltmr_reg = 0x58, - .has_clklane_stop = 1, - .num_clks = 2, - .max_freq = 1000, - .wait_for_reset = 1, - .num_bits_resol = 11, - .reg_values = reg_values, -}; - static const struct exynos_dsi_driver_data exynos5_dsi_driver_data = { .reg_ofs = exynos_reg_ofs, .plltmr_reg = 0x58, @@ -521,8 +510,6 @@ static const struct of_device_id exynos_dsi_of_match[] = { .data = &exynos3_dsi_driver_data }, { .compatible = "samsung,exynos4210-mipi-dsi", .data = &exynos4_dsi_driver_data }, - { .compatible = "samsung,exynos4415-mipi-dsi", - .data = &exynos4415_dsi_driver_data }, { .compatible = "samsung,exynos5410-mipi-dsi", .data = &exynos5_dsi_driver_data }, { .compatible = "samsung,exynos5422-mipi-dsi", diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index a9fa444c6053..661b9fe049e2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -71,10 +71,10 @@ #define TRIGCON 0x1A4 #define TRGMODE_ENABLE (1 << 0) #define SWTRGCMD_ENABLE (1 << 1) -/* Exynos3250, 3472, 4415, 5260 5410, 5420 and 5422 only supported. */ +/* Exynos3250, 3472, 5260 5410, 5420 and 5422 only supported. */ #define HWTRGEN_ENABLE (1 << 3) #define HWTRGMASK_ENABLE (1 << 4) -/* Exynos3250, 3472, 4415, 5260, 5420 and 5422 only supported. */ +/* Exynos3250, 3472, 5260, 5420 and 5422 only supported. */ #define HWTRIGEN_PER_ENABLE (1 << 31) /* display mode change control register except exynos4 */ @@ -138,18 +138,6 @@ static struct fimd_driver_data exynos4_fimd_driver_data = { .has_vtsel = 1, }; -static struct fimd_driver_data exynos4415_fimd_driver_data = { - .timing_base = 0x20000, - .lcdblk_offset = 0x210, - .lcdblk_vt_shift = 10, - .lcdblk_bypass_shift = 1, - .trg_type = I80_HW_TRG, - .has_shadowcon = 1, - .has_vidoutcon = 1, - .has_vtsel = 1, - .has_trigger_per_te = 1, -}; - static struct fimd_driver_data exynos5_fimd_driver_data = { .timing_base = 0x20000, .lcdblk_offset = 0x214, @@ -210,8 +198,6 @@ static const struct of_device_id fimd_driver_dt_match[] = { .data = &exynos3_fimd_driver_data }, { .compatible = "samsung,exynos4210-fimd", .data = &exynos4_fimd_driver_data }, - { .compatible = "samsung,exynos4415-fimd", - .data = &exynos4415_fimd_driver_data }, { .compatible = "samsung,exynos5250-fimd", .data = &exynos5_fimd_driver_data }, { .compatible = "samsung,exynos5420-fimd", -- cgit v1.2.3 From a392276d1dec63e5aabe6f527c37de29a729559a Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:56 +0100 Subject: drm/exynos: move crtc event handling to drivers callbacks CRTC event is currently send with next vblank, or instantly in case crtc is being disabled. This approach usually works, but in corner cases it can result in premature event generation. Only device driver is able to verify if the event can be sent. This patch is a first step in that direction - it moves event handling to the drivers. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 1 + drivers/gpu/drm/exynos/exynos7_drm_decon.c | 1 + drivers/gpu/drm/exynos/exynos_drm_crtc.c | 29 +++++++++++++++------------ drivers/gpu/drm/exynos/exynos_drm_crtc.h | 2 ++ drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 ++ drivers/gpu/drm/exynos/exynos_drm_vidi.c | 1 + drivers/gpu/drm/exynos/exynos_mixer.c | 1 + 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index cca32a4fdab3..2130ccf1e036 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -378,6 +378,7 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) if (ctx->out_type & IFTYPE_I80) set_bit(BIT_WIN_UPDATED, &ctx->flags); + exynos_crtc_handle_event(crtc); } static void decon_swreset(struct decon_context *ctx) diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index f9ab19e205e2..48811806fa27 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -526,6 +526,7 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) for (i = 0; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); + exynos_crtc_handle_event(crtc); } static void decon_init(struct decon_context *ctx) diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index 5367b6664fe3..c65f4509932c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -85,16 +85,28 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); - struct drm_pending_vblank_event *event; - unsigned long flags; if (exynos_crtc->ops->atomic_flush) exynos_crtc->ops->atomic_flush(exynos_crtc); +} + +static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = { + .enable = exynos_drm_crtc_enable, + .disable = exynos_drm_crtc_disable, + .mode_set_nofb = exynos_drm_crtc_mode_set_nofb, + .atomic_check = exynos_crtc_atomic_check, + .atomic_begin = exynos_crtc_atomic_begin, + .atomic_flush = exynos_crtc_atomic_flush, +}; + +void exynos_crtc_handle_event(struct exynos_drm_crtc *exynos_crtc) +{ + struct drm_crtc *crtc = &exynos_crtc->base; + struct drm_pending_vblank_event *event = crtc->state->event; + unsigned long flags; - event = crtc->state->event; if (event) { crtc->state->event = NULL; - spin_lock_irqsave(&crtc->dev->event_lock, flags); if (drm_crtc_vblank_get(crtc) == 0) drm_crtc_arm_vblank_event(crtc, event); @@ -105,15 +117,6 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc, } -static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = { - .enable = exynos_drm_crtc_enable, - .disable = exynos_drm_crtc_disable, - .mode_set_nofb = exynos_drm_crtc_mode_set_nofb, - .atomic_check = exynos_crtc_atomic_check, - .atomic_begin = exynos_crtc_atomic_begin, - .atomic_flush = exynos_crtc_atomic_flush, -}; - static void exynos_drm_crtc_destroy(struct drm_crtc *crtc) { struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.h b/drivers/gpu/drm/exynos/exynos_drm_crtc.h index 6a581a8af465..abd5d6ceac0c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.h +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.h @@ -40,4 +40,6 @@ int exynos_drm_crtc_get_pipe_from_type(struct drm_device *drm_dev, */ void exynos_drm_crtc_te_handler(struct drm_crtc *crtc); +void exynos_crtc_handle_event(struct exynos_drm_crtc *exynos_crtc); + #endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 661b9fe049e2..a3162a4566ce 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -709,6 +709,8 @@ static void fimd_atomic_flush(struct exynos_drm_crtc *crtc) for (i = 0; i < WINDOWS_NR; i++) fimd_shadow_protect_win(ctx, i, false); + + exynos_crtc_handle_event(crtc); } static void fimd_update_plane(struct exynos_drm_crtc *crtc, diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 57fe514d5c5b..5d9a62a87eec 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -170,6 +170,7 @@ static const struct exynos_drm_crtc_ops vidi_crtc_ops = { .enable_vblank = vidi_enable_vblank, .disable_vblank = vidi_disable_vblank, .update_plane = vidi_update_plane, + .atomic_flush = exynos_crtc_handle_event, }; static void vidi_fake_vblank_timer(unsigned long arg) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 72143ac10525..25edb635a197 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1012,6 +1012,7 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc) return; mixer_vsync_set_update(mixer_ctx, true); + exynos_crtc_handle_event(crtc); } static void mixer_enable(struct exynos_drm_crtc *crtc) -- cgit v1.2.3 From 73488331eb9460d14974a1e2c734f77ce8869183 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:57 +0100 Subject: drm/exynos/decon5433: fix vblank event handling Current implementation of event handling assumes that vblank interrupt is always called at the right time. It is not true, it can be delayed due to various reasons. As a result different races can happen. The patch fixes the issue by using hardware frame counter present in DECON to serialize vblank and commit completion events. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 78 ++++++++++++++++++++++++++- include/video/exynos5433_decon.h | 8 +++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 2130ccf1e036..cfe6f8af849f 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -68,6 +68,8 @@ struct decon_context { unsigned long flags; unsigned long out_type; int first_win; + spinlock_t vblank_lock; + u32 frame_id; }; static const uint32_t decon_formats[] = { @@ -122,6 +124,48 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc) writel(0, ctx->addr + DECON_VIDINTCON0); } +/* return number of starts/ends of frame transmissions since reset */ +static u32 decon_get_frame_count(struct decon_context *ctx, bool end) +{ + u32 frm, pfrm, status, cnt = 2; + + /* To get consistent result repeat read until frame id is stable. + * Usually the loop will be executed once, in rare cases when the loop + * is executed at frame change time 2nd pass will be needed. + */ + frm = readl(ctx->addr + DECON_CRFMID); + do { + status = readl(ctx->addr + DECON_VIDCON1); + pfrm = frm; + frm = readl(ctx->addr + DECON_CRFMID); + } while (frm != pfrm && --cnt); + + /* CRFMID is incremented on BPORCH in case of I80 and on VSYNC in case + * of RGB, it should be taken into account. + */ + if (!frm) + return 0; + + switch (status & (VIDCON1_VSTATUS_MASK | VIDCON1_I80_ACTIVE)) { + case VIDCON1_VSTATUS_VS: + if (!(ctx->out_type & IFTYPE_I80)) + --frm; + break; + case VIDCON1_VSTATUS_BP: + --frm; + break; + case VIDCON1_I80_ACTIVE: + case VIDCON1_VSTATUS_AC: + if (end) + --frm; + break; + default: + break; + } + + return frm; +} + static void decon_setup_trigger(struct decon_context *ctx) { if (!(ctx->out_type & (IFTYPE_I80 | I80_HW_TRG))) @@ -365,11 +409,14 @@ static void decon_disable_plane(struct exynos_drm_crtc *crtc, static void decon_atomic_flush(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; + unsigned long flags; int i; if (test_bit(BIT_SUSPENDED, &ctx->flags)) return; + spin_lock_irqsave(&ctx->vblank_lock, flags); + for (i = ctx->first_win; i < WINDOWS_NR; i++) decon_shadow_protect_win(ctx, i, false); @@ -378,12 +425,18 @@ static void decon_atomic_flush(struct exynos_drm_crtc *crtc) if (ctx->out_type & IFTYPE_I80) set_bit(BIT_WIN_UPDATED, &ctx->flags); + + ctx->frame_id = decon_get_frame_count(ctx, true); + exynos_crtc_handle_event(crtc); + + spin_unlock_irqrestore(&ctx->vblank_lock, flags); } static void decon_swreset(struct decon_context *ctx) { unsigned int tries; + unsigned long flags; writel(0, ctx->addr + DECON_VIDCON0); for (tries = 2000; tries; --tries) { @@ -401,6 +454,10 @@ static void decon_swreset(struct decon_context *ctx) WARN(tries == 0, "failed to software reset DECON\n"); + spin_lock_irqsave(&ctx->vblank_lock, flags); + ctx->frame_id = 0; + spin_unlock_irqrestore(&ctx->vblank_lock, flags); + if (!(ctx->out_type & IFTYPE_HDMI)) return; @@ -579,6 +636,24 @@ static const struct component_ops decon_component_ops = { .unbind = decon_unbind, }; +static void decon_handle_vblank(struct decon_context *ctx) +{ + u32 frm; + + spin_lock(&ctx->vblank_lock); + + frm = decon_get_frame_count(ctx, true); + + if (frm != ctx->frame_id) { + /* handle only if incremented, take care of wrap-around */ + if ((s32)(frm - ctx->frame_id) > 0) + drm_crtc_handle_vblank(&ctx->crtc->base); + ctx->frame_id = frm; + } + + spin_unlock(&ctx->vblank_lock); +} + static irqreturn_t decon_irq_handler(int irq, void *dev_id) { struct decon_context *ctx = dev_id; @@ -599,7 +674,7 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id) (VIDOUT_INTERLACE_EN_F | VIDOUT_INTERLACE_FIELD_F)) return IRQ_HANDLED; } - drm_crtc_handle_vblank(&ctx->crtc->base); + decon_handle_vblank(ctx); } out: @@ -672,6 +747,7 @@ static int exynos5433_decon_probe(struct platform_device *pdev) __set_bit(BIT_SUSPENDED, &ctx->flags); ctx->dev = dev; ctx->out_type = (unsigned long)of_device_get_match_data(dev); + spin_lock_init(&ctx->vblank_lock); if (ctx->out_type & IFTYPE_HDMI) { ctx->first_win = 1; diff --git a/include/video/exynos5433_decon.h b/include/video/exynos5433_decon.h index ef8e2a8ad0af..352fc0d9528f 100644 --- a/include/video/exynos5433_decon.h +++ b/include/video/exynos5433_decon.h @@ -46,6 +46,7 @@ #define DECON_FRAMEFIFO_STATUS 0x0524 #define DECON_CMU 0x1404 #define DECON_UPDATE 0x1410 +#define DECON_CRFMID 0x1414 #define DECON_UPDATE_SCHEME 0x1438 #define DECON_VIDCON1 0x2000 #define DECON_VIDCON2 0x2004 @@ -142,6 +143,13 @@ #define STANDALONE_UPDATE_F (1 << 0) /* DECON_VIDCON1 */ +#define VIDCON1_LINECNT_MASK (0x0fff << 16) +#define VIDCON1_I80_ACTIVE (1 << 15) +#define VIDCON1_VSTATUS_MASK (0x3 << 13) +#define VIDCON1_VSTATUS_VS (0 << 13) +#define VIDCON1_VSTATUS_BP (1 << 13) +#define VIDCON1_VSTATUS_AC (2 << 13) +#define VIDCON1_VSTATUS_FP (3 << 13) #define VIDCON1_VCLK_MASK (0x3 << 9) #define VIDCON1_VCLK_RUN_VDEN_DISABLE (0x3 << 9) #define VIDCON1_VCLK_HOLD (0x0 << 9) -- cgit v1.2.3 From f3cce673e1c11112c536fbc8e6912c5414d7141f Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:58 +0100 Subject: drm/exynos/decon5433: signal frame done interrupt at front porch DECON in case of video mode generates interrupt by default at start of vertical back porch. As this interrupt is used to generate VBLANK events more optimal point is start of vertical front porch. Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +- include/video/exynos5433_decon.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index cfe6f8af849f..a43d0fa0b051 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -105,7 +105,7 @@ static int decon_enable_vblank(struct exynos_drm_crtc *crtc) if (ctx->out_type & IFTYPE_I80) val |= VIDINTCON0_FRAMEDONE; else - val |= VIDINTCON0_INTFRMEN; + val |= VIDINTCON0_INTFRMEN | VIDINTCON0_FRAMESEL_FP; writel(val, ctx->addr + DECON_VIDINTCON0); } diff --git a/include/video/exynos5433_decon.h b/include/video/exynos5433_decon.h index 352fc0d9528f..6b083d327e98 100644 --- a/include/video/exynos5433_decon.h +++ b/include/video/exynos5433_decon.h @@ -127,6 +127,10 @@ /* VIDINTCON0 */ #define VIDINTCON0_FRAMEDONE (1 << 17) +#define VIDINTCON0_FRAMESEL_BP (0 << 15) +#define VIDINTCON0_FRAMESEL_VS (1 << 15) +#define VIDINTCON0_FRAMESEL_AC (2 << 15) +#define VIDINTCON0_FRAMESEL_FP (3 << 15) #define VIDINTCON0_INTFRMEN (1 << 12) #define VIDINTCON0_INTEN (1 << 0) -- cgit v1.2.3 From 82a01783252be726d76cdbbababc16540f582cec Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:27:59 +0100 Subject: drm/exynos/fimd: signal frame done interrupt at front porch VBLANK interrupt should be signalled as soon as scanout ends, front porch is the best moment. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index a3162a4566ce..3f04d72c448d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -243,7 +243,7 @@ static int fimd_enable_vblank(struct exynos_drm_crtc *crtc) val |= VIDINTCON0_INT_FRAME; val &= ~VIDINTCON0_FRAMESEL0_MASK; - val |= VIDINTCON0_FRAMESEL0_VSYNC; + val |= VIDINTCON0_FRAMESEL0_FRONTPORCH; val &= ~VIDINTCON0_FRAMESEL1_MASK; val |= VIDINTCON0_FRAMESEL1_NONE; } -- cgit v1.2.3 From f07d9c2864d4ccf0a0b5bd2dd6491f5204eab5fe Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 14 Mar 2017 09:28:00 +0100 Subject: drm/exynos/decon5433: fix software trigger mask The patch fixes copy/paste bug introduced during code refactoring. Reported-by: Dan Carpenter Fixes: b93c2e8b5d9d ("drm/exynos/decon5433: configure sysreg in case of hardware trigger")Fixes: Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index a43d0fa0b051..c0e8d3302292 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -172,8 +172,8 @@ static void decon_setup_trigger(struct decon_context *ctx) return; if (!(ctx->out_type & I80_HW_TRG)) { - writel(TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN - | TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN, + writel(TRIGCON_TRIGEN_PER_F | TRIGCON_TRIGEN_F | + TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN, ctx->addr + DECON_TRIGCON); return; } -- cgit v1.2.3 From 9cdf0ed25a9b34fd82cb0eb47a8bdc47dc9f4ff5 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Mar 2017 20:38:04 +0200 Subject: drm/exynos: Print kernel pointers in a restricted form Printing raw kernel pointers might reveal information which sometimes we try to hide (e.g. with Kernel Address Space Layout Randomization). Use the "%pK" format so these pointers will be hidden for unprivileged users. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 ++-- drivers/gpu/drm/exynos/exynos_drm_fimc.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_ipp.c | 22 +++++++++++----------- drivers/gpu/drm/exynos/exynos_drm_rotator.c | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index ef6f9c6de098..c671f8e017db 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -966,7 +966,7 @@ static void exynos_dsi_send_to_fifo(struct exynos_dsi *dsi, bool first = !xfer->tx_done; u32 reg; - dev_dbg(dev, "< xfer %p: tx len %u, done %u, rx len %u, done %u\n", + dev_dbg(dev, "< xfer %pK: tx len %u, done %u, rx len %u, done %u\n", xfer, length, xfer->tx_done, xfer->rx_len, xfer->rx_done); if (length > DSI_TX_FIFO_SIZE) @@ -1164,7 +1164,7 @@ static bool exynos_dsi_transfer_finish(struct exynos_dsi *dsi) spin_unlock_irqrestore(&dsi->transfer_lock, flags); dev_dbg(dsi->dev, - "> xfer %p, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", + "> xfer %pK, tx_len %zu, tx_done %u, rx_len %u, rx_done %u\n", xfer, xfer->packet.payload_length, xfer->tx_done, xfer->rx_len, xfer->rx_done); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 95871577015d..5b18b5c5fdf2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -1695,7 +1695,7 @@ static int fimc_probe(struct platform_device *pdev) goto err_put_clk; } - DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv); + DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); spin_lock_init(&ctx->lock); platform_set_drvdata(pdev, ctx); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 4c28f7ffcc4d..55a1579d11b3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -218,7 +218,7 @@ static struct exynos_drm_gem *exynos_drm_gem_init(struct drm_device *dev, return ERR_PTR(ret); } - DRM_DEBUG_KMS("created file object = %p\n", obj->filp); + DRM_DEBUG_KMS("created file object = %pK\n", obj->filp); return exynos_gem; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index bef57987759d..0506b2b17ac1 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1723,7 +1723,7 @@ static int gsc_probe(struct platform_device *pdev) return ret; } - DRM_DEBUG_KMS("id[%d]ippdrv[%p]\n", ctx->id, ippdrv); + DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); mutex_init(&ctx->lock); platform_set_drvdata(pdev, ctx); diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index 9c84ee76f18a..3edda18cc2d2 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -208,7 +208,7 @@ static struct exynos_drm_ippdrv *ipp_find_drv_by_handle(u32 prop_id) * e.g PAUSE state, queue buf, command control. */ list_for_each_entry(ippdrv, &exynos_drm_ippdrv_list, drv_list) { - DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n", count++, ippdrv); + DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv); mutex_lock(&ippdrv->cmd_lock); list_for_each_entry(c_node, &ippdrv->cmd_list, list) { @@ -388,7 +388,7 @@ int exynos_drm_ipp_set_property(struct drm_device *drm_dev, void *data, } property->prop_id = ret; - DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%p]\n", + DRM_DEBUG_KMS("created prop_id[%d]cmd[%d]ippdrv[%pK]\n", property->prop_id, property->cmd, ippdrv); /* stored property information and ippdrv in private data */ @@ -518,7 +518,7 @@ static int ipp_put_mem_node(struct drm_device *drm_dev, { int i; - DRM_DEBUG_KMS("node[%p]\n", m_node); + DRM_DEBUG_KMS("node[%pK]\n", m_node); if (!m_node) { DRM_ERROR("invalid dequeue node.\n"); @@ -562,7 +562,7 @@ static struct drm_exynos_ipp_mem_node m_node->buf_id = qbuf->buf_id; INIT_LIST_HEAD(&m_node->list); - DRM_DEBUG_KMS("m_node[%p]ops_id[%d]\n", m_node, qbuf->ops_id); + DRM_DEBUG_KMS("m_node[%pK]ops_id[%d]\n", m_node, qbuf->ops_id); DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]\n", qbuf->prop_id, m_node->buf_id); for_each_ipp_planar(i) { @@ -659,7 +659,7 @@ static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node, mutex_lock(&c_node->event_lock); list_for_each_entry_safe(e, te, &c_node->event_list, base.link) { - DRM_DEBUG_KMS("count[%d]e[%p]\n", count++, e); + DRM_DEBUG_KMS("count[%d]e[%pK]\n", count++, e); /* * qbuf == NULL condition means all event deletion. @@ -750,7 +750,7 @@ static struct drm_exynos_ipp_mem_node /* find memory node from memory list */ list_for_each_entry(m_node, head, list) { - DRM_DEBUG_KMS("count[%d]m_node[%p]\n", count++, m_node); + DRM_DEBUG_KMS("count[%d]m_node[%pK]\n", count++, m_node); /* compare buffer id */ if (m_node->buf_id == qbuf->buf_id) @@ -767,7 +767,7 @@ static int ipp_set_mem_node(struct exynos_drm_ippdrv *ippdrv, struct exynos_drm_ipp_ops *ops = NULL; int ret = 0; - DRM_DEBUG_KMS("node[%p]\n", m_node); + DRM_DEBUG_KMS("node[%pK]\n", m_node); if (!m_node) { DRM_ERROR("invalid queue node.\n"); @@ -1232,7 +1232,7 @@ static int ipp_start_property(struct exynos_drm_ippdrv *ippdrv, m_node = list_first_entry(head, struct drm_exynos_ipp_mem_node, list); - DRM_DEBUG_KMS("m_node[%p]\n", m_node); + DRM_DEBUG_KMS("m_node[%pK]\n", m_node); ret = ipp_set_mem_node(ippdrv, c_node, m_node); if (ret) { @@ -1601,7 +1601,7 @@ static int ipp_subdrv_probe(struct drm_device *drm_dev, struct device *dev) } ippdrv->prop_list.ipp_id = ret; - DRM_DEBUG_KMS("count[%d]ippdrv[%p]ipp_id[%d]\n", + DRM_DEBUG_KMS("count[%d]ippdrv[%pK]ipp_id[%d]\n", count++, ippdrv, ret); /* store parent device for node */ @@ -1659,7 +1659,7 @@ static int ipp_subdrv_open(struct drm_device *drm_dev, struct device *dev, file_priv->ipp_dev = dev; - DRM_DEBUG_KMS("done priv[%p]\n", dev); + DRM_DEBUG_KMS("done priv[%pK]\n", dev); return 0; } @@ -1676,7 +1676,7 @@ static void ipp_subdrv_close(struct drm_device *drm_dev, struct device *dev, mutex_lock(&ippdrv->cmd_lock); list_for_each_entry_safe(c_node, tc_node, &ippdrv->cmd_list, list) { - DRM_DEBUG_KMS("count[%d]ippdrv[%p]\n", + DRM_DEBUG_KMS("count[%d]ippdrv[%pK]\n", count++, ippdrv); if (c_node->filp == file) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 6591e406084c..79282a820ecc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -748,7 +748,7 @@ static int rotator_probe(struct platform_device *pdev) goto err_ippdrv_register; } - DRM_DEBUG_KMS("ippdrv[%p]\n", ippdrv); + DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv); platform_set_drvdata(pdev, rot); -- cgit v1.2.3 From 22e098daae7e53763493b9d9976ef8c65190017e Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Wed, 15 Mar 2017 12:20:42 +0100 Subject: drm/exynos/dsi: make te-gpios optional DSI forwards te-gpios interrupts to display controller, but if display controller works in HW-TRIGGER mode this interrupt is not necessary. Making te-gpios property optional allows to avoid generating spare interrupts. And also if panel device node of command mode panel device doesn't provide te-gpios property then the panel driver failed to probe. This was a critial issue. With this patch we can not only get rid of 60 interrupt callbacks per second but also fix the critial issues. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index c671f8e017db..d7ef26370e67 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1335,9 +1335,12 @@ static int exynos_dsi_register_te_irq(struct exynos_dsi *dsi) int te_gpio_irq; dsi->te_gpio = of_get_named_gpio(dsi->panel_node, "te-gpios", 0); + if (dsi->te_gpio == -ENOENT) + return 0; + if (!gpio_is_valid(dsi->te_gpio)) { - dev_err(dsi->dev, "no te-gpios specified\n"); ret = dsi->te_gpio; + dev_err(dsi->dev, "cannot get te-gpios, %d\n", ret); goto out; } -- cgit v1.2.3 From fc36a903265c18d124cefaba364a7fa71b21be61 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 21 Mar 2017 12:38:02 +1100 Subject: Revert "powerpc/64: Disable use of radix under a hypervisor" This reverts commit 3f91a89d424a79f8082525db5a375e438887bb3e. Now that we do have the machinery for using the radix MMU under a hypervisor, the extra check and comment introduced in 3f91a89d424a are no longer correct. The result is that when booted under a hypervisor that only allows use of radix, we clear the MMU_FTR_TYPE_RADIX and then set it again, and print a warning about ignoring the disable_radix command line option, even though the command line does not include "disable_radix". Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/mm/init_64.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 9be992083d2a..c22f207aa656 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -397,8 +397,7 @@ static void early_check_vec5(void) void __init mmu_early_init_devtree(void) { /* Disable radix mode based on kernel command line. */ - /* We don't yet have the machinery to do radix as a guest. */ - if (disable_radix || !(mfmsr() & MSR_HV)) + if (disable_radix) cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; /* -- cgit v1.2.3 From cc638a488a5205713b51eabd047be6ea641cc328 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 20 Mar 2017 17:55:22 +1100 Subject: gcc-plugins: update architecture list in documentation Commit 65c059bcaa73 ("powerpc: Enable support for GCC plugins") enabled GCC plugins on powerpc, but neglected to update the architecture list in the docs. Rectify this. Fixes: 65c059bcaa73 ("powerpc: Enable support for GCC plugins") Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- Documentation/gcc-plugins.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/gcc-plugins.txt b/Documentation/gcc-plugins.txt index 891c69464434..433eaefb4aa1 100644 --- a/Documentation/gcc-plugins.txt +++ b/Documentation/gcc-plugins.txt @@ -18,8 +18,8 @@ because gcc versions 4.5 and 4.6 are compiled by a C compiler, gcc-4.7 can be compiled by a C or a C++ compiler, and versions 4.8+ can only be compiled by a C++ compiler. -Currently the GCC plugin infrastructure supports only the x86, arm and arm64 -architectures. +Currently the GCC plugin infrastructure supports only the x86, arm, arm64 and +powerpc architectures. This infrastructure was ported from grsecurity [6] and PaX [7]. -- cgit v1.2.3 From a82f16188a32a3c889916c582ea2d9188e3c2734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:05 +0100 Subject: ASoC: sun8i-codec: Remove analog "HP" widget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "HP" widget is already present and take part to the analog part (sun8i-codec-analog). Remove it from the digital part as it is unnecessary. Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- sound/soc/sunxi/sun8i-codec.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index b92bdc8361af..d60f6fbd36a2 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -321,8 +321,6 @@ static const struct snd_soc_dapm_widget sun8i_codec_dapm_widgets[] = { SUN8I_MOD_RST_CTL_AIF1, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("RST DAC", SUN8I_MOD_RST_CTL, SUN8I_MOD_RST_CTL_DAC, 0, NULL, 0), - - SND_SOC_DAPM_OUTPUT("HP"), }; static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { @@ -344,10 +342,6 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { /* DAC Mixer Routes */ { "Left DAC Mixer", "LSlot 0", "Digital Left DAC"}, { "Right DAC Mixer", "RSlot 0", "Digital Right DAC"}, - - /* End of route : HP out */ - { "HP", NULL, "Left DAC Mixer" }, - { "HP", NULL, "Right DAC Mixer" }, }; static struct snd_soc_dai_ops sun8i_codec_dai_ops = { -- cgit v1.2.3 From 649d55436137b397accb6a9d1b6975598c693bcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:06 +0100 Subject: ASoC: sun8i-codec: Update mixer to use SOC_DAPM_DOUBLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the driver to use the new SOC_DAPM_DOUBLE definition on the digital DAC mixer. Update the names accordingly as, when they are shared, the controls are not prefixed with the widget's name anymore. Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- sound/soc/sunxi/sun8i-codec.c | 45 ++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index d60f6fbd36a2..107fa8213600 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -259,25 +259,20 @@ static int sun8i_codec_hw_params(struct snd_pcm_substream *substream, return 0; } -static const struct snd_kcontrol_new sun8i_output_left_mixer_controls[] = { - SOC_DAPM_SINGLE("LSlot 0", SUN8I_DAC_MXR_SRC, - SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA0L, 1, 0), - SOC_DAPM_SINGLE("LSlot 1", SUN8I_DAC_MXR_SRC, - SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA1L, 1, 0), - SOC_DAPM_SINGLE("DACL", SUN8I_DAC_MXR_SRC, - SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF2DACL, 1, 0), - SOC_DAPM_SINGLE("ADCL", SUN8I_DAC_MXR_SRC, - SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_ADCL, 1, 0), -}; - -static const struct snd_kcontrol_new sun8i_output_right_mixer_controls[] = { - SOC_DAPM_SINGLE("RSlot 0", SUN8I_DAC_MXR_SRC, +static const struct snd_kcontrol_new sun8i_dac_mixer_controls[] = { + SOC_DAPM_DOUBLE("AIF1 Slot 0 Digital DAC Playback Switch", + SUN8I_DAC_MXR_SRC, + SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA0L, SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_AIF1DA0R, 1, 0), - SOC_DAPM_SINGLE("RSlot 1", SUN8I_DAC_MXR_SRC, + SOC_DAPM_DOUBLE("AIF1 Slot 1 Digital DAC Playback Switch", + SUN8I_DAC_MXR_SRC, + SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF1DA1L, SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_AIF1DA1R, 1, 0), - SOC_DAPM_SINGLE("DACR", SUN8I_DAC_MXR_SRC, + SOC_DAPM_DOUBLE("AIF2 Digital DAC Playback Switch", SUN8I_DAC_MXR_SRC, + SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_AIF2DACL, SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_AIF2DACR, 1, 0), - SOC_DAPM_SINGLE("ADCR", SUN8I_DAC_MXR_SRC, + SOC_DAPM_DOUBLE("ADC Digital DAC Playback Switch", SUN8I_DAC_MXR_SRC, + SUN8I_DAC_MXR_SRC_DACL_MXR_SRC_ADCL, SUN8I_DAC_MXR_SRC_DACR_MXR_SRC_ADCR, 1, 0), }; @@ -293,12 +288,12 @@ static const struct snd_soc_dapm_widget sun8i_codec_dapm_widgets[] = { SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0R_ENA, 0), /* DAC Mixers */ - SND_SOC_DAPM_MIXER("Left DAC Mixer", SND_SOC_NOPM, 0, 0, - sun8i_output_left_mixer_controls, - ARRAY_SIZE(sun8i_output_left_mixer_controls)), - SND_SOC_DAPM_MIXER("Right DAC Mixer", SND_SOC_NOPM, 0, 0, - sun8i_output_right_mixer_controls, - ARRAY_SIZE(sun8i_output_right_mixer_controls)), + SND_SOC_DAPM_MIXER("Left Digital DAC Mixer", SND_SOC_NOPM, 0, 0, + sun8i_dac_mixer_controls, + ARRAY_SIZE(sun8i_dac_mixer_controls)), + SND_SOC_DAPM_MIXER("Right Digital DAC Mixer", SND_SOC_NOPM, 0, 0, + sun8i_dac_mixer_controls, + ARRAY_SIZE(sun8i_dac_mixer_controls)), /* Clocks */ SND_SOC_DAPM_SUPPLY("MODCLK AFI1", SUN8I_MOD_CLK_ENA, @@ -340,8 +335,10 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { { "Digital Right DAC", NULL, "DAC" }, /* DAC Mixer Routes */ - { "Left DAC Mixer", "LSlot 0", "Digital Left DAC"}, - { "Right DAC Mixer", "RSlot 0", "Digital Right DAC"}, + { "Left Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", + "Digital Left DAC"}, + { "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch ", + "Digital Right DAC"}, }; static struct snd_soc_dai_ops sun8i_codec_dai_ops = { -- cgit v1.2.3 From 79e26de81448fad80f6c15ae1e3c9e7fca2740cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:07 +0100 Subject: ASoC: sun8i-codec: Fix space on audio-routing widget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An unwanted space is present in an audio widget's name on the dapm routing. It causes an error on the recognition of this widget (error: ("no dapm match for AIF1 Slot 0 Right"). Remove the space fixes it. Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- sound/soc/sunxi/sun8i-codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 107fa8213600..adb13fbd2006 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -337,7 +337,7 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { /* DAC Mixer Routes */ { "Left Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", "Digital Left DAC"}, - { "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch ", + { "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", "Digital Right DAC"}, }; -- cgit v1.2.3 From d1792285ca63e17f8a7eb42efa48834c261a2d8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:08 +0100 Subject: ASoC: sun8i-codec: Convert to use SND_SOC_DAPM_AIF_IN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the driver to use SND_SOC_DAPM_AIF_IN instead of SND_SOC_DAPM_DAC. Rename the interface's widgets to be more precise on which slot the interface is connected. Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- sound/soc/sunxi/sun8i-codec.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index adb13fbd2006..7527ba29a5a0 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -281,11 +281,13 @@ static const struct snd_soc_dapm_widget sun8i_codec_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("DAC", SUN8I_DAC_DIG_CTRL, SUN8I_DAC_DIG_CTRL_ENDA, 0, NULL, 0), - /* Analog DAC */ - SND_SOC_DAPM_DAC("Digital Left DAC", "Playback", SUN8I_AIF1_DACDAT_CTRL, - SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0L_ENA, 0), - SND_SOC_DAPM_DAC("Digital Right DAC", "Playback", SUN8I_AIF1_DACDAT_CTRL, - SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0R_ENA, 0), + /* Analog DAC AIF */ + SND_SOC_DAPM_AIF_IN("AIF1 Slot 0 Left", "Playback", 0, + SUN8I_AIF1_DACDAT_CTRL, + SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0L_ENA, 0), + SND_SOC_DAPM_AIF_IN("AIF1 Slot 0 Right", "Playback", 0, + SUN8I_AIF1_DACDAT_CTRL, + SUN8I_AIF1_DACDAT_CTRL_AIF1_DA0R_ENA, 0), /* DAC Mixers */ SND_SOC_DAPM_MIXER("Left Digital DAC Mixer", SND_SOC_NOPM, 0, 0, @@ -331,14 +333,14 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = { { "DAC", NULL, "MODCLK DAC" }, /* DAC Routes */ - { "Digital Left DAC", NULL, "DAC" }, - { "Digital Right DAC", NULL, "DAC" }, + { "AIF1 Slot 0 Right", NULL, "DAC" }, + { "AIF1 Slot 0 Left", NULL, "DAC" }, /* DAC Mixer Routes */ { "Left Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", - "Digital Left DAC"}, + "AIF1 Slot 0 Left"}, { "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch", - "Digital Right DAC"}, + "AIF1 Slot 0 Right"}, }; static struct snd_soc_dai_ops sun8i_codec_dai_ops = { -- cgit v1.2.3 From eb3abaea7ea42619a48fa84e4b1ff48f1b18d863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Myl=C3=A8ne=20Josserand?= Date: Sat, 18 Mar 2017 08:55:09 +0100 Subject: ARM: dts: sun8i: Update audio-routing with renamed widgets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The digital AIF interfaces has been renamed in the sun8i audio codec driver so the audio-routing in the device tree must be renamed too. Signed-off-by: Mylène Josserand Acked-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- arch/arm/boot/dts/sun8i-a33.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi index 18c174fef84f..0467fb365bfc 100644 --- a/arch/arm/boot/dts/sun8i-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a33.dtsi @@ -113,8 +113,8 @@ simple-audio-card,mclk-fs = <512>; simple-audio-card,aux-devs = <&codec_analog>; simple-audio-card,routing = - "Left DAC", "Digital Left DAC", - "Right DAC", "Digital Right DAC"; + "Left DAC", "AIF1 Slot 0 Left", + "Right DAC", "AIF1 Slot 0 Right"; status = "disabled"; simple-audio-card,cpu { -- cgit v1.2.3 From c6736a94d0e527ddc0d1eb99dbc59886a9ecf471 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Mar 2017 13:26:02 +0100 Subject: ALSA: x86: Make CONFIG_SND_X86 bool CONFIG_SND_X86 is a menu config to filter only for x86-specific drivers in its sub-menu, and this doesn't have to be tristate but rather it should be a bool. Also, like other sub-menu configs, it's more user-friendly to be default=y; it's merely a menu config and the actual drivers are configured in the sub-menu, after all. Fixes: 287599cf2d77 ("ALSA: add Intel HDMI LPE audio driver for BYT/CHT-T") Signed-off-by: Takashi Iwai --- sound/x86/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/x86/Kconfig b/sound/x86/Kconfig index 84c8f8fc597c..8adf4d1bd46e 100644 --- a/sound/x86/Kconfig +++ b/sound/x86/Kconfig @@ -1,6 +1,7 @@ menuconfig SND_X86 - tristate "X86 sound devices" + bool "X86 sound devices" depends on X86 + default y ---help--- X86 sound devices that don't fall under SoC or PCI categories -- cgit v1.2.3 From c520ff3d03f0b5db7146d9beed6373ad5d2a5e0e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Mar 2017 13:56:04 +0100 Subject: ALSA: seq: Fix racy cell insertions during snd_seq_pool_done() When snd_seq_pool_done() is called, it marks the closing flag to refuse the further cell insertions. But snd_seq_pool_done() itself doesn't clear the cells but just waits until all cells are cleared by the caller side. That is, it's racy, and this leads to the endless stall as syzkaller spotted. This patch addresses the racy by splitting the setup of pool->closing flag out of snd_seq_pool_done(), and calling it properly before snd_seq_pool_done(). BugLink: http://lkml.kernel.org/r/CACT4Y+aqqy8bZA1fFieifNxR2fAfFQQABcBHj801+u5ePV0URw@mail.gmail.com Reported-and-tested-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 1 + sound/core/seq/seq_fifo.c | 3 +++ sound/core/seq/seq_memory.c | 17 +++++++++++++---- sound/core/seq/seq_memory.h | 1 + 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 4c935202ce23..f3b1d7f50b81 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1832,6 +1832,7 @@ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client, info->output_pool != client->pool->size)) { if (snd_seq_write_pool_allocated(client)) { /* remove all existing cells */ + snd_seq_pool_mark_closing(client->pool); snd_seq_queue_client_leave_cells(client->number); snd_seq_pool_done(client->pool); } diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 448efd4e980e..33980d1c8037 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -72,6 +72,9 @@ void snd_seq_fifo_delete(struct snd_seq_fifo **fifo) return; *fifo = NULL; + if (f->pool) + snd_seq_pool_mark_closing(f->pool); + snd_seq_fifo_clear(f); /* wake up clients if any */ diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index 1a1acf3ddda4..d4c61ec9be13 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -415,6 +415,18 @@ int snd_seq_pool_init(struct snd_seq_pool *pool) return 0; } +/* refuse the further insertion to the pool */ +void snd_seq_pool_mark_closing(struct snd_seq_pool *pool) +{ + unsigned long flags; + + if (snd_BUG_ON(!pool)) + return; + spin_lock_irqsave(&pool->lock, flags); + pool->closing = 1; + spin_unlock_irqrestore(&pool->lock, flags); +} + /* remove events */ int snd_seq_pool_done(struct snd_seq_pool *pool) { @@ -425,10 +437,6 @@ int snd_seq_pool_done(struct snd_seq_pool *pool) return -EINVAL; /* wait for closing all threads */ - spin_lock_irqsave(&pool->lock, flags); - pool->closing = 1; - spin_unlock_irqrestore(&pool->lock, flags); - if (waitqueue_active(&pool->output_sleep)) wake_up(&pool->output_sleep); @@ -485,6 +493,7 @@ int snd_seq_pool_delete(struct snd_seq_pool **ppool) *ppool = NULL; if (pool == NULL) return 0; + snd_seq_pool_mark_closing(pool); snd_seq_pool_done(pool); kfree(pool); return 0; diff --git a/sound/core/seq/seq_memory.h b/sound/core/seq/seq_memory.h index 4a2ec779b8a7..32f959c17786 100644 --- a/sound/core/seq/seq_memory.h +++ b/sound/core/seq/seq_memory.h @@ -84,6 +84,7 @@ static inline int snd_seq_total_cells(struct snd_seq_pool *pool) int snd_seq_pool_init(struct snd_seq_pool *pool); /* done pool - free events */ +void snd_seq_pool_mark_closing(struct snd_seq_pool *pool); int snd_seq_pool_done(struct snd_seq_pool *pool); /* create pool */ -- cgit v1.2.3 From 49cc4c217c0dbb7d09c402472d1f85a81c093f9f Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Mon, 6 Mar 2017 10:54:57 -0800 Subject: HID: wacom: Correct Intuos Pro 2 resolution The features struct for the second gen Intuos Pro uses the wrong constant for the resolution. This fix is for commit 4922cd2. Fixes: 4922cd2 ("HID: wacom: Support 2nd-gen Intuos Pro's Bluetooth classic interface") Signed-off-by: Aaron Armstrong Skomra Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 4aa3de9f1163..3d864466d473 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -4197,10 +4197,10 @@ static const struct wacom_features wacom_features_0x343 = WACOM_DTU_OFFSET, WACOM_DTU_OFFSET }; static const struct wacom_features wacom_features_0x360 = { "Wacom Intuos Pro M", 44800, 29600, 8191, 63, - INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; + INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 }; static const struct wacom_features wacom_features_0x361 = { "Wacom Intuos Pro L", 62200, 43200, 8191, 63, - INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; + INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 }; static const struct wacom_features wacom_features_HID_ANY_ID = { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID }; -- cgit v1.2.3 From b6b1f19b06b7d4dcc261a88d74c5fb0a53988b4e Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Mon, 6 Mar 2017 10:54:58 -0800 Subject: HID: wacom: don't manually release resources for the EKR Commit 5b779fc introduces the manual release of resources in wacom_remove() as an addition to the driver's use of devm. The EKR resources can only be released through wacom_remote_destroy_one() so we skip the manual release for it. Fixes: 5b779fc ("HID: wacom: release the resources before leaving despite devm") Signed-off-by: Aaron Armstrong Skomra Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index be8f7e2a026f..994bddc55b82 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2579,7 +2579,9 @@ static void wacom_remove(struct hid_device *hdev) /* make sure we don't trigger the LEDs */ wacom_led_groups_release(wacom); - wacom_release_resources(wacom); + + if (wacom->wacom_wac.features.type != REMOTE) + wacom_release_resources(wacom); hid_set_drvdata(hdev, NULL); } -- cgit v1.2.3 From deaba636997557fce46ca7bcb509bff5ea1b0558 Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Fri, 10 Feb 2017 18:23:00 +0000 Subject: HID: corsair: support for K65-K70 Rapidfire and Scimitar Pro RGB Add quirks for several corsair gaming devices to avoid long delays on report initialization Supported devices: - Corsair K65RGB Rapidfire Gaming Keyboard - Corsair K70RGB Rapidfire Gaming Keyboard - Corsair Scimitar Pro RGB Gaming Mouse Signed-off-by: Oscar Campos Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b3df60da0297..0e2e7c571d22 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -278,6 +278,9 @@ #define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13 #define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15 #define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17 +#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE 0x1b38 +#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE 0x1b39 +#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB 0x1b3e #define USB_VENDOR_ID_CREATIVELABS 0x041e #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index d6847a664446..a69a3c88ab29 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -80,6 +80,9 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, -- cgit v1.2.3 From 01adc47e885f1127b29d76d0dfb21d8262f9d6b4 Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Mon, 6 Mar 2017 21:02:39 +0000 Subject: HID: corsair: Add driver Scimitar Pro RGB gaming mouse 1b1c:1b3e support to hid-corsair This mouse sold by Corsair as Scimitar PRO RGB defines two consecutive Logical Minimum items in its Application (Consumer.0001) report making it non parseable. This patch fixes the report descriptor overriding byte 77 in rdesc from 0x16 (Logical Minimum with 16 bits value) to 0x26 (Logical Maximum with 16 bits value). Signed-off-by: Oscar Campos Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-corsair.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 8eab3200ac9a..8c54cb8f5d6d 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -190,6 +190,7 @@ config HID_CORSAIR Supported devices: - Vengeance K90 + - Scimitar PRO RGB config HID_PRODIKEYS tristate "Prodikeys PC-MIDI Keyboard support" diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index ae01ae601d74..3ceb4a2af381 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1870,6 +1870,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index c0303f61c26a..9ba5d98a1180 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -3,8 +3,10 @@ * * Supported devices: * - Vengeance K90 Keyboard + * - Scimitar PRO RGB Gaming Mouse * * Copyright (c) 2015 Clement Vuchener + * Copyright (c) 2017 Oscar Campos */ /* @@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev, return 0; } +/* + * The report descriptor of Corsair Scimitar RGB Pro gaming mouse is + * non parseable as they define two consecutive Logical Minimum for + * the Usage Page (Consumer) in rdescs bytes 75 and 77 being 77 0x16 + * that should be obviousy 0x26 for Logical Magimum of 16 bits. This + * prevents poper parsing of the report descriptor due Logical + * Minimum being larger than Logical Maximum. + * + * This driver fixes the report descriptor for: + * - USB ID b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse + */ + +static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); + + if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { + /* + * Corsair Scimitar RGB Pro report descriptor is broken and + * defines two different Logical Minimum for the Consumer + * Application. The byte 77 should be a 0x26 defining a 16 + * bits integer for the Logical Maximum but it is a 0x16 + * instead (Logical Minimum) + */ + switch (hdev->product) { + case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB: + if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16 + && rdesc[78] == 0xff && rdesc[79] == 0x0f) { + hid_info(hdev, "Fixing up report descriptor\n"); + rdesc[77] = 0x26; + } + break; + } + + } + return rdesc; +} + static const struct hid_device_id corsair_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90), .driver_data = CORSAIR_USE_K90_MACRO | CORSAIR_USE_K90_BACKLIGHT }, + { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, + USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, {} }; @@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = { .event = corsair_event, .remove = corsair_remove, .input_mapping = corsair_input_mapping, + .report_fixup = corsair_mouse_report_fixup, }; module_hid_driver(corsair_driver); MODULE_LICENSE("GPL"); +/* Original K90 driver author */ MODULE_AUTHOR("Clement Vuchener"); +/* Scimitar PRO RGB driver author */ +MODULE_AUTHOR("Oscar Campos"); MODULE_DESCRIPTION("HID driver for Corsair devices"); -- cgit v1.2.3 From 6e5364f5f472d4ea66d37459e299071b0190362c Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 14 Mar 2017 17:08:16 -0700 Subject: HID: wacom: generic: Wacom mouse is only provided for opaque tablets Commit f85c9dc ("Support tool ID and additional tool types") introduced mouse and lens cursor tools to generic codepath, which covers both display (direct) and opaque tablets (indirect devices). However, mouse and lens cursor tools are only provided for opaque tablets. This patch ignores mouse and lens cursor tools if the device is a display tablet. Signed-off-by: Ping Cheng Reviewed-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 3d864466d473..94250c293be2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1959,8 +1959,10 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev, input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH); input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL); input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH); - input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE); - input_set_capability(input, EV_KEY, BTN_TOOL_LENS); + if (!(features->device_type & WACOM_DEVICETYPE_DIRECT)) { + input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE); + input_set_capability(input, EV_KEY, BTN_TOOL_LENS); + } break; case WACOM_HID_WD_FINGERWHEEL: wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0); -- cgit v1.2.3 From ae5c682113f9f94cc5e76f92cf041ee624c173ee Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 19 Mar 2017 22:35:59 +0800 Subject: netfilter: nfnl_cthelper: fix incorrect helper->expect_class_max The helper->expect_class_max must be set to the total number of expect_policy minus 1, since we will use the statement "if (class > helper->expect_class_max)" to validate the CTA_EXPECT_CLASS attr in ctnetlink_alloc_expect. So for compatibility, set the helper->expect_class_max to the NFCTH_POLICY_SET_NUM attr's value minus 1. Also: it's invalid when the NFCTH_POLICY_SET_NUM attr's value is zero. 1. this will result "expect_policy = kzalloc(0, GFP_KERNEL);"; 2. we cannot set the helper->expect_class_max to a proper value. So if nla_get_be32(tb[NFCTH_POLICY_SET_NUM]) is zero, report -EINVAL to the userspace. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index de8782345c86..3cd41d105407 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -161,6 +161,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, int i, ret; struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + unsigned int class_max; ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, nfnl_cthelper_expect_policy_set); @@ -170,19 +171,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; - helper->expect_class_max = - ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); - - if (helper->expect_class_max != 0 && - helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (class_max == 0) + return -EINVAL; + if (class_max > NF_CT_MAX_EXPECT_CLASSES) return -EOVERFLOW; expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * - helper->expect_class_max, GFP_KERNEL); + class_max, GFP_KERNEL); if (expect_policy == NULL) return -ENOMEM; - for (i=0; iexpect_class_max; i++) { + for (i = 0; i < class_max; i++) { if (!tb[NFCTH_POLICY_SET+i]) goto err; @@ -191,6 +191,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, if (ret < 0) goto err; } + + helper->expect_class_max = class_max - 1; helper->expect_policy = expect_policy; return 0; err: @@ -377,10 +379,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb, goto nla_put_failure; if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, - htonl(helper->expect_class_max))) + htonl(helper->expect_class_max + 1))) goto nla_put_failure; - for (i=0; iexpect_class_max; i++) { + for (i = 0; i < helper->expect_class_max + 1; i++) { nest_parms2 = nla_nest_start(skb, (NFCTH_POLICY_SET+i) | NLA_F_NESTED); if (nest_parms2 == NULL) -- cgit v1.2.3 From 3d3d18f086cdda72ee18a454db70ca72c6e3246c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Mar 2017 14:45:31 +0000 Subject: drm/i915: Avoid rcu_barrier() from reclaim paths (shrinker) The rcu_barrier() takes the cpu_hotplug mutex which itself is not reclaim-safe, and so rcu_barrier() is illegal from inside the shrinker. [ 309.661373] ========================================================= [ 309.661376] [ INFO: possible irq lock inversion dependency detected ] [ 309.661380] 4.11.0-rc1-CI-CI_DRM_2333+ #1 Tainted: G W [ 309.661383] --------------------------------------------------------- [ 309.661386] gem_exec_gttfil/6435 just changed the state of lock: [ 309.661389] (rcu_preempt_state.barrier_mutex){+.+.-.}, at: [] _rcu_barrier+0x31/0x160 [ 309.661399] but this lock took another, RECLAIM_FS-unsafe lock in the past: [ 309.661402] (cpu_hotplug.lock){+.+.+.} [ 309.661404] and interrupts could create inverse lock ordering between them. [ 309.661410] other info that might help us debug this: [ 309.661414] Possible interrupt unsafe locking scenario: [ 309.661417] CPU0 CPU1 [ 309.661419] ---- ---- [ 309.661421] lock(cpu_hotplug.lock); [ 309.661425] local_irq_disable(); [ 309.661432] lock(rcu_preempt_state.barrier_mutex); [ 309.661441] lock(cpu_hotplug.lock); [ 309.661446] [ 309.661448] lock(rcu_preempt_state.barrier_mutex); [ 309.661453] *** DEADLOCK *** [ 309.661460] 4 locks held by gem_exec_gttfil/6435: [ 309.661464] #0: (sb_writers#10){.+.+.+}, at: [] vfs_write+0x17d/0x1f0 [ 309.661475] #1: (debugfs_srcu){......}, at: [] debugfs_use_file_start+0x41/0xa0 [ 309.661486] #2: (&attr->mutex){+.+.+.}, at: [] simple_attr_write+0x37/0xe0 [ 309.661495] #3: (&dev->struct_mutex){+.+.+.}, at: [] i915_drop_caches_set+0x3a/0x150 [i915] [ 309.661540] the shortest dependencies between 2nd lock and 1st lock: [ 309.661547] -> (cpu_hotplug.lock){+.+.+.} ops: 829 { [ 309.661553] HARDIRQ-ON-W at: [ 309.661560] __lock_acquire+0x5e5/0x1b50 [ 309.661565] lock_acquire+0xc9/0x220 [ 309.661572] __mutex_lock+0x6e/0x990 [ 309.661576] mutex_lock_nested+0x16/0x20 [ 309.661583] get_online_cpus+0x61/0x80 [ 309.661590] kmem_cache_create+0x25/0x1d0 [ 309.661596] debug_objects_mem_init+0x30/0x249 [ 309.661602] start_kernel+0x341/0x3fe [ 309.661607] x86_64_start_reservations+0x2a/0x2c [ 309.661612] x86_64_start_kernel+0x173/0x186 [ 309.661619] verify_cpu+0x0/0xfc [ 309.661622] SOFTIRQ-ON-W at: [ 309.661627] __lock_acquire+0x611/0x1b50 [ 309.661632] lock_acquire+0xc9/0x220 [ 309.661636] __mutex_lock+0x6e/0x990 [ 309.661641] mutex_lock_nested+0x16/0x20 [ 309.661646] get_online_cpus+0x61/0x80 [ 309.661650] kmem_cache_create+0x25/0x1d0 [ 309.661655] debug_objects_mem_init+0x30/0x249 [ 309.661660] start_kernel+0x341/0x3fe [ 309.661664] x86_64_start_reservations+0x2a/0x2c [ 309.661669] x86_64_start_kernel+0x173/0x186 [ 309.661674] verify_cpu+0x0/0xfc [ 309.661677] RECLAIM_FS-ON-W at: [ 309.661682] mark_held_locks+0x6f/0xa0 [ 309.661687] lockdep_trace_alloc+0xb3/0x100 [ 309.661693] kmem_cache_alloc_trace+0x31/0x2e0 [ 309.661699] __smpboot_create_thread.part.1+0x27/0xe0 [ 309.661704] smpboot_create_threads+0x61/0x90 [ 309.661709] cpuhp_invoke_callback+0x9c/0x8a0 [ 309.661713] cpuhp_up_callbacks+0x31/0xb0 [ 309.661718] _cpu_up+0x7a/0xc0 [ 309.661723] do_cpu_up+0x5f/0x80 [ 309.661727] cpu_up+0xe/0x10 [ 309.661734] smp_init+0x71/0xb3 [ 309.661738] kernel_init_freeable+0x94/0x19e [ 309.661743] kernel_init+0x9/0xf0 [ 309.661748] ret_from_fork+0x2e/0x40 [ 309.661752] INITIAL USE at: [ 309.661757] __lock_acquire+0x234/0x1b50 [ 309.661761] lock_acquire+0xc9/0x220 [ 309.661766] __mutex_lock+0x6e/0x990 [ 309.661771] mutex_lock_nested+0x16/0x20 [ 309.661775] get_online_cpus+0x61/0x80 [ 309.661780] __cpuhp_setup_state+0x44/0x170 [ 309.661785] page_alloc_init+0x23/0x3a [ 309.661790] start_kernel+0x124/0x3fe [ 309.661794] x86_64_start_reservations+0x2a/0x2c [ 309.661799] x86_64_start_kernel+0x173/0x186 [ 309.661804] verify_cpu+0x0/0xfc [ 309.661807] } [ 309.661813] ... key at: [] cpu_hotplug+0xb0/0x100 [ 309.661817] ... acquired at: [ 309.661821] lock_acquire+0xc9/0x220 [ 309.661825] __mutex_lock+0x6e/0x990 [ 309.661829] mutex_lock_nested+0x16/0x20 [ 309.661833] get_online_cpus+0x61/0x80 [ 309.661837] _rcu_barrier+0x9f/0x160 [ 309.661841] rcu_barrier+0x10/0x20 [ 309.661847] netdev_run_todo+0x5f/0x310 [ 309.661852] rtnl_unlock+0x9/0x10 [ 309.661856] default_device_exit_batch+0x133/0x150 [ 309.661862] ops_exit_list.isra.0+0x4d/0x60 [ 309.661866] cleanup_net+0x1d8/0x2c0 [ 309.661872] process_one_work+0x1f4/0x6d0 [ 309.661876] worker_thread+0x49/0x4a0 [ 309.661881] kthread+0x107/0x140 [ 309.661884] ret_from_fork+0x2e/0x40 [ 309.661890] -> (rcu_preempt_state.barrier_mutex){+.+.-.} ops: 179 { [ 309.661896] HARDIRQ-ON-W at: [ 309.661901] __lock_acquire+0x5e5/0x1b50 [ 309.661905] lock_acquire+0xc9/0x220 [ 309.661910] __mutex_lock+0x6e/0x990 [ 309.661914] mutex_lock_nested+0x16/0x20 [ 309.661919] _rcu_barrier+0x31/0x160 [ 309.661923] rcu_barrier+0x10/0x20 [ 309.661928] netdev_run_todo+0x5f/0x310 [ 309.661932] rtnl_unlock+0x9/0x10 [ 309.661936] default_device_exit_batch+0x133/0x150 [ 309.661941] ops_exit_list.isra.0+0x4d/0x60 [ 309.661946] cleanup_net+0x1d8/0x2c0 [ 309.661951] process_one_work+0x1f4/0x6d0 [ 309.661955] worker_thread+0x49/0x4a0 [ 309.661960] kthread+0x107/0x140 [ 309.661964] ret_from_fork+0x2e/0x40 [ 309.661968] SOFTIRQ-ON-W at: [ 309.661972] __lock_acquire+0x611/0x1b50 [ 309.661977] lock_acquire+0xc9/0x220 [ 309.661981] __mutex_lock+0x6e/0x990 [ 309.661986] mutex_lock_nested+0x16/0x20 [ 309.661990] _rcu_barrier+0x31/0x160 [ 309.661995] rcu_barrier+0x10/0x20 [ 309.661999] netdev_run_todo+0x5f/0x310 [ 309.662003] rtnl_unlock+0x9/0x10 [ 309.662008] default_device_exit_batch+0x133/0x150 [ 309.662013] ops_exit_list.isra.0+0x4d/0x60 [ 309.662017] cleanup_net+0x1d8/0x2c0 [ 309.662022] process_one_work+0x1f4/0x6d0 [ 309.662027] worker_thread+0x49/0x4a0 [ 309.662031] kthread+0x107/0x140 [ 309.662035] ret_from_fork+0x2e/0x40 [ 309.662039] IN-RECLAIM_FS-W at: [ 309.662043] __lock_acquire+0x638/0x1b50 [ 309.662048] lock_acquire+0xc9/0x220 [ 309.662053] __mutex_lock+0x6e/0x990 [ 309.662058] mutex_lock_nested+0x16/0x20 [ 309.662062] _rcu_barrier+0x31/0x160 [ 309.662067] rcu_barrier+0x10/0x20 [ 309.662089] i915_gem_shrink_all+0x33/0x40 [i915] [ 309.662109] i915_drop_caches_set+0x141/0x150 [i915] [ 309.662114] simple_attr_write+0xc7/0xe0 [ 309.662119] full_proxy_write+0x4f/0x70 [ 309.662124] __vfs_write+0x23/0x120 [ 309.662128] vfs_write+0xc6/0x1f0 [ 309.662133] SyS_write+0x44/0xb0 [ 309.662138] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 309.662142] INITIAL USE at: [ 309.662147] __lock_acquire+0x234/0x1b50 [ 309.662151] lock_acquire+0xc9/0x220 [ 309.662156] __mutex_lock+0x6e/0x990 [ 309.662160] mutex_lock_nested+0x16/0x20 [ 309.662165] _rcu_barrier+0x31/0x160 [ 309.662169] rcu_barrier+0x10/0x20 [ 309.662174] netdev_run_todo+0x5f/0x310 [ 309.662178] rtnl_unlock+0x9/0x10 [ 309.662183] default_device_exit_batch+0x133/0x150 [ 309.662188] ops_exit_list.isra.0+0x4d/0x60 [ 309.662192] cleanup_net+0x1d8/0x2c0 [ 309.662197] process_one_work+0x1f4/0x6d0 [ 309.662202] worker_thread+0x49/0x4a0 [ 309.662206] kthread+0x107/0x140 [ 309.662210] ret_from_fork+0x2e/0x40 [ 309.662214] } [ 309.662220] ... key at: [] rcu_preempt_state+0x508/0x780 [ 309.662225] ... acquired at: [ 309.662229] check_usage_forwards+0x12b/0x130 [ 309.662233] mark_lock+0x360/0x6f0 [ 309.662237] __lock_acquire+0x638/0x1b50 [ 309.662241] lock_acquire+0xc9/0x220 [ 309.662245] __mutex_lock+0x6e/0x990 [ 309.662249] mutex_lock_nested+0x16/0x20 [ 309.662253] _rcu_barrier+0x31/0x160 [ 309.662257] rcu_barrier+0x10/0x20 [ 309.662279] i915_gem_shrink_all+0x33/0x40 [i915] [ 309.662298] i915_drop_caches_set+0x141/0x150 [i915] [ 309.662303] simple_attr_write+0xc7/0xe0 [ 309.662307] full_proxy_write+0x4f/0x70 [ 309.662311] __vfs_write+0x23/0x120 [ 309.662315] vfs_write+0xc6/0x1f0 [ 309.662319] SyS_write+0x44/0xb0 [ 309.662323] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 309.662329] stack backtrace: [ 309.662335] CPU: 1 PID: 6435 Comm: gem_exec_gttfil Tainted: G W 4.11.0-rc1-CI-CI_DRM_2333+ #1 [ 309.662342] Hardware name: Hewlett-Packard HP Compaq 8100 Elite SFF PC/304Ah, BIOS 786H1 v01.13 07/14/2011 [ 309.662348] Call Trace: [ 309.662354] dump_stack+0x67/0x92 [ 309.662359] print_irq_inversion_bug.part.19+0x1a4/0x1b0 [ 309.662365] check_usage_forwards+0x12b/0x130 [ 309.662369] mark_lock+0x360/0x6f0 [ 309.662374] ? print_shortest_lock_dependencies+0x1a0/0x1a0 [ 309.662379] __lock_acquire+0x638/0x1b50 [ 309.662383] ? __mutex_unlock_slowpath+0x3e/0x2e0 [ 309.662388] ? trace_hardirqs_on+0xd/0x10 [ 309.662392] ? _rcu_barrier+0x31/0x160 [ 309.662396] lock_acquire+0xc9/0x220 [ 309.662400] ? _rcu_barrier+0x31/0x160 [ 309.662404] ? _rcu_barrier+0x31/0x160 [ 309.662409] __mutex_lock+0x6e/0x990 [ 309.662412] ? _rcu_barrier+0x31/0x160 [ 309.662416] ? _rcu_barrier+0x31/0x160 [ 309.662421] ? synchronize_rcu_expedited+0x35/0xb0 [ 309.662426] ? _raw_spin_unlock_irqrestore+0x52/0x60 [ 309.662434] mutex_lock_nested+0x16/0x20 [ 309.662438] _rcu_barrier+0x31/0x160 [ 309.662442] rcu_barrier+0x10/0x20 [ 309.662464] i915_gem_shrink_all+0x33/0x40 [i915] [ 309.662484] i915_drop_caches_set+0x141/0x150 [i915] [ 309.662489] simple_attr_write+0xc7/0xe0 [ 309.662494] full_proxy_write+0x4f/0x70 [ 309.662498] __vfs_write+0x23/0x120 [ 309.662503] ? rcu_read_lock_sched_held+0x75/0x80 [ 309.662507] ? rcu_sync_lockdep_assert+0x2a/0x50 [ 309.662512] ? __sb_start_write+0x102/0x210 [ 309.662516] ? vfs_write+0x17d/0x1f0 [ 309.662520] vfs_write+0xc6/0x1f0 [ 309.662524] ? trace_hardirqs_on_caller+0xe7/0x200 [ 309.662529] SyS_write+0x44/0xb0 [ 309.662533] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 309.662537] RIP: 0033:0x7f507eac24a0 [ 309.662541] RSP: 002b:00007fffda8720e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 309.662548] RAX: ffffffffffffffda RBX: ffffffff81482bd3 RCX: 00007f507eac24a0 [ 309.662552] RDX: 0000000000000005 RSI: 00007fffda8720f0 RDI: 0000000000000005 [ 309.662557] RBP: ffffc9000048bf88 R08: 0000000000000000 R09: 000000000000002c [ 309.662561] R10: 0000000000000014 R11: 0000000000000246 R12: 00007fffda872230 [ 309.662566] R13: 00007fffda872228 R14: 0000000000000201 R15: 00007fffda8720f0 [ 309.662572] ? __this_cpu_preempt_check+0x13/0x20 Fixes: 0eafec6d3244 ("drm/i915: Enable lockless lookup of request tracking via RCU") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100192 Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: # v4.9+ Link: http://patchwork.freedesktop.org/patch/msgid/20170314115019.18127-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter (cherry picked from commit bd784b7cc41af7a19cfb705fa6d800e511c4ab02) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170321144531.12344-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 401006b4c6a3..d5d2b4c6ed38 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -263,7 +263,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); - rcu_barrier(); /* wait until our RCU delayed slab frees are completed */ + synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */ return freed; } -- cgit v1.2.3 From 590379aef2e3fb7d00a093ed556c0a2714f86916 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Tue, 21 Mar 2017 14:47:20 +0000 Subject: drm/i915: make context status notifier head be per engine GVTg has introduced the context status notifier to schedule the GVTg workload. At that time, the notifier is bound to GVTg context only, so GVTg is not aware of host workloads. Now we are going to improve GVTg's guest workload scheduler policy, and add Guc emulation support for new Gen graphics. Both these two features require acknowledgment for all contexts running on hardware. (But will not alter host workload.) So here try to make some change. The change is simple: 1. Move the context status notifier head from i915_gem_context to intel_engine_cs. Which means there is a notifier head per engine instead of per context. Execlist driver still call notifier for each context sched-in/out events of current engine. 2. At GVTg side, it binds a notifier_block for each physical engine at GVTg initialization period. Then GVTg can hear all context status events. In this patch, GVTg do nothing for host context event, but later will add a function there. But in any case, the notifier callback is a noop if this is no active vGPU. Since intel_gvt_init() is called at early initialization stage and require the status notifier head has been initiated, I initiate it in intel_engine_setup(). v2: remove a redundant newline. (chris) Fixes: 3c7ba6359d70 ("drm/i915: Introduce execlist context status change notification") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100232 Signed-off-by: Changbin Du Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Zhi Wang Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170313024711.28591-1-changbin.du@intel.com Acked-by: Zhenyu Wang Signed-off-by: Chris Wilson (cherry picked from commit 3fc03069bc6e6c316f19bb526e3c8ce784677477) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170321144720.17020-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 45 ++++++++++++++------------------- drivers/gpu/drm/i915/i915_gem_context.c | 1 - drivers/gpu/drm/i915/i915_gem_context.h | 3 --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 3 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 7 files changed, 27 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 23791920ced1..6dfc48b63b71 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -162,7 +162,6 @@ struct intel_vgpu { atomic_t running_workload_num; DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); struct i915_gem_context *shadow_ctx; - struct notifier_block shadow_ctx_notifier_block; #if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) struct { @@ -233,6 +232,7 @@ struct intel_gvt { struct intel_gvt_gtt gtt; struct intel_gvt_opregion opregion; struct intel_gvt_workload_scheduler scheduler; + struct notifier_block shadow_ctx_notifier_block[I915_NUM_ENGINES]; DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS); struct intel_vgpu_type *types; unsigned int num_types; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 39a83eb7aecc..c4353ed86d4b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -130,12 +130,10 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { - struct intel_vgpu *vgpu = container_of(nb, - struct intel_vgpu, shadow_ctx_notifier_block); - struct drm_i915_gem_request *req = - (struct drm_i915_gem_request *)data; - struct intel_gvt_workload_scheduler *scheduler = - &vgpu->gvt->scheduler; + struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data; + struct intel_gvt *gvt = container_of(nb, struct intel_gvt, + shadow_ctx_notifier_block[req->engine->id]); + struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = scheduler->current_workload[req->engine->id]; @@ -534,15 +532,16 @@ void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu) void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; - int i; + struct intel_engine_cs *engine; + enum intel_engine_id i; gvt_dbg_core("clean workload scheduler\n"); - for (i = 0; i < I915_NUM_ENGINES; i++) { - if (scheduler->thread[i]) { - kthread_stop(scheduler->thread[i]); - scheduler->thread[i] = NULL; - } + for_each_engine(engine, gvt->dev_priv, i) { + atomic_notifier_chain_unregister( + &engine->context_status_notifier, + &gvt->shadow_ctx_notifier_block[i]); + kthread_stop(scheduler->thread[i]); } } @@ -550,18 +549,15 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt) { struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct workload_thread_param *param = NULL; + struct intel_engine_cs *engine; + enum intel_engine_id i; int ret; - int i; gvt_dbg_core("init workload scheduler\n"); init_waitqueue_head(&scheduler->workload_complete_wq); - for (i = 0; i < I915_NUM_ENGINES; i++) { - /* check ring mask at init time */ - if (!HAS_ENGINE(gvt->dev_priv, i)) - continue; - + for_each_engine(engine, gvt->dev_priv, i) { init_waitqueue_head(&scheduler->waitq[i]); param = kzalloc(sizeof(*param), GFP_KERNEL); @@ -580,6 +576,11 @@ int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt) ret = PTR_ERR(scheduler->thread[i]); goto err; } + + gvt->shadow_ctx_notifier_block[i].notifier_call = + shadow_context_status_change; + atomic_notifier_chain_register(&engine->context_status_notifier, + &gvt->shadow_ctx_notifier_block[i]); } return 0; err: @@ -591,9 +592,6 @@ err: void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu) { - atomic_notifier_chain_unregister(&vgpu->shadow_ctx->status_notifier, - &vgpu->shadow_ctx_notifier_block); - i915_gem_context_put_unlocked(vgpu->shadow_ctx); } @@ -608,10 +606,5 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu) vgpu->shadow_ctx->engine[RCS].initialised = true; - vgpu->shadow_ctx_notifier_block.notifier_call = - shadow_context_status_change; - - atomic_notifier_chain_register(&vgpu->shadow_ctx->status_notifier, - &vgpu->shadow_ctx_notifier_block); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 17f90c618208..e2d83b6d376b 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -311,7 +311,6 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->ring_size = 4 * PAGE_SIZE; ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) << GEN8_CTX_ADDRESSING_MODE_SHIFT; - ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier); /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not * present or not in use we still need a small bias as ring wraparound diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 0ac750b90f3d..e9c008fe14b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -160,9 +160,6 @@ struct i915_gem_context { /** desc_template: invariant fields for the HW context descriptor */ u32 desc_template; - /** status_notifier: list of callbacks for context-switch changes */ - struct atomic_notifier_head status_notifier; - /** guilty_count: How many times this context has caused a GPU hang. */ unsigned int guilty_count; /** diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 371acf109e34..ab1be5c80ea5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -105,6 +105,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; + ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); + dev_priv->engine[id] = engine; return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ebf8023d21e6..471af3b480ad 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -345,7 +345,8 @@ execlists_context_status_change(struct drm_i915_gem_request *rq, if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) return; - atomic_notifier_call_chain(&rq->ctx->status_notifier, status, rq); + atomic_notifier_call_chain(&rq->engine->context_status_notifier, + status, rq); } static void diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 79c2b8d72322..13dccb18cd43 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -403,6 +403,9 @@ struct intel_engine_cs { */ struct i915_gem_context *legacy_active_context; + /* status_notifier: list of callbacks for context-switch changes */ + struct atomic_notifier_head context_status_notifier; + struct intel_engine_hangcheck hangcheck; bool needs_cmd_parser; -- cgit v1.2.3 From 5b52330bbfe63b3305765354d6046c9f7f89c011 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 21 Mar 2017 11:26:35 -0400 Subject: audit: fix auditd/kernel connection state tracking What started as a rather straightforward race condition reported by Dmitry using the syzkaller fuzzer ended up revealing some major problems with how the audit subsystem managed its netlink sockets and its connection with the userspace audit daemon. Fixing this properly had quite the cascading effect and what we are left with is this rather large and complicated patch. My initial goal was to try and decompose this patch into multiple smaller patches, but the way these changes are intertwined makes it difficult to split these changes into meaningful pieces that don't break or somehow make things worse for the intermediate states. The patch makes a number of changes, but the most significant are highlighted below: * The auditd tracking variables, e.g. audit_sock, are now gone and replaced by a RCU/spin_lock protected variable auditd_conn which is a structure containing all of the auditd tracking information. * We no longer track the auditd sock directly, instead we track it via the network namespace in which it resides and we use the audit socket associated with that namespace. In spirit, this is what the code was trying to do prior to this patch (at least I think that is what the original authors intended), but it was done rather poorly and added a layer of obfuscation that only masked the underlying problems. * Big backlog queue cleanup, again. In v4.10 we made some pretty big changes to how the audit backlog queues work, here we haven't changed the queue design so much as cleaned up the implementation. Brought about by the locking changes, we've simplified kauditd_thread() quite a bit by consolidating the queue handling into a new helper function, kauditd_send_queue(), which allows us to eliminate a lot of very similar code and makes the looping logic in kauditd_thread() clearer. * All netlink messages sent to auditd are now sent via auditd_send_unicast_skb(). Other than just making sense, this makes the lock handling easier. * Change the audit_log_start() sleep behavior so that we never sleep on auditd events (unchanged) or if the caller is holding the audit_cmd_mutex (changed). Previously we didn't sleep if the caller was auditd or if the message type fell between a certain range; the type check was a poor effort of doing what the cmd_mutex check now does. Richard Guy Briggs originally proposed not sleeping the cmd_mutex owner several years ago but his patch wasn't acceptable at the time. At least the idea lives on here. * A problem with the lost record counter has been resolved. Steve Grubb and I both happened to notice this problem and according to some quick testing by Steve, this problem goes back quite some time. It's largely a harmless problem, although it may have left some careful sysadmins quite puzzled. Cc: # 4.10.x- Reported-by: Dmitry Vyukov Signed-off-by: Paul Moore --- kernel/audit.c | 639 ++++++++++++++++++++++++++++++++++--------------------- kernel/audit.h | 9 +- kernel/auditsc.c | 6 +- 3 files changed, 399 insertions(+), 255 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index e794544f5e63..2f4964cfde0b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -54,6 +54,10 @@ #include #include #include +#include +#include +#include +#include #include @@ -90,13 +94,34 @@ static u32 audit_default; /* If auditing cannot proceed, audit_failure selects what happens. */ static u32 audit_failure = AUDIT_FAIL_PRINTK; -/* - * If audit records are to be written to the netlink socket, audit_pid - * contains the pid of the auditd process and audit_nlk_portid contains - * the portid to use to send netlink messages to that process. +/* private audit network namespace index */ +static unsigned int audit_net_id; + +/** + * struct audit_net - audit private network namespace data + * @sk: communication socket + */ +struct audit_net { + struct sock *sk; +}; + +/** + * struct auditd_connection - kernel/auditd connection state + * @pid: auditd PID + * @portid: netlink portid + * @net: the associated network namespace + * @lock: spinlock to protect write access + * + * Description: + * This struct is RCU protected; you must either hold the RCU lock for reading + * or the included spinlock for writing. */ -int audit_pid; -static __u32 audit_nlk_portid; +static struct auditd_connection { + int pid; + u32 portid; + struct net *net; + spinlock_t lock; +} auditd_conn; /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in @@ -123,10 +148,6 @@ u32 audit_sig_sid = 0; */ static atomic_t audit_lost = ATOMIC_INIT(0); -/* The netlink socket. */ -static struct sock *audit_sock; -static unsigned int audit_net_id; - /* Hash for inode-based rules */ struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; @@ -139,6 +160,7 @@ static LIST_HEAD(audit_freelist); /* queue msgs to send via kauditd_task */ static struct sk_buff_head audit_queue; +static void kauditd_hold_skb(struct sk_buff *skb); /* queue msgs due to temporary unicast send problems */ static struct sk_buff_head audit_retry_queue; /* queue msgs waiting for new auditd connection */ @@ -192,6 +214,43 @@ struct audit_reply { struct sk_buff *skb; }; +/** + * auditd_test_task - Check to see if a given task is an audit daemon + * @task: the task to check + * + * Description: + * Return 1 if the task is a registered audit daemon, 0 otherwise. + */ +int auditd_test_task(const struct task_struct *task) +{ + int rc; + + rcu_read_lock(); + rc = (auditd_conn.pid && task->tgid == auditd_conn.pid ? 1 : 0); + rcu_read_unlock(); + + return rc; +} + +/** + * audit_get_sk - Return the audit socket for the given network namespace + * @net: the destination network namespace + * + * Description: + * Returns the sock pointer if valid, NULL otherwise. The caller must ensure + * that a reference is held for the network namespace while the sock is in use. + */ +static struct sock *audit_get_sk(const struct net *net) +{ + struct audit_net *aunet; + + if (!net) + return NULL; + + aunet = net_generic(net, audit_net_id); + return aunet->sk; +} + static void audit_set_portid(struct audit_buffer *ab, __u32 portid) { if (ab) { @@ -210,9 +269,7 @@ void audit_panic(const char *message) pr_err("%s\n", message); break; case AUDIT_FAIL_PANIC: - /* test audit_pid since printk is always losey, why bother? */ - if (audit_pid) - panic("audit: %s\n", message); + panic("audit: %s\n", message); break; } } @@ -370,21 +427,87 @@ static int audit_set_failure(u32 state) return audit_do_config_change("audit_failure", &audit_failure, state); } -/* - * For one reason or another this nlh isn't getting delivered to the userspace - * audit daemon, just send it to printk. +/** + * auditd_set - Set/Reset the auditd connection state + * @pid: auditd PID + * @portid: auditd netlink portid + * @net: auditd network namespace pointer + * + * Description: + * This function will obtain and drop network namespace references as + * necessary. + */ +static void auditd_set(int pid, u32 portid, struct net *net) +{ + unsigned long flags; + + spin_lock_irqsave(&auditd_conn.lock, flags); + auditd_conn.pid = pid; + auditd_conn.portid = portid; + if (auditd_conn.net) + put_net(auditd_conn.net); + if (net) + auditd_conn.net = get_net(net); + else + auditd_conn.net = NULL; + spin_unlock_irqrestore(&auditd_conn.lock, flags); +} + +/** + * auditd_reset - Disconnect the auditd connection + * + * Description: + * Break the auditd/kauditd connection and move all the queued records into the + * hold queue in case auditd reconnects. + */ +static void auditd_reset(void) +{ + struct sk_buff *skb; + + /* if it isn't already broken, break the connection */ + rcu_read_lock(); + if (auditd_conn.pid) + auditd_set(0, 0, NULL); + rcu_read_unlock(); + + /* flush all of the main and retry queues to the hold queue */ + while ((skb = skb_dequeue(&audit_retry_queue))) + kauditd_hold_skb(skb); + while ((skb = skb_dequeue(&audit_queue))) + kauditd_hold_skb(skb); +} + +/** + * kauditd_print_skb - Print the audit record to the ring buffer + * @skb: audit record + * + * Whatever the reason, this packet may not make it to the auditd connection + * so write it via printk so the information isn't completely lost. */ static void kauditd_printk_skb(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); char *data = nlmsg_data(nlh); - if (nlh->nlmsg_type != AUDIT_EOE) { - if (printk_ratelimit()) - pr_notice("type=%d %s\n", nlh->nlmsg_type, data); - else - audit_log_lost("printk limit exceeded"); - } + if (nlh->nlmsg_type != AUDIT_EOE && printk_ratelimit()) + pr_notice("type=%d %s\n", nlh->nlmsg_type, data); +} + +/** + * kauditd_rehold_skb - Handle a audit record send failure in the hold queue + * @skb: audit record + * + * Description: + * This should only be used by the kauditd_thread when it fails to flush the + * hold queue. + */ +static void kauditd_rehold_skb(struct sk_buff *skb) +{ + /* put the record back in the queue at the same place */ + skb_queue_head(&audit_hold_queue, skb); + + /* fail the auditd connection */ + auditd_reset(); } /** @@ -421,6 +544,9 @@ static void kauditd_hold_skb(struct sk_buff *skb) /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); kfree_skb(skb); + + /* fail the auditd connection */ + auditd_reset(); } /** @@ -441,51 +567,122 @@ static void kauditd_retry_skb(struct sk_buff *skb) } /** - * auditd_reset - Disconnect the auditd connection + * auditd_send_unicast_skb - Send a record via unicast to auditd + * @skb: audit record * * Description: - * Break the auditd/kauditd connection and move all the records in the retry - * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex - * must be held when calling this function. + * Send a skb to the audit daemon, returns positive/zero values on success and + * negative values on failure; in all cases the skb will be consumed by this + * function. If the send results in -ECONNREFUSED the connection with auditd + * will be reset. This function may sleep so callers should not hold any locks + * where this would cause a problem. */ -static void auditd_reset(void) +static int auditd_send_unicast_skb(struct sk_buff *skb) { - struct sk_buff *skb; - - /* break the connection */ - if (audit_sock) { - sock_put(audit_sock); - audit_sock = NULL; + int rc; + u32 portid; + struct net *net; + struct sock *sk; + + /* NOTE: we can't call netlink_unicast while in the RCU section so + * take a reference to the network namespace and grab local + * copies of the namespace, the sock, and the portid; the + * namespace and sock aren't going to go away while we hold a + * reference and if the portid does become invalid after the RCU + * section netlink_unicast() should safely return an error */ + + rcu_read_lock(); + if (!auditd_conn.pid) { + rcu_read_unlock(); + rc = -ECONNREFUSED; + goto err; } - audit_pid = 0; - audit_nlk_portid = 0; + net = auditd_conn.net; + get_net(net); + sk = audit_get_sk(net); + portid = auditd_conn.portid; + rcu_read_unlock(); - /* flush all of the retry queue to the hold queue */ - while ((skb = skb_dequeue(&audit_retry_queue))) - kauditd_hold_skb(skb); + rc = netlink_unicast(sk, skb, portid, 0); + put_net(net); + if (rc < 0) + goto err; + + return rc; + +err: + if (rc == -ECONNREFUSED) + auditd_reset(); + return rc; } /** - * kauditd_send_unicast_skb - Send a record via unicast to auditd - * @skb: audit record + * kauditd_send_queue - Helper for kauditd_thread to flush skb queues + * @sk: the sending sock + * @portid: the netlink destination + * @queue: the skb queue to process + * @retry_limit: limit on number of netlink unicast failures + * @skb_hook: per-skb hook for additional processing + * @err_hook: hook called if the skb fails the netlink unicast send + * + * Description: + * Run through the given queue and attempt to send the audit records to auditd, + * returns zero on success, negative values on failure. It is up to the caller + * to ensure that the @sk is valid for the duration of this function. + * */ -static int kauditd_send_unicast_skb(struct sk_buff *skb) +static int kauditd_send_queue(struct sock *sk, u32 portid, + struct sk_buff_head *queue, + unsigned int retry_limit, + void (*skb_hook)(struct sk_buff *skb), + void (*err_hook)(struct sk_buff *skb)) { - int rc; + int rc = 0; + struct sk_buff *skb; + static unsigned int failed = 0; - /* if we know nothing is connected, don't even try the netlink call */ - if (!audit_pid) - return -ECONNREFUSED; + /* NOTE: kauditd_thread takes care of all our locking, we just use + * the netlink info passed to us (e.g. sk and portid) */ + + while ((skb = skb_dequeue(queue))) { + /* call the skb_hook for each skb we touch */ + if (skb_hook) + (*skb_hook)(skb); + + /* can we send to anyone via unicast? */ + if (!sk) { + if (err_hook) + (*err_hook)(skb); + continue; + } - /* get an extra skb reference in case we fail to send */ - skb_get(skb); - rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); - if (rc >= 0) { - consume_skb(skb); - rc = 0; + /* grab an extra skb reference in case of error */ + skb_get(skb); + rc = netlink_unicast(sk, skb, portid, 0); + if (rc < 0) { + /* fatal failure for our queue flush attempt? */ + if (++failed >= retry_limit || + rc == -ECONNREFUSED || rc == -EPERM) { + /* yes - error processing for the queue */ + sk = NULL; + if (err_hook) + (*err_hook)(skb); + if (!skb_hook) + goto out; + /* keep processing with the skb_hook */ + continue; + } else + /* no - requeue to preserve ordering */ + skb_queue_head(queue, skb); + } else { + /* it worked - drop the extra reference and continue */ + consume_skb(skb); + failed = 0; + } } - return rc; +out: + return (rc >= 0 ? 0 : rc); } /* @@ -493,16 +690,19 @@ static int kauditd_send_unicast_skb(struct sk_buff *skb) * @skb: audit record * * Description: - * This function doesn't consume an skb as might be expected since it has to - * copy it anyways. + * Write a multicast message to anyone listening in the initial network + * namespace. This function doesn't consume an skb as might be expected since + * it has to copy it anyways. */ static void kauditd_send_multicast_skb(struct sk_buff *skb) { struct sk_buff *copy; - struct audit_net *aunet = net_generic(&init_net, audit_net_id); - struct sock *sock = aunet->nlsk; + struct sock *sock = audit_get_sk(&init_net); struct nlmsghdr *nlh; + /* NOTE: we are not taking an additional reference for init_net since + * we don't have to worry about it going away */ + if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG)) return; @@ -526,149 +726,75 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb) } /** - * kauditd_wake_condition - Return true when it is time to wake kauditd_thread - * - * Description: - * This function is for use by the wait_event_freezable() call in - * kauditd_thread(). + * kauditd_thread - Worker thread to send audit records to userspace + * @dummy: unused */ -static int kauditd_wake_condition(void) -{ - static int pid_last = 0; - int rc; - int pid = audit_pid; - - /* wake on new messages or a change in the connected auditd */ - rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last); - if (rc) - pid_last = pid; - - return rc; -} - static int kauditd_thread(void *dummy) { int rc; - int auditd = 0; - int reschedule = 0; - struct sk_buff *skb; - struct nlmsghdr *nlh; + u32 portid = 0; + struct net *net = NULL; + struct sock *sk = NULL; #define UNICAST_RETRIES 5 -#define AUDITD_BAD(x,y) \ - ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES) - - /* NOTE: we do invalidate the auditd connection flag on any sending - * errors, but we only "restore" the connection flag at specific places - * in the loop in order to help ensure proper ordering of audit - * records */ set_freezable(); while (!kthread_should_stop()) { - /* NOTE: possible area for future improvement is to look at - * the hold and retry queues, since only this thread - * has access to these queues we might be able to do - * our own queuing and skip some/all of the locking */ - - /* NOTE: it might be a fun experiment to split the hold and - * retry queue handling to another thread, but the - * synchronization issues and other overhead might kill - * any performance gains */ + /* NOTE: see the lock comments in auditd_send_unicast_skb() */ + rcu_read_lock(); + if (!auditd_conn.pid) { + rcu_read_unlock(); + goto main_queue; + } + net = auditd_conn.net; + get_net(net); + sk = audit_get_sk(net); + portid = auditd_conn.portid; + rcu_read_unlock(); /* attempt to flush the hold queue */ - while (auditd && (skb = skb_dequeue(&audit_hold_queue))) { - rc = kauditd_send_unicast_skb(skb); - if (rc) { - /* requeue to the same spot */ - skb_queue_head(&audit_hold_queue, skb); - - auditd = 0; - if (AUDITD_BAD(rc, reschedule)) { - mutex_lock(&audit_cmd_mutex); - auditd_reset(); - mutex_unlock(&audit_cmd_mutex); - reschedule = 0; - } - } else - /* we were able to send successfully */ - reschedule = 0; + rc = kauditd_send_queue(sk, portid, + &audit_hold_queue, UNICAST_RETRIES, + NULL, kauditd_rehold_skb); + if (rc < 0) { + sk = NULL; + goto main_queue; } /* attempt to flush the retry queue */ - while (auditd && (skb = skb_dequeue(&audit_retry_queue))) { - rc = kauditd_send_unicast_skb(skb); - if (rc) { - auditd = 0; - if (AUDITD_BAD(rc, reschedule)) { - kauditd_hold_skb(skb); - mutex_lock(&audit_cmd_mutex); - auditd_reset(); - mutex_unlock(&audit_cmd_mutex); - reschedule = 0; - } else - /* temporary problem (we hope), queue - * to the same spot and retry */ - skb_queue_head(&audit_retry_queue, skb); - } else - /* we were able to send successfully */ - reschedule = 0; + rc = kauditd_send_queue(sk, portid, + &audit_retry_queue, UNICAST_RETRIES, + NULL, kauditd_hold_skb); + if (rc < 0) { + sk = NULL; + goto main_queue; } - /* standard queue processing, try to be as quick as possible */ -quick_loop: - skb = skb_dequeue(&audit_queue); - if (skb) { - /* setup the netlink header, see the comments in - * kauditd_send_multicast_skb() for length quirks */ - nlh = nlmsg_hdr(skb); - nlh->nlmsg_len = skb->len - NLMSG_HDRLEN; - - /* attempt to send to any multicast listeners */ - kauditd_send_multicast_skb(skb); - - /* attempt to send to auditd, queue on failure */ - if (auditd) { - rc = kauditd_send_unicast_skb(skb); - if (rc) { - auditd = 0; - if (AUDITD_BAD(rc, reschedule)) { - mutex_lock(&audit_cmd_mutex); - auditd_reset(); - mutex_unlock(&audit_cmd_mutex); - reschedule = 0; - } - - /* move to the retry queue */ - kauditd_retry_skb(skb); - } else - /* everything is working so go fast! */ - goto quick_loop; - } else if (reschedule) - /* we are currently having problems, move to - * the retry queue */ - kauditd_retry_skb(skb); - else - /* dump the message via printk and hold it */ - kauditd_hold_skb(skb); - } else { - /* we have flushed the backlog so wake everyone */ - wake_up(&audit_backlog_wait); - - /* if everything is okay with auditd (if present), go - * to sleep until there is something new in the queue - * or we have a change in the connected auditd; - * otherwise simply reschedule to give things a chance - * to recover */ - if (reschedule) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - } else - wait_event_freezable(kauditd_wait, - kauditd_wake_condition()); - - /* update the auditd connection status */ - auditd = (audit_pid ? 1 : 0); +main_queue: + /* process the main queue - do the multicast send and attempt + * unicast, dump failed record sends to the retry queue; if + * sk == NULL due to previous failures we will just do the + * multicast send and move the record to the retry queue */ + kauditd_send_queue(sk, portid, &audit_queue, 1, + kauditd_send_multicast_skb, + kauditd_retry_skb); + + /* drop our netns reference, no auditd sends past this line */ + if (net) { + put_net(net); + net = NULL; } + sk = NULL; + + /* we have processed all the queues so wake everyone */ + wake_up(&audit_backlog_wait); + + /* NOTE: we want to wake up if there is anything on the queue, + * regardless of if an auditd is connected, as we need to + * do the multicast send and rotate records from the + * main queue to the retry/hold queues */ + wait_event_freezable(kauditd_wait, + (skb_queue_len(&audit_queue) ? 1 : 0)); } return 0; @@ -678,17 +804,16 @@ int audit_send_list(void *_dest) { struct audit_netlink_list *dest = _dest; struct sk_buff *skb; - struct net *net = dest->net; - struct audit_net *aunet = net_generic(net, audit_net_id); + struct sock *sk = audit_get_sk(dest->net); /* wait for parent to finish and send an ACK */ mutex_lock(&audit_cmd_mutex); mutex_unlock(&audit_cmd_mutex); while ((skb = __skb_dequeue(&dest->q)) != NULL) - netlink_unicast(aunet->nlsk, skb, dest->portid, 0); + netlink_unicast(sk, skb, dest->portid, 0); - put_net(net); + put_net(dest->net); kfree(dest); return 0; @@ -722,16 +847,15 @@ out_kfree_skb: static int audit_send_reply_thread(void *arg) { struct audit_reply *reply = (struct audit_reply *)arg; - struct net *net = reply->net; - struct audit_net *aunet = net_generic(net, audit_net_id); + struct sock *sk = audit_get_sk(reply->net); mutex_lock(&audit_cmd_mutex); mutex_unlock(&audit_cmd_mutex); /* Ignore failure. It'll only happen if the sender goes away, because our timeout is set to infinite. */ - netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0); - put_net(net); + netlink_unicast(sk, reply->skb, reply->portid, 0); + put_net(reply->net); kfree(reply); return 0; } @@ -949,12 +1073,12 @@ static int audit_set_feature(struct sk_buff *skb) static int audit_replace(pid_t pid) { - struct sk_buff *skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0, - &pid, sizeof(pid)); + struct sk_buff *skb; + skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0, &pid, sizeof(pid)); if (!skb) return -ENOMEM; - return netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); + return auditd_send_unicast_skb(skb); } static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -981,7 +1105,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); s.enabled = audit_enabled; s.failure = audit_failure; - s.pid = audit_pid; + rcu_read_lock(); + s.pid = auditd_conn.pid; + rcu_read_unlock(); s.rate_limit = audit_rate_limit; s.backlog_limit = audit_backlog_limit; s.lost = atomic_read(&audit_lost); @@ -1014,30 +1140,44 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) * from the initial pid namespace, but something * to keep in mind if this changes */ int new_pid = s.pid; + pid_t auditd_pid; pid_t requesting_pid = task_tgid_vnr(current); - if ((!new_pid) && (requesting_pid != audit_pid)) { - audit_log_config_change("audit_pid", new_pid, audit_pid, 0); + /* test the auditd connection */ + audit_replace(requesting_pid); + + rcu_read_lock(); + auditd_pid = auditd_conn.pid; + /* only the current auditd can unregister itself */ + if ((!new_pid) && (requesting_pid != auditd_pid)) { + rcu_read_unlock(); + audit_log_config_change("audit_pid", new_pid, + auditd_pid, 0); return -EACCES; } - if (audit_pid && new_pid && - audit_replace(requesting_pid) != -ECONNREFUSED) { - audit_log_config_change("audit_pid", new_pid, audit_pid, 0); + /* replacing a healthy auditd is not allowed */ + if (auditd_pid && new_pid) { + rcu_read_unlock(); + audit_log_config_change("audit_pid", new_pid, + auditd_pid, 0); return -EEXIST; } + rcu_read_unlock(); + if (audit_enabled != AUDIT_OFF) - audit_log_config_change("audit_pid", new_pid, audit_pid, 1); + audit_log_config_change("audit_pid", new_pid, + auditd_pid, 1); + if (new_pid) { - if (audit_sock) - sock_put(audit_sock); - audit_pid = new_pid; - audit_nlk_portid = NETLINK_CB(skb).portid; - sock_hold(skb->sk); - audit_sock = skb->sk; - } else { + /* register a new auditd connection */ + auditd_set(new_pid, + NETLINK_CB(skb).portid, + sock_net(NETLINK_CB(skb).sk)); + /* try to process any backlog */ + wake_up_interruptible(&kauditd_wait); + } else + /* unregister the auditd connection */ auditd_reset(); - } - wake_up_interruptible(&kauditd_wait); } if (s.mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(s.rate_limit); @@ -1090,7 +1230,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (err) break; } - mutex_unlock(&audit_cmd_mutex); audit_log_common_recv_msg(&ab, msg_type); if (msg_type != AUDIT_USER_TTY) audit_log_format(ab, " msg='%.*s'", @@ -1108,7 +1247,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } audit_set_portid(ab, NETLINK_CB(skb).portid); audit_log_end(ab); - mutex_lock(&audit_cmd_mutex); } break; case AUDIT_ADD_RULE: @@ -1298,26 +1436,26 @@ static int __net_init audit_net_init(struct net *net) struct audit_net *aunet = net_generic(net, audit_net_id); - aunet->nlsk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg); - if (aunet->nlsk == NULL) { + aunet->sk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg); + if (aunet->sk == NULL) { audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } - aunet->nlsk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + return 0; } static void __net_exit audit_net_exit(struct net *net) { struct audit_net *aunet = net_generic(net, audit_net_id); - struct sock *sock = aunet->nlsk; - mutex_lock(&audit_cmd_mutex); - if (sock == audit_sock) + + rcu_read_lock(); + if (net == auditd_conn.net) auditd_reset(); - mutex_unlock(&audit_cmd_mutex); + rcu_read_unlock(); - netlink_kernel_release(sock); - aunet->nlsk = NULL; + netlink_kernel_release(aunet->sk); } static struct pernet_operations audit_net_ops __net_initdata = { @@ -1335,20 +1473,24 @@ static int __init audit_init(void) if (audit_initialized == AUDIT_DISABLED) return 0; - pr_info("initializing netlink subsys (%s)\n", - audit_default ? "enabled" : "disabled"); - register_pernet_subsys(&audit_net_ops); + memset(&auditd_conn, 0, sizeof(auditd_conn)); + spin_lock_init(&auditd_conn.lock); skb_queue_head_init(&audit_queue); skb_queue_head_init(&audit_retry_queue); skb_queue_head_init(&audit_hold_queue); - audit_initialized = AUDIT_INITIALIZED; - audit_enabled = audit_default; - audit_ever_enabled |= !!audit_default; for (i = 0; i < AUDIT_INODE_BUCKETS; i++) INIT_LIST_HEAD(&audit_inode_hash[i]); + pr_info("initializing netlink subsys (%s)\n", + audit_default ? "enabled" : "disabled"); + register_pernet_subsys(&audit_net_ops); + + audit_initialized = AUDIT_INITIALIZED; + audit_enabled = audit_default; + audit_ever_enabled |= !!audit_default; + kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd"); if (IS_ERR(kauditd_task)) { int err = PTR_ERR(kauditd_task); @@ -1519,20 +1661,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE))) return NULL; - /* don't ever fail/sleep on these two conditions: + /* NOTE: don't ever fail/sleep on these two conditions: * 1. auditd generated record - since we need auditd to drain the * queue; also, when we are checking for auditd, compare PIDs using * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() * using a PID anchored in the caller's namespace - * 2. audit command message - record types 1000 through 1099 inclusive - * are command messages/records used to manage the kernel subsystem - * and the audit userspace, blocking on these messages could cause - * problems under load so don't do it (note: not all of these - * command types are valid as record types, but it is quicker to - * just check two ints than a series of ints in a if/switch stmt) */ - if (!((audit_pid && audit_pid == task_tgid_vnr(current)) || - (type >= 1000 && type <= 1099))) { - long sleep_time = audit_backlog_wait_time; + * 2. generator holding the audit_cmd_mutex - we don't want to block + * while holding the mutex */ + if (!(auditd_test_task(current) || + (current == __mutex_owner(&audit_cmd_mutex)))) { + long stime = audit_backlog_wait_time; while (audit_backlog_limit && (skb_queue_len(&audit_queue) > audit_backlog_limit)) { @@ -1541,14 +1679,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, /* sleep if we are allowed and we haven't exhausted our * backlog wait limit */ - if ((gfp_mask & __GFP_DIRECT_RECLAIM) && - (sleep_time > 0)) { + if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) { DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&audit_backlog_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); - sleep_time = schedule_timeout(sleep_time); + stime = schedule_timeout(stime); remove_wait_queue(&audit_backlog_wait, &wait); } else { if (audit_rate_check() && printk_ratelimit()) @@ -2127,15 +2264,27 @@ out: */ void audit_log_end(struct audit_buffer *ab) { + struct sk_buff *skb; + struct nlmsghdr *nlh; + if (!ab) return; - if (!audit_rate_check()) { - audit_log_lost("rate limit exceeded"); - } else { - skb_queue_tail(&audit_queue, ab->skb); - wake_up_interruptible(&kauditd_wait); + + if (audit_rate_check()) { + skb = ab->skb; ab->skb = NULL; - } + + /* setup the netlink header, see the comments in + * kauditd_send_multicast_skb() for length quirks */ + nlh = nlmsg_hdr(skb); + nlh->nlmsg_len = skb->len - NLMSG_HDRLEN; + + /* queue the netlink packet and poke the kauditd thread */ + skb_queue_tail(&audit_queue, skb); + wake_up_interruptible(&kauditd_wait); + } else + audit_log_lost("rate limit exceeded"); + audit_buffer_free(ab); } diff --git a/kernel/audit.h b/kernel/audit.h index ca579880303a..0f1cf6d1878a 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -218,7 +218,7 @@ extern void audit_log_name(struct audit_context *context, struct audit_names *n, const struct path *path, int record_num, int *call_panic); -extern int audit_pid; +extern int auditd_test_task(const struct task_struct *task); #define AUDIT_INODE_BUCKETS 32 extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; @@ -250,10 +250,6 @@ struct audit_netlink_list { int audit_send_list(void *); -struct audit_net { - struct sock *nlsk; -}; - extern int selinux_audit_rule_update(void); extern struct mutex audit_filter_mutex; @@ -340,8 +336,7 @@ extern int audit_filter(int msgtype, unsigned int listtype); extern int __audit_signal_info(int sig, struct task_struct *t); static inline int audit_signal_info(int sig, struct task_struct *t) { - if (unlikely((audit_pid && t->tgid == audit_pid) || - (audit_signals && !audit_dummy_context()))) + if (auditd_test_task(t) || (audit_signals && !audit_dummy_context())) return __audit_signal_info(sig, t); return 0; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index d6a8de5f8fa3..e59ffc7fc522 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -762,7 +762,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, struct audit_entry *e; enum audit_state state; - if (audit_pid && tsk->tgid == audit_pid) + if (auditd_test_task(tsk)) return AUDIT_DISABLED; rcu_read_lock(); @@ -816,7 +816,7 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) { struct audit_names *n; - if (audit_pid && tsk->tgid == audit_pid) + if (auditd_test_task(tsk)) return; rcu_read_lock(); @@ -2256,7 +2256,7 @@ int __audit_signal_info(int sig, struct task_struct *t) struct audit_context *ctx = tsk->audit_context; kuid_t uid = current_uid(), t_uid = task_uid(t); - if (audit_pid && t->tgid == audit_pid) { + if (auditd_test_task(t)) { if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { audit_sig_pid = task_tgid_nr(tsk); if (uid_valid(tsk->loginuid)) -- cgit v1.2.3 From df630b8c1e851b5e265dc2ca9c87222e342c093b Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:17 +0800 Subject: KVM: x86: clear bus pointer when destroyed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When releasing the bus, let's clear the bus pointers to mark it out. If any further device unregister happens on this bus, we know that we're done if we found the bus being released already. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- virt/kvm/kvm_main.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a17d78759727..7445566fadc1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -727,8 +727,10 @@ static void kvm_destroy_vm(struct kvm *kvm) list_del(&kvm->vm_list); spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); - for (i = 0; i < KVM_NR_BUSES; i++) + for (i = 0; i < KVM_NR_BUSES; i++) { kvm_io_bus_destroy(kvm->buses[i]); + kvm->buses[i] = NULL; + } kvm_coalesced_mmio_free(kvm); #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); @@ -3579,6 +3581,14 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + + /* + * It's possible the bus being released before hand. If so, + * we're done here. + */ + if (!bus) + return 0; + r = -ENOENT; for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { -- cgit v1.2.3 From f3fbd7ec62dec1528fb8044034e2885f2b257941 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:03:38 +0900 Subject: arm: kprobes: Allow to handle reentered kprobe on single-stepping This is arm port of commit 6a5022a56ac3 ("kprobes/x86: Allow to handle reentered kprobe on single-stepping") Since the FIQ handlers can interrupt in the single stepping (or preparing the single stepping, do_debug etc.), we should consider a kprobe is hit in the NMI handler. Even in that case, the kprobe is allowed to be reentered as same as the kprobes hit in kprobe handlers (KPROBE_HIT_ACTIVE or KPROBE_HIT_SSDONE). The real issue will happen when a kprobe hit while another reentered kprobe is processing (KPROBE_REENTER), because we already consumed a saved-area for the previous kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index b6dc9d838a9a..35148b46c4f5 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -271,6 +271,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) switch (kcb->kprobe_status) { case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: + case KPROBE_HIT_SS: /* A pre- or post-handler probe got us here. */ kprobes_inc_nmissed_count(p); save_previous_kprobe(kcb); @@ -279,6 +280,11 @@ void __kprobes kprobe_handler(struct pt_regs *regs) singlestep(p, regs, kcb); restore_previous_kprobe(kcb); break; + case KPROBE_REENTER: + /* A nested probe was hit in FIQ, it is a BUG */ + pr_warn("Unrecoverable kprobe detected at %p.\n", + p->addr); + /* fall through */ default: /* impossible cases */ BUG(); -- cgit v1.2.3 From 91fc862c613ab7a0ef6b0b7755c33619127f4e5a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:04:48 +0900 Subject: arm: kprobes: Skip single-stepping in recursing path if possible Kprobes/arm skips single-stepping (moreover handling the event) if the conditional instruction must not be executed. This also apply the rule when we hit the recursing kprobe, so that kprobe does not count nmissed up in that case. Signed-off-by: Masami Hiramatsu Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 35148b46c4f5..269f66e66ff5 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -266,7 +266,15 @@ void __kprobes kprobe_handler(struct pt_regs *regs) #endif if (p) { - if (cur) { + if (!p->ainsn.insn_check_cc(regs->ARM_cpsr)) { + /* + * Probe hit but conditional execution check failed, + * so just skip the instruction and continue as if + * nothing had happened. + * In this case, we can skip recursing check too. + */ + singlestep_skip(p, regs); + } else if (cur) { /* Kprobe is pending, so we're recursing. */ switch (kcb->kprobe_status) { case KPROBE_HIT_ACTIVE: @@ -289,7 +297,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) /* impossible cases */ BUG(); } - } else if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) { + } else { /* Probe hit and conditional execution check ok. */ set_current_kprobe(p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -310,13 +318,6 @@ void __kprobes kprobe_handler(struct pt_regs *regs) } reset_current_kprobe(); } - } else { - /* - * Probe hit but conditional execution check failed, - * so just skip the instruction and continue as if - * nothing had happened. - */ - singlestep_skip(p, regs); } } else if (cur) { /* We probably hit a jprobe. Call its break handler. */ -- cgit v1.2.3 From 06553175f585b52509c7df37d6f4a50aacb7b211 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 14 Feb 2017 00:05:59 +0900 Subject: arm: kprobes: Fix the return address of multiple kretprobes This is arm port of commit 737480a0d525 ("kprobes/x86: Fix the return address of multiple kretprobes"). Fix the return address of subsequent kretprobes when multiple kretprobes are set on the same function. For example: # cd /sys/kernel/debug/tracing # echo "r:event1 sys_symlink" > kprobe_events # echo "r:event2 sys_symlink" >> kprobe_events # echo 1 > events/kprobes/enable # ln -s /tmp/foo /tmp/bar (without this patch) # cat trace | grep -v ^# ln-82 [000] dn.2 68.446525: event1: (kretprobe_trampoline+0x0/0x18 <- SyS_symlink) ln-82 [000] dn.2 68.447831: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) (with this patch) # cat trace | grep -v ^# ln-81 [000] dn.1 39.463469: event1: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) ln-81 [000] dn.1 39.464701: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink) Signed-off-by: Masami Hiramatsu Cc: KUMANO Syuhei Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 269f66e66ff5..ad1f4e6a9e33 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -441,6 +441,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -463,14 +464,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __this_cpu_write(current_kprobe, &ri->rp->kp); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); __this_cpu_write(current_kprobe, NULL); } - orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) @@ -482,7 +503,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) break; } - kretprobe_assert(ri, orig_ret_address, trampoline_address); kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { -- cgit v1.2.3 From 974310d047f3c7788a51d10c8d255eebdb1fa857 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Thu, 2 Mar 2017 13:04:09 +0000 Subject: arm: kprobes: Align stack to 8-bytes in test code kprobes test cases need to have a stack that is aligned to an 8-byte boundary because they call other functions (and the ARM ABI mandates that alignment) and because test cases include 64-bit accesses to the stack. Unfortunately, GCC doesn't ensure this alignment for inline assembler and for the code in question seems to always misalign it by pushing just the LR register onto the stack. We therefore need to explicitly perform stack alignment at the start of each test case. Without this fix, some test cases will generate alignment faults on systems where alignment is enforced. Even if the kernel is configured to handle these faults in software, triggering them is ugly. It also exposes limitations in the fault handling code which doesn't cope with writes to the stack. E.g. when handling this instruction strd r6, [sp, #-64]! the fault handling code will write to a stack location below the SP value at the point the fault occurred, which coincides with where the exception handler has pushed the saved register context. This results in corruption of those registers. Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/test-core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index c893726aa52d..1c98a87786ca 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -977,7 +977,10 @@ static void coverage_end(void) void __naked __kprobes_test_case_start(void) { __asm__ __volatile__ ( - "stmdb sp!, {r4-r11} \n\t" + "mov r2, sp \n\t" + "bic r3, r2, #7 \n\t" + "mov sp, r3 \n\t" + "stmdb sp!, {r2-r11} \n\t" "sub sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" "bic r0, lr, #1 @ r0 = inline data \n\t" "mov r1, sp \n\t" @@ -997,7 +1000,8 @@ void __naked __kprobes_test_case_end_32(void) "movne pc, r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "mov pc, r0 \n\t" ); } @@ -1013,7 +1017,8 @@ void __naked __kprobes_test_case_end_16(void) "bxne r0 \n\t" "mov r0, r4 \n\t" "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" - "ldmia sp!, {r4-r11} \n\t" + "ldmia sp!, {r2-r11} \n\t" + "mov sp, r2 \n\t" "bx r0 \n\t" ); } -- cgit v1.2.3 From c248c64387fac5a6b31b343d9acb78f478e8619c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 9 Mar 2017 13:26:07 +0200 Subject: nvme-rdma: handle cpu unplug when re-establishing the controller If a cpu unplug event has occured, we need to take the minimum of the provided nr_io_queues and the number of online cpus, otherwise we won't be able to connect them as blk-mq mapping won't dispatch to those queues. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 779f516e7a4e..47a479f26e5d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -343,8 +343,6 @@ static int __nvme_rdma_init_request(struct nvme_rdma_ctrl *ctrl, struct ib_device *ibdev = dev->dev; int ret; - BUG_ON(queue_idx >= ctrl->queue_count); - ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); if (ret) @@ -652,8 +650,22 @@ out_free_queues: static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) { + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; int i, ret; + nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); + if (ret) + return ret; + + ctrl->queue_count = nr_io_queues + 1; + if (ctrl->queue_count < 2) + return 0; + + dev_info(ctrl->ctrl.device, + "creating %d I/O queues.\n", nr_io_queues); + for (i = 1; i < ctrl->queue_count; i++) { ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.opts->queue_size); @@ -1791,20 +1803,8 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret) - return ret; - - ctrl->queue_count = opts->nr_io_queues + 1; - if (ctrl->queue_count < 2) - return 0; - - dev_info(ctrl->ctrl.device, - "creating %d I/O queues.\n", opts->nr_io_queues); - ret = nvme_rdma_init_io_queues(ctrl); if (ret) return ret; -- cgit v1.2.3 From 945dd5bacc8978439af276976b5dcbbd42333dbc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Mar 2017 13:27:51 +0200 Subject: nvme-loop: handle cpu unplug when re-establishing the controller If a cpu unplug event has occured, we need to take the minimum of the provided nr_io_queues and the number of online cpus, otherwise we won't be able to connect them as blk-mq mapping won't dispatch to those queues. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/loop.c | 88 ++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 74e04a0855d4..22f7bc6bac7f 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -223,8 +223,6 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx) static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, struct nvme_loop_iod *iod, unsigned int queue_idx) { - BUG_ON(queue_idx >= ctrl->queue_count); - iod->req.cmd = &iod->cmd; iod->req.rsp = &iod->rsp; iod->queue = &ctrl->queues[queue_idx]; @@ -314,6 +312,43 @@ free_ctrl: kfree(ctrl); } +static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->queue_count; i++) + nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +} + +static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + unsigned int nr_io_queues; + int ret, i; + + nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); + ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); + if (ret || !nr_io_queues) + return ret; + + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); + + for (i = 1; i <= nr_io_queues; i++) { + ctrl->queues[i].ctrl = ctrl; + ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); + if (ret) + goto out_destroy_queues; + + ctrl->queue_count++; + } + + return 0; + +out_destroy_queues: + nvme_loop_destroy_io_queues(ctrl); + return ret; +} + static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; @@ -385,17 +420,13 @@ out_free_sq: static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl) { - int i; - nvme_stop_keep_alive(&ctrl->ctrl); if (ctrl->queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); - - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); } if (ctrl->ctrl.state == NVME_CTRL_LIVE) @@ -467,19 +498,14 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) if (ret) goto out_disable; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_free_queues; - - ctrl->queue_count++; - } + ret = nvme_loop_init_io_queues(ctrl); + if (ret) + goto out_destroy_admin; - for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) { + for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) - goto out_free_queues; + goto out_destroy_io; } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -492,9 +518,9 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) return; -out_free_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); +out_destroy_io: + nvme_loop_destroy_io_queues(ctrl); +out_destroy_admin: nvme_loop_destroy_admin_queue(ctrl); out_disable: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); @@ -533,25 +559,12 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) { - struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret, i; - ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); - if (ret || !opts->nr_io_queues) + ret = nvme_loop_init_io_queues(ctrl); + if (ret) return ret; - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", - opts->nr_io_queues); - - for (i = 1; i <= opts->nr_io_queues; i++) { - ctrl->queues[i].ctrl = ctrl; - ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq); - if (ret) - goto out_destroy_queues; - - ctrl->queue_count++; - } - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); ctrl->tag_set.ops = &nvme_loop_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; @@ -575,7 +588,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) goto out_free_tagset; } - for (i = 1; i <= opts->nr_io_queues; i++) { + for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) goto out_cleanup_connect_q; @@ -588,8 +601,7 @@ out_cleanup_connect_q: out_free_tagset: blk_mq_free_tag_set(&ctrl->tag_set); out_destroy_queues: - for (i = 1; i < ctrl->queue_count; i++) - nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); + nvme_loop_destroy_io_queues(ctrl); return ret; } -- cgit v1.2.3 From 0ca10b60ceeb5372da01798ca68c116ae45a6eb6 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 20 Mar 2017 11:25:16 +0100 Subject: reset: fix optional reset_control_get stubs to return NULL When RESET_CONTROLLER is not enabled, the optional reset_control_get stubs should now also return NULL. Since it is now valid for reset_control_assert/deassert/reset/status/put to be called unconditionally, with NULL as an argument for optional resets, the stubs are not allowed to warn anymore. Fixes: bb475230b8e5 ("reset: make optional functions really optional") Reported-by: Andrzej Hajda Tested-by: Andrzej Hajda Reviewed-by: Andrzej Hajda Cc: Ramiro Oliveira Signed-off-by: Philipp Zabel --- include/linux/reset.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/include/linux/reset.h b/include/linux/reset.h index 86b4ed75359e..96fb139bdd08 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -31,31 +31,26 @@ static inline int device_reset_optional(struct device *dev) static inline int reset_control_reset(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_assert(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_deassert(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline int reset_control_status(struct reset_control *rstc) { - WARN_ON(1); return 0; } static inline void reset_control_put(struct reset_control *rstc) { - WARN_ON(1); } static inline int __must_check device_reset(struct device *dev) @@ -74,14 +69,14 @@ static inline struct reset_control *__of_reset_control_get( const char *id, int index, bool shared, bool optional) { - return ERR_PTR(-ENOTSUPP); + return optional ? NULL : ERR_PTR(-ENOTSUPP); } static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, int index, bool shared, bool optional) { - return ERR_PTR(-ENOTSUPP); + return optional ? NULL : ERR_PTR(-ENOTSUPP); } #endif /* CONFIG_RESET_CONTROLLER */ -- cgit v1.2.3 From 7292ae3d5a18fb922be496e6bb687647193569b4 Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Mon, 20 Mar 2017 20:15:53 +0300 Subject: jump label: fix passing kbuild_cflags when checking for asm goto support The latest change of asm goto support check added passing of KBUILD_CFLAGS to compiler. When these flags reference gcc plugins that are not built yet, the check fails. When one runs "make bzImage" followed by "make modules", the kernel is always built with HAVE_JUMP_LABEL disabled, while the modules are built depending on CONFIG_JUMP_LABEL. If HAVE_JUMP_LABEL macro happens to be different, modules are built with undefined references, e.g.: ERROR: "static_key_slow_inc" [net/netfilter/xt_TEE.ko] undefined! ERROR: "static_key_slow_dec" [net/netfilter/xt_TEE.ko] undefined! ERROR: "static_key_slow_dec" [net/netfilter/nft_meta.ko] undefined! ERROR: "static_key_slow_inc" [net/netfilter/nft_meta.ko] undefined! ERROR: "nf_hooks_needed" [net/netfilter/ipvs/ip_vs.ko] undefined! ERROR: "nf_hooks_needed" [net/ipv6/ipv6.ko] undefined! ERROR: "static_key_count" [net/ipv6/ipv6.ko] undefined! ERROR: "static_key_slow_inc" [net/ipv6/ipv6.ko] undefined! This change moves the check before all these references are added to KBUILD_CFLAGS. This is correct because subsequent KBUILD_CFLAGS modifications are not relevant to this check. Reported-by: Anton V. Boyarshinov Fixes: 35f860f9ba6a ("jump label: pass kbuild_cflags when checking for asm goto support") Cc: stable@vger.kernel.org # v4.10 Signed-off-by: Gleb Fotengauer-Malinovskiy Signed-off-by: Dmitry V. Levin Acked-by: Steven Rostedt (VMware) Acked-by: David Lin Signed-off-by: Masahiro Yamada --- Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index b8a6b862ed73..74d69f241543 100644 --- a/Makefile +++ b/Makefile @@ -653,6 +653,12 @@ KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \ # Tell gcc to never replace conditional load with a non-conditional one KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0) +# check for 'asm goto' +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y) + KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO + KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO +endif + include scripts/Makefile.gcc-plugins ifdef CONFIG_READABLE_ASM @@ -798,12 +804,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) # use the deterministic mode of AR if available KBUILD_ARFLAGS := $(call ar-option,D) -# check for 'asm goto' -ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y) - KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO - KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO -endif - include scripts/Makefile.kasan include scripts/Makefile.extrawarn include scripts/Makefile.ubsan -- cgit v1.2.3 From 9be3213b14d44f6328281941193d97f3b97e7d4c Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Mon, 13 Mar 2017 20:33:41 +0100 Subject: gconfig: remove misleading parentheses around a condition When building the kernel with clang, the compiler complains about the presence of a condition inside two pairs of parentheses: scripts/kconfig/gconf.c:917:19: error: equality comparison with extraneous parentheses [-Werror,-Wparentheses-equality] } else if ((col == COL_OPTION)) { ~~~~^~~~~~~~~~~~~ scripts/kconfig/gconf.c:917:19: note: remove extraneous parentheses around the comparison to silence this warning } else if ((col == COL_OPTION)) { ~ ^ ~ scripts/kconfig/gconf.c:917:19: note: use '=' to turn this equality comparison into an assignment } else if ((col == COL_OPTION)) { ^~ = Silence this warning by removing a level of parentheses. Signed-off-by: Nicolas Iooss Signed-off-by: Masahiro Yamada --- scripts/kconfig/gconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c index 26d208b435a0..cfddddb9c9d7 100644 --- a/scripts/kconfig/gconf.c +++ b/scripts/kconfig/gconf.c @@ -914,7 +914,7 @@ on_treeview2_button_press_event(GtkWidget * widget, current = menu; display_tree_part(); gtk_widget_set_sensitive(back_btn, TRUE); - } else if ((col == COL_OPTION)) { + } else if (col == COL_OPTION) { toggle_sym_value(menu); gtk_tree_view_expand_row(view, path, TRUE); } -- cgit v1.2.3 From 713cc9df6473f0cc8d699987d990482d432c0679 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Mar 2017 18:04:26 +0000 Subject: arm64: compat: Update compat syscalls Hook up three pkey syscalls (which we don't implement) and the new statx syscall, as has been done for arch/arm/. Signed-off-by: Will Deacon --- arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index e78ac26324bd..bdbeb06dc11e 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 394 +#define __NR_compat_syscalls 398 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index b7e8ef16ff0d..c66b51aab195 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -809,6 +809,14 @@ __SYSCALL(__NR_copy_file_range, sys_copy_file_range) __SYSCALL(__NR_preadv2, compat_sys_preadv2) #define __NR_pwritev2 393 __SYSCALL(__NR_pwritev2, compat_sys_pwritev2) +#define __NR_pkey_mprotect 394 +__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect) +#define __NR_pkey_alloc 395 +__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) +#define __NR_pkey_free 396 +__SYSCALL(__NR_pkey_free, sys_pkey_free) +#define __NR_statx 397 +__SYSCALL(__NR_statx, sys_statx) /* * Please add new compat syscalls above this comment and update -- cgit v1.2.3 From 65b1adebfe43c642dfe3b109edb5d992db5fbe72 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 21 Mar 2017 13:19:09 -0600 Subject: vfio: Rework group release notifier warning The intent of the original warning is make sure that the mdev vendor driver has removed any group notifiers at the point where the group is closed by the user. Theoretically this would be through an orderly shutdown where any devices are release prior to the group release. We can't always count on an orderly shutdown, the user can close the group before the notifier can be removed or the user task might be killed. We'd like to add this sanity test when the group is idle and the only references are from the devices within the group themselves, but we don't have a good way to do that. Instead check both when the group itself is removed and when the group is opened. A bit later than we'd prefer, but better than the current over aggressive approach. Fixes: ccd46dbae77d ("vfio: support notifier chain in vfio_group") Signed-off-by: Alex Williamson Cc: # v4.10 Cc: Jike Song --- drivers/vfio/vfio.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 609f4f982c74..561084ab387f 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -403,6 +403,7 @@ static void vfio_group_release(struct kref *kref) struct iommu_group *iommu_group = group->iommu_group; WARN_ON(!list_empty(&group->device_list)); + WARN_ON(group->notifier.head); list_for_each_entry_safe(unbound, tmp, &group->unbound_list, unbound_next) { @@ -1573,6 +1574,10 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep) return -EBUSY; } + /* Warn if previous user didn't cleanup and re-init to drop them */ + if (WARN_ON(group->notifier.head)) + BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); + filep->private_data = group; return 0; @@ -1584,9 +1589,6 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep) filep->private_data = NULL; - /* Any user didn't unregister? */ - WARN_ON(group->notifier.head); - vfio_group_try_dissolve_container(group); atomic_dec(&group->opened); -- cgit v1.2.3 From fb2155e3c30dc2043b52020e26965067a3e7779c Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 21 Mar 2017 14:39:19 +0000 Subject: MIPS: smp-cps: Fix retrieval of VPE mask on big endian CPUs The vpe_mask member of struct core_boot_config is of type atomic_t, which is a 32bit type. In cps-vec.S this member was being retrieved by a PTR_L macro, which on 64bit systems is a 64bit load. On little endian systems this is OK, since the double word that is retrieved will have the required less significant word in the correct position. However, on big endian systems the less significant word of the load is retrieved from address+4, and the more significant from address+0. The destination register therefore ends up with the required word in the more significant word e.g. when starting the second VP of a big endian 64bit system, the load PTR_L ta2, COREBOOTCFG_VPEMASK(a0) ends up setting register ta2 to 0x0000000300000000 When this value is written to the CPC it is ignored, since it is invalid to write anything larger than 4 bits. This results in any VP other than VP0 in a core failing to start in 64bit big endian systems. Change the load to a 32bit load word instruction to fix the bug. Fixes: f12401d7219f ("MIPS: smp-cps: Pull boot config retrieval out of mips_cps_boot_vpes") Signed-off-by: Matt Redfearn Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15787/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 59476a607add..a00e87b0256d 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -361,7 +361,7 @@ LEAF(mips_cps_get_bootcfg) END(mips_cps_get_bootcfg) LEAF(mips_cps_boot_vpes) - PTR_L ta2, COREBOOTCFG_VPEMASK(a0) + lw ta2, COREBOOTCFG_VPEMASK(a0) PTR_L ta3, COREBOOTCFG_VPECONFIG(a0) #if defined(CONFIG_CPU_MIPSR6) -- cgit v1.2.3 From 6ef90877eee63a0d03e83183bb44b64229b624e6 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 15 Mar 2017 23:26:42 +0100 Subject: MIPS: Lantiq: fix missing xbar kernel panic Commit 08b3c894e565 ("MIPS: lantiq: Disable xbar fpi burst mode") accidentally requested the resources from the pmu address region instead of the xbar registers region, but the check for the return value of request_mem_region() was wrong. Commit 98ea51cb0c8c ("MIPS: Lantiq: Fix another request_mem_region() return code check") fixed the check of the return value of request_mem_region() which made the kernel panics. This patch now makes use of the correct memory region for the cross bar. Fixes: 08b3c894e565 ("MIPS: lantiq: Disable xbar fpi burst mode") Signed-off-by: Hauke Mehrtens Cc: John Crispin Cc: james.hogan@imgtec.com Cc: arnd@arndb.de Cc: sergei.shtylyov@cogentembedded.com Cc: john@phrozen.org Cc: # 4.4.x- Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15751 Signed-off-by: Ralf Baechle --- arch/mips/lantiq/xway/sysctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 3c3aa05891dd..95bec460b651 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -467,7 +467,7 @@ void __init ltq_soc_init(void) if (!np_xbar) panic("Failed to load xbar nodes from devicetree"); - if (of_address_to_resource(np_pmu, 0, &res_xbar)) + if (of_address_to_resource(np_xbar, 0, &res_xbar)) panic("Failed to get xbar resources"); if (!request_mem_region(res_xbar.start, resource_size(&res_xbar), res_xbar.name)) -- cgit v1.2.3 From 033cffeedbd11c140952b98e8639bf652091a17d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:25 +0800 Subject: MIPS: Add MIPS_CPU_FTLB for Loongson-3A R2 Loongson-3A R2 and newer CPU have FTLB, but Config0.MT is 1, so add MIPS_CPU_FTLB to the CPU options. Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J . Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15752/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 07718bb5fc9d..12422fd4af23 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1824,7 +1824,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) } decode_configs(c); - c->options |= MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; + c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; c->writecombine = _CACHE_UNCACHED_ACCELERATED; break; default: -- cgit v1.2.3 From 5a34133167dce36666ea054e30a561b7f4413b7f Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:26 +0800 Subject: MIPS: Check TLB before handle_ri_rdhwr() for Loongson-3 Loongson-3's micro TLB (ITLB) is not strictly a subset of JTLB. That means: when a JTLB entry is replaced by hardware, there may be an old valid entry exists in ITLB. So, a TLB miss exception may occur while handle_ri_rdhwr() is running because it try to access EPC's content. However, handle_ri_rdhwr() doesn't clear EXL, which makes a TLB Refill exception be treated as a TLB Invalid exception and tlbp may fail. In this case, if FTLB (which is usually set-associative instead of set- associative) is enabled, a tlbp failure will cause an invalid tlbwi, which will hang the whole system. This patch rename handle_ri_rdhwr_vivt to handle_ri_rdhwr_tlbp and use it for Loongson-3. It try to solve the same problem described as below, but more straightforwards. https://patchwork.linux-mips.org/patch/12591/ I think Loongson-2 has the same problem, but it has no FTLB, so we just keep it as is. Signed-off-by: Huacai Chen Cc: Rui Wang Cc: John Crispin Cc: Steven J . Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15753/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/genex.S | 4 ++-- arch/mips/kernel/traps.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 7ec9612cb007..2ac6c2625c13 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -519,7 +519,7 @@ NESTED(nmi_handler, PT_SIZE, sp) BUILD_HANDLER reserved reserved sti verbose /* others */ .align 5 - LEAF(handle_ri_rdhwr_vivt) + LEAF(handle_ri_rdhwr_tlbp) .set push .set noat .set noreorder @@ -538,7 +538,7 @@ NESTED(nmi_handler, PT_SIZE, sp) .set pop bltz k1, handle_ri /* slow path */ /* fall thru */ - END(handle_ri_rdhwr_vivt) + END(handle_ri_rdhwr_tlbp) LEAF(handle_ri_rdhwr) .set push diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c7d17cfb32f6..b49e7bf9f950 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -83,7 +83,7 @@ extern asmlinkage void handle_dbe(void); extern asmlinkage void handle_sys(void); extern asmlinkage void handle_bp(void); extern asmlinkage void handle_ri(void); -extern asmlinkage void handle_ri_rdhwr_vivt(void); +extern asmlinkage void handle_ri_rdhwr_tlbp(void); extern asmlinkage void handle_ri_rdhwr(void); extern asmlinkage void handle_cpu(void); extern asmlinkage void handle_ov(void); @@ -2408,9 +2408,18 @@ void __init trap_init(void) set_except_vector(EXCCODE_SYS, handle_sys); set_except_vector(EXCCODE_BP, handle_bp); - set_except_vector(EXCCODE_RI, rdhwr_noopt ? handle_ri : - (cpu_has_vtag_icache ? - handle_ri_rdhwr_vivt : handle_ri_rdhwr)); + + if (rdhwr_noopt) + set_except_vector(EXCCODE_RI, handle_ri); + else { + if (cpu_has_vtag_icache) + set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp); + else if (current_cpu_type() == CPU_LOONGSON3) + set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp); + else + set_except_vector(EXCCODE_RI, handle_ri_rdhwr); + } + set_except_vector(EXCCODE_CPU, handle_cpu); set_except_vector(EXCCODE_OV, handle_ov); set_except_vector(EXCCODE_TR, handle_tr); -- cgit v1.2.3 From 0115f6cbf26663c86496bc56eeea293f85b77897 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:27 +0800 Subject: MIPS: Flush wrong invalid FTLB entry for huge page On VTLB+FTLB platforms (such as Loongson-3A R2), FTLB's pagesize is usually configured the same as PAGE_SIZE. In such a case, Huge page entry is not suitable to write in FTLB. Unfortunately, when a huge page is created, its page table entries haven't created immediately. Then the TLB refill handler will fetch an invalid page table entry which has no "HUGE" bit, and this entry may be written to FTLB. Since it is invalid, TLB load/store handler will then use tlbwi to write the valid entry at the same place. However, the valid entry is a huge page entry which isn't suitable for FTLB. Our solution is to modify build_huge_handler_tail. Flush the invalid old entry (whether it is in FTLB or VTLB, this is in order to reduce branches) and use tlbwr to write the valid new entry. Signed-off-by: Rui Wang Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J . Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15754/ Signed-off-by: Ralf Baechle --- arch/mips/mm/tlbex.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 9bfee8988eaf..4f642e07c2b1 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -760,7 +760,8 @@ static void build_huge_update_entries(u32 **p, unsigned int pte, static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, struct uasm_label **l, unsigned int pte, - unsigned int ptr) + unsigned int ptr, + unsigned int flush) { #ifdef CONFIG_SMP UASM_i_SC(p, pte, 0, ptr); @@ -769,6 +770,22 @@ static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, #else UASM_i_SW(p, pte, 0, ptr); #endif + if (cpu_has_ftlb && flush) { + BUG_ON(!cpu_has_tlbinv); + + UASM_i_MFC0(p, ptr, C0_ENTRYHI); + uasm_i_ori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_tlb_write_entry(p, l, r, tlb_indexed); + + uasm_i_xori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_huge_update_entries(p, pte, ptr); + build_huge_tlb_write_entry(p, l, r, pte, tlb_random, 0); + + return; + } + build_huge_update_entries(p, pte, ptr); build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0); } @@ -2199,7 +2216,7 @@ static void build_r4000_tlb_load_handler(void) uasm_l_tlbl_goaround2(&l, p); } uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID)); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); #endif uasm_l_nopage_tlbl(&l, p); @@ -2254,7 +2271,7 @@ static void build_r4000_tlb_store_handler(void) build_tlb_probe_entry(&p); uasm_i_ori(&p, wr.r1, wr.r1, _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); #endif uasm_l_nopage_tlbs(&l, p); @@ -2310,7 +2327,7 @@ static void build_r4000_tlb_modify_handler(void) build_tlb_probe_entry(&p); uasm_i_ori(&p, wr.r1, wr.r1, _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); - build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 0); #endif uasm_l_nopage_tlbm(&l, p); -- cgit v1.2.3 From 093b995e3b55a0ae0670226ddfcb05bfbf0099ae Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 20 Mar 2017 14:26:42 +0800 Subject: mm, swap: Remove WARN_ON_ONCE() in free_swap_slot() Before commit 452b94b8c8c7 ("mm/swap: don't BUG_ON() due to uninitialized swap slot cache"), the following bug is reported, ------------[ cut here ]------------ kernel BUG at mm/swap_slots.c:270! invalid opcode: 0000 [#1] SMP CPU: 5 PID: 1745 Comm: (sd-pam) Not tainted 4.11.0-rc1-00243-g24c534bb161b #1 Hardware name: System manufacturer System Product Name/Z170-K, BIOS 1803 05/06/2016 RIP: 0010:free_swap_slot+0xba/0xd0 Call Trace: swap_free+0x36/0x40 do_swap_page+0x360/0x6d0 __handle_mm_fault+0x880/0x1080 handle_mm_fault+0xd0/0x240 __do_page_fault+0x232/0x4d0 do_page_fault+0x20/0x70 page_fault+0x22/0x30 ---[ end trace aefc9ede53e0ab21 ]--- This is raised by the BUG_ON(!swap_slot_cache_initialized) in free_swap_slot(). This is incorrect, because even if the swap slots cache fails to be initialized, the swap should operate properly without the swap slots cache. And the use_swap_slot_cache check later in the function will protect the uninitialized swap slots cache case. In commit 452b94b8c8c7, the BUG_ON() is replaced by WARN_ON_ONCE(). In the patch, the WARN_ON_ONCE() is removed too. Reported-by: Linus Torvalds Acked-by: Tim Chen Cc: Michal Hocko Signed-off-by: "Huang, Ying" Signed-off-by: Linus Torvalds --- mm/swap_slots.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 7ebb23836f68..b1ccb58ad397 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry) { struct swap_slots_cache *cache; - WARN_ON_ONCE(!swap_slot_cache_initialized); - cache = &get_cpu_var(swp_slots); if (use_swap_slot_cache && cache->slots_ret) { spin_lock_irq(&cache->free_lock); -- cgit v1.2.3 From 64897b20eed29cee2b998fb5ba362e65523dba3c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 21 Mar 2017 22:19:07 +0100 Subject: cpufreq: intel_pstate: Fix policy data management in passive mode The policy->cpuinfo.max_freq and policy->max updates in intel_cpufreq_turbo_update() are excessive as they are done for no good reason and may lead to problems in principle, so they should be dropped. However, after dropping them intel_cpufreq_turbo_update() becomes almost entirely pointless, because the check made by it is made again down the road in intel_pstate_prepare_request(). The only thing in it that still needs to be done is the call to update_turbo_state(), so drop intel_cpufreq_turbo_update() altogether and make its callers invoke update_turbo_state() directly instead of it. In addition to that, fix intel_cpufreq_verify_policy() so that it checks global.no_turbo in addition to global.turbo_disabled when updating policy->cpuinfo.max_freq to make it consistent with intel_pstate_verify_policy(). Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7b07803e7042..283491f742d3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2257,7 +2257,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) struct cpudata *cpu = all_cpu_data[policy->cpu]; update_turbo_state(); - policy->cpuinfo.max_freq = global.turbo_disabled ? + policy->cpuinfo.max_freq = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; cpufreq_verify_within_cpu_limits(policy); @@ -2265,26 +2265,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) return 0; } -static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu, - struct cpufreq_policy *policy, - unsigned int target_freq) -{ - unsigned int max_freq; - - update_turbo_state(); - - max_freq = global.no_turbo || global.turbo_disabled ? - cpu->pstate.max_freq : cpu->pstate.turbo_freq; - policy->cpuinfo.max_freq = max_freq; - if (policy->max > max_freq) - policy->max = max_freq; - - if (target_freq > max_freq) - target_freq = max_freq; - - return target_freq; -} - static int intel_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) @@ -2293,8 +2273,10 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, struct cpufreq_freqs freqs; int target_pstate; + update_turbo_state(); + freqs.old = policy->cur; - freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq); + freqs.new = target_freq; cpufreq_freq_transition_begin(policy, &freqs); switch (relation) { @@ -2326,7 +2308,8 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, struct cpudata *cpu = all_cpu_data[policy->cpu]; int target_pstate; - target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq); + update_turbo_state(); + target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); target_pstate = intel_pstate_prepare_request(cpu, target_pstate); intel_pstate_update_pstate(cpu, target_pstate); -- cgit v1.2.3 From ad0a45fd9c14feebd000b6e84189d0edff265170 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Sun, 19 Mar 2017 00:51:59 +0530 Subject: cpuidle: Validate cpu_dev in cpuidle_add_sysfs() If a given cpu is not in cpu_present and cpu hotplug is disabled, arch can skip setting up the cpu_dev. Arch cpuidle driver should pass correct cpu mask for registration, but failing to do so by the driver causes error to propagate and crash like this: [ 30.076045] Unable to handle kernel paging request for data at address 0x00000048 [ 30.076100] Faulting instruction address: 0xc0000000007b2f30 cpu 0x4d: Vector: 300 (Data Access) at [c000003feb18b670] pc: c0000000007b2f30: kobject_get+0x20/0x70 lr: c0000000007b3c94: kobject_add_internal+0x54/0x3f0 sp: c000003feb18b8f0 msr: 9000000000009033 dar: 48 dsisr: 40000000 current = 0xc000003fd2ed8300 paca = 0xc00000000fbab500 softe: 0 irq_happened: 0x01 pid = 1, comm = swapper/0 Linux version 4.11.0-rc2-svaidy+ (sv@sagarika) (gcc version 6.2.0 20161005 (Ubuntu 6.2.0-5ubuntu12) ) #10 SMP Sun Mar 19 00:08:09 IST 2017 enter ? for help [c000003feb18b960] c0000000007b3c94 kobject_add_internal+0x54/0x3f0 [c000003feb18b9f0] c0000000007b43a4 kobject_init_and_add+0x64/0xa0 [c000003feb18ba70] c000000000e284f4 cpuidle_add_sysfs+0xb4/0x130 [c000003feb18baf0] c000000000e26038 cpuidle_register_device+0x118/0x1c0 [c000003feb18bb30] c000000000e26c48 cpuidle_register+0x78/0x120 [c000003feb18bbc0] c00000000168fd9c powernv_processor_idle_init+0x110/0x1c4 [c000003feb18bc40] c00000000000cff8 do_one_initcall+0x68/0x1d0 [c000003feb18bd00] c0000000016242f4 kernel_init_freeable+0x280/0x360 [c000003feb18bdc0] c00000000000d864 kernel_init+0x24/0x160 [c000003feb18be30] c00000000000b4e8 ret_from_kernel_thread+0x5c/0x74 Validating cpu_dev fixes the crash and reports correct error message like: [ 30.163506] Failed to register cpuidle device for cpu136 [ 30.173329] Registration of powernv driver failed. Signed-off-by: Vaidyanathan Srinivasan [ rjw: Comment massage ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/sysfs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index c5adc8c9ac43..ae948b1da93a 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -615,6 +615,18 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; + /* + * Return if cpu_device is not setup for this CPU. + * + * This could happen if the arch did not set up cpu_device + * since this CPU is not in cpu_present mask and the + * driver did not send a correct CPU mask during registration. + * Without this check we would end up passing bogus + * value for &cpu_dev->kobj in kobject_init_and_add() + */ + if (!cpu_dev) + return -ENODEV; + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); if (!kdev) return -ENOMEM; -- cgit v1.2.3 From 98d068ab52b4b11d403995ed14154660797e7136 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 14 Mar 2017 14:15:20 +0800 Subject: r8152: fix the list rx_done may be used without initialization The list rx_done would be initialized when the linking on occurs. Therefore, if a napi is scheduled without any linking on before, the following kernel panic would happen. BUG: unable to handle kernel NULL pointer dereference at 000000000000008 IP: [] r8152_poll+0xe1e/0x1210 [r8152] PGD 0 Oops: 0002 [#1] SMP Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 986243c932cc..bb3eedd07fbe 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1362,6 +1362,7 @@ static int alloc_all_mem(struct r8152 *tp) spin_lock_init(&tp->rx_lock); spin_lock_init(&tp->tx_lock); INIT_LIST_HEAD(&tp->tx_free); + INIT_LIST_HEAD(&tp->rx_done); skb_queue_head_init(&tp->tx_queue); skb_queue_head_init(&tp->rx_queue); -- cgit v1.2.3 From 8a0f5ccfb33b0b8b51de65b7b3bf342ba10b4fb6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Mar 2017 18:25:57 +0800 Subject: crypto: deadlock between crypto_alg_sem/rtnl_mutex/genl_mutex On Tue, Mar 14, 2017 at 10:44:10AM +0100, Dmitry Vyukov wrote: > > Yes, please. > Disregarding some reports is not a good way long term. Please try this patch. ---8<--- Subject: netlink: Annotate nlk cb_mutex by protocol Currently all occurences of nlk->cb_mutex are annotated by lockdep as a single class. This causes a false lcokdep cycle involving genl and crypto_user. This patch fixes it by dividing cb_mutex into individual classes based on the netlink protocol. As genl and crypto_user do not use the same netlink protocol this breaks the false dependency loop. Reported-by: Dmitry Vyukov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7b73c7c161a9..596eaff66649 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -96,6 +96,44 @@ EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); +static struct lock_class_key nlk_cb_mutex_keys[MAX_LINKS]; + +static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { + "nlk_cb_mutex-ROUTE", + "nlk_cb_mutex-1", + "nlk_cb_mutex-USERSOCK", + "nlk_cb_mutex-FIREWALL", + "nlk_cb_mutex-SOCK_DIAG", + "nlk_cb_mutex-NFLOG", + "nlk_cb_mutex-XFRM", + "nlk_cb_mutex-SELINUX", + "nlk_cb_mutex-ISCSI", + "nlk_cb_mutex-AUDIT", + "nlk_cb_mutex-FIB_LOOKUP", + "nlk_cb_mutex-CONNECTOR", + "nlk_cb_mutex-NETFILTER", + "nlk_cb_mutex-IP6_FW", + "nlk_cb_mutex-DNRTMSG", + "nlk_cb_mutex-KOBJECT_UEVENT", + "nlk_cb_mutex-GENERIC", + "nlk_cb_mutex-17", + "nlk_cb_mutex-SCSITRANSPORT", + "nlk_cb_mutex-ECRYPTFS", + "nlk_cb_mutex-RDMA", + "nlk_cb_mutex-CRYPTO", + "nlk_cb_mutex-SMC", + "nlk_cb_mutex-23", + "nlk_cb_mutex-24", + "nlk_cb_mutex-25", + "nlk_cb_mutex-26", + "nlk_cb_mutex-27", + "nlk_cb_mutex-28", + "nlk_cb_mutex-29", + "nlk_cb_mutex-30", + "nlk_cb_mutex-31", + "nlk_cb_mutex-MAX_LINKS" +}; + static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); @@ -585,6 +623,9 @@ static int __netlink_create(struct net *net, struct socket *sock, } else { nlk->cb_mutex = &nlk->cb_def_mutex; mutex_init(nlk->cb_mutex); + lockdep_set_class_and_name(nlk->cb_mutex, + nlk_cb_mutex_keys + protocol, + nlk_cb_mutex_key_strings[protocol]); } init_waitqueue_head(&nlk->wait); -- cgit v1.2.3 From 36d277bac8080202684e67162ebb157f16631581 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:14 +0800 Subject: vsock: track pkt owner vsock So that we can cancel a queued pkt later if necessary. Signed-off-by: Peng Tao Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 3 +++ net/vmw_vsock/virtio_transport_common.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 9638bfeb0d1f..584f9a647ad4 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -48,6 +48,8 @@ struct virtio_vsock_pkt { struct virtio_vsock_hdr hdr; struct work_struct work; struct list_head list; + /* socket refcnt not held, only use for cancellation */ + struct vsock_sock *vsk; void *buf; u32 len; u32 off; @@ -56,6 +58,7 @@ struct virtio_vsock_pkt { struct virtio_vsock_pkt_info { u32 remote_cid, remote_port; + struct vsock_sock *vsk; struct msghdr *msg; u32 pkt_len; u16 type; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 8d592a45b597..af087b44ceea 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -58,6 +58,7 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, pkt->len = len; pkt->hdr.len = cpu_to_le32(len); pkt->reply = info->reply; + pkt->vsk = info->vsk; if (info->msg && len > 0) { pkt->buf = kmalloc(len, GFP_KERNEL); @@ -180,6 +181,7 @@ static int virtio_transport_send_credit_update(struct vsock_sock *vsk, struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE, .type = type, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -519,6 +521,7 @@ int virtio_transport_connect(struct vsock_sock *vsk) struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_REQUEST, .type = VIRTIO_VSOCK_TYPE_STREAM, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -534,6 +537,7 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode) VIRTIO_VSOCK_SHUTDOWN_RCV : 0) | (mode & SEND_SHUTDOWN ? VIRTIO_VSOCK_SHUTDOWN_SEND : 0), + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -560,6 +564,7 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk, .type = VIRTIO_VSOCK_TYPE_STREAM, .msg = msg, .pkt_len = len, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); @@ -581,6 +586,7 @@ static int virtio_transport_reset(struct vsock_sock *vsk, .op = VIRTIO_VSOCK_OP_RST, .type = VIRTIO_VSOCK_TYPE_STREAM, .reply = !!pkt, + .vsk = vsk, }; /* Send RST only if the original pkt is not a RST pkt */ @@ -826,6 +832,7 @@ virtio_transport_send_response(struct vsock_sock *vsk, .remote_cid = le64_to_cpu(pkt->hdr.src_cid), .remote_port = le32_to_cpu(pkt->hdr.src_port), .reply = true, + .vsk = vsk, }; return virtio_transport_send_pkt_info(vsk, &info); -- cgit v1.2.3 From 16320f363ae128d9b9c70e60f00f2a572f57c23d Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:15 +0800 Subject: vhost-vsock: add pkt cancel capability To allow canceling all packets of a connection. Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Peng Tao Signed-off-by: David S. Miller --- drivers/vhost/vsock.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/net/af_vsock.h | 3 +++ 2 files changed, 44 insertions(+) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index ce5e63d2c66a..44eed8eb0725 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -223,6 +223,46 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt) return len; } +static int +vhost_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct vhost_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + LIST_HEAD(freeme); + + /* Find the vhost_vsock according to guest context id */ + vsock = vhost_vsock_get(vsk->remote_addr.svm_cid); + if (!vsock) + return -ENODEV; + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num) + vhost_poll_queue(&tx_vq->poll); + } + + return 0; +} + static struct virtio_vsock_pkt * vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, unsigned int out, unsigned int in) @@ -675,6 +715,7 @@ static struct virtio_transport vhost_transport = { .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, + .cancel_pkt = vhost_transport_cancel_pkt, .dgram_enqueue = virtio_transport_dgram_enqueue, .dgram_dequeue = virtio_transport_dgram_dequeue, diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index f2758964ce6f..f32ed9ac181a 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -100,6 +100,9 @@ struct vsock_transport { void (*destruct)(struct vsock_sock *); void (*release)(struct vsock_sock *); + /* Cancel all pending packets sent on vsock. */ + int (*cancel_pkt)(struct vsock_sock *vsk); + /* Connections. */ int (*connect)(struct vsock_sock *); -- cgit v1.2.3 From 073b4f2c50fe67c7c66a059a4d6db52bb1465490 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:16 +0800 Subject: vsock: add pkt cancel capability Reviewed-by: Stefan Hajnoczi Signed-off-by: Peng Tao Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport.c | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 9d24c0e958b1..68675a151f22 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -213,6 +213,47 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) return len; } +static int +virtio_transport_cancel_pkt(struct vsock_sock *vsk) +{ + struct virtio_vsock *vsock; + struct virtio_vsock_pkt *pkt, *n; + int cnt = 0; + LIST_HEAD(freeme); + + vsock = virtio_vsock_get(); + if (!vsock) { + return -ENODEV; + } + + spin_lock_bh(&vsock->send_pkt_list_lock); + list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) { + if (pkt->vsk != vsk) + continue; + list_move(&pkt->list, &freeme); + } + spin_unlock_bh(&vsock->send_pkt_list_lock); + + list_for_each_entry_safe(pkt, n, &freeme, list) { + if (pkt->reply) + cnt++; + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } + + if (cnt) { + struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX]; + int new_cnt; + + new_cnt = atomic_sub_return(cnt, &vsock->queued_replies); + if (new_cnt + cnt >= virtqueue_get_vring_size(rx_vq) && + new_cnt < virtqueue_get_vring_size(rx_vq)) + queue_work(virtio_vsock_workqueue, &vsock->rx_work); + } + + return 0; +} + static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) { int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; @@ -462,6 +503,7 @@ static struct virtio_transport virtio_transport = { .release = virtio_transport_release, .connect = virtio_transport_connect, .shutdown = virtio_transport_shutdown, + .cancel_pkt = virtio_transport_cancel_pkt, .dgram_bind = virtio_transport_dgram_bind, .dgram_dequeue = virtio_transport_dgram_dequeue, -- cgit v1.2.3 From 380feae0def7e6a115124a3219c3ec9b654dca32 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 15 Mar 2017 09:32:17 +0800 Subject: vsock: cancel packets when failing to connect Otherwise we'll leave the packets queued until releasing vsock device. E.g., if guest is slow to start up, resulting ETIMEDOUT on connect, guest will get the connect requests from failed host sockets. Reviewed-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: Peng Tao Signed-off-by: David S. Miller --- net/vmw_vsock/af_vsock.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9f770f33c100..6f7f6757ceef 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1102,10 +1102,19 @@ static const struct proto_ops vsock_dgram_ops = { .sendpage = sock_no_sendpage, }; +static int vsock_transport_cancel_pkt(struct vsock_sock *vsk) +{ + if (!transport->cancel_pkt) + return -EOPNOTSUPP; + + return transport->cancel_pkt(vsk); +} + static void vsock_connect_timeout(struct work_struct *work) { struct sock *sk; struct vsock_sock *vsk; + int cancel = 0; vsk = container_of(work, struct vsock_sock, dwork.work); sk = sk_vsock(vsk); @@ -1116,8 +1125,11 @@ static void vsock_connect_timeout(struct work_struct *work) sk->sk_state = SS_UNCONNECTED; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); + cancel = 1; } release_sock(sk); + if (cancel) + vsock_transport_cancel_pkt(vsk); sock_put(sk); } @@ -1224,11 +1236,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = sock_intr_errno(timeout); sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; sk->sk_state = SS_UNCONNECTED; sock->state = SS_UNCONNECTED; + vsock_transport_cancel_pkt(vsk); goto out_wait; } -- cgit v1.2.3 From 0be032c190abcdcfa948082b6a1e0d461184ba4d Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 16 Mar 2017 21:00:29 +0800 Subject: MIPS: c-r4k: Fix Loongson-3's vcache/scache waysize calculation If scache.waysize is 0, r4k___flush_cache_all() will do nothing and then cause bugs. BTW, though vcache.waysize isn't being used by now, we also fix its calculation. Signed-off-by: Huacai Chen Cc: John Crispin Cc: Steven J . Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15756/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index e7f798d55fbc..3fe99cb271a9 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1562,6 +1562,7 @@ static void probe_vcache(void) vcache_size = c->vcache.sets * c->vcache.ways * c->vcache.linesz; c->vcache.waybit = 0; + c->vcache.waysize = vcache_size / c->vcache.ways; pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n", vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); @@ -1664,6 +1665,7 @@ static void __init loongson3_sc_init(void) /* Loongson-3 has 4 cores, 1MB scache for each. scaches are shared */ scache_size *= 4; c->scache.waybit = 0; + c->scache.waysize = scache_size / c->scache.ways; pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); if (scache_size) -- cgit v1.2.3 From 7df9c24625b9981779afb8fcdbe2bb4765e61147 Mon Sep 17 00:00:00 2001 From: Andrey Ulanov Date: Tue, 14 Mar 2017 20:16:42 -0700 Subject: net: unix: properly re-increment inflight counter of GC discarded candidates Dmitry has reported that a BUG_ON() condition in unix_notinflight() may be triggered by a simple code that forwards unix socket in an SCM_RIGHTS message. That is caused by incorrect unix socket GC implementation in unix_gc(). The GC first collects list of candidates, then (a) decrements their "children's" inflight counter, (b) checks which inflight counters are now 0, and then (c) increments all inflight counters back. (a) and (c) are done by calling scan_children() with inc_inflight or dec_inflight as the second argument. Commit 6209344f5a37 ("net: unix: fix inflight counting bug in garbage collector") changed scan_children() such that it no longer considers sockets that do not have UNIX_GC_CANDIDATE flag. It also added a block of code that that unsets this flag _before_ invoking scan_children(, dec_iflight, ). This may lead to incorrect inflight counters for some sockets. This change fixes this bug by changing order of operations: UNIX_GC_CANDIDATE is now unset only after all inflight counters are restored to the original state. kernel BUG at net/unix/garbage.c:149! RIP: 0010:[] [] unix_notinflight+0x3b4/0x490 net/unix/garbage.c:149 Call Trace: [] unix_detach_fds.isra.19+0xff/0x170 net/unix/af_unix.c:1487 [] unix_destruct_scm+0xf9/0x210 net/unix/af_unix.c:1496 [] skb_release_head_state+0x101/0x200 net/core/skbuff.c:655 [] skb_release_all+0x1a/0x60 net/core/skbuff.c:668 [] __kfree_skb+0x1a/0x30 net/core/skbuff.c:684 [] kfree_skb+0x184/0x570 net/core/skbuff.c:705 [] unix_release_sock+0x5b5/0xbd0 net/unix/af_unix.c:559 [] unix_release+0x49/0x90 net/unix/af_unix.c:836 [] sock_release+0x92/0x1f0 net/socket.c:570 [] sock_close+0x1b/0x20 net/socket.c:1017 [] __fput+0x34e/0x910 fs/file_table.c:208 [] ____fput+0x1a/0x20 fs/file_table.c:244 [] task_work_run+0x1a0/0x280 kernel/task_work.c:116 [< inline >] exit_task_work include/linux/task_work.h:21 [] do_exit+0x183a/0x2640 kernel/exit.c:828 [] do_group_exit+0x14e/0x420 kernel/exit.c:931 [] get_signal+0x663/0x1880 kernel/signal.c:2307 [] do_signal+0xc5/0x2190 arch/x86/kernel/signal.c:807 [] exit_to_usermode_loop+0x1ea/0x2d0 arch/x86/entry/common.c:156 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [] syscall_return_slowpath+0x4d3/0x570 arch/x86/entry/common.c:259 [] entry_SYSCALL_64_fastpath+0xc4/0xc6 Link: https://lkml.org/lkml/2017/3/6/252 Signed-off-by: Andrey Ulanov Reported-by: Dmitry Vyukov Fixes: 6209344 ("net: unix: fix inflight counting bug in garbage collector") Signed-off-by: David S. Miller --- net/unix/garbage.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 6a0d48525fcf..c36757e72844 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (s) { struct unix_sock *u = unix_sk(s); + BUG_ON(!atomic_long_read(&u->inflight)); BUG_ON(list_empty(&u->link)); if (atomic_long_dec_and_test(&u->inflight)) @@ -341,6 +342,14 @@ void unix_gc(void) } list_del(&cursor); + /* Now gc_candidates contains only garbage. Restore original + * inflight counters for these as well, and remove the skbuffs + * which are creating the cycle(s). + */ + skb_queue_head_init(&hitlist); + list_for_each_entry(u, &gc_candidates, link) + scan_children(&u->sk, inc_inflight, &hitlist); + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ @@ -350,14 +359,6 @@ void unix_gc(void) list_move_tail(&u->link, &gc_inflight_list); } - /* Now gc_candidates contains only garbage. Restore original - * inflight counters for these as well, and remove the skbuffs - * which are creating the cycle(s). - */ - skb_queue_head_init(&hitlist); - list_for_each_entry(u, &gc_candidates, link) - scan_children(&u->sk, inc_inflight, &hitlist); - spin_unlock(&unix_gc_lock); /* Here we are. Hitlist is filled. Die. */ -- cgit v1.2.3 From 09050957fae896e001498af1aa35c446a11cb47d Mon Sep 17 00:00:00 2001 From: Yaroslav Isakov Date: Thu, 16 Mar 2017 22:44:10 +0300 Subject: tun: fix inability to set offloads after disabling them via ethtool Added missing logic in tun driver, which prevents apps to set offloads using tun ioctl, if offloads were previously disabled via ethtool Signed-off-by: Yaroslav Isakov Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 34cc3c590aa5..cc88cd7856f5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1931,6 +1931,8 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) return -EINVAL; tun->set_features = features; + tun->dev->wanted_features &= ~TUN_USER_FEATURES; + tun->dev->wanted_features |= features; netdev_update_features(tun->dev); return 0; -- cgit v1.2.3 From aea92fb2e09e29653b023d4254ac9fbf94221538 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 17 Mar 2017 08:05:28 -0700 Subject: sch_dsmark: fix invalid skb_cow() usage skb_cow(skb, sizeof(ip header)) is not very helpful in this context. First we need to use pskb_may_pull() to make sure the ip header is in skb linear part, then use skb_try_make_writable() to address clones issues. Fixes: 4c30719f4f55 ("[PKT_SCHED] dsmark: handle cloned and non-linear skb's") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_dsmark.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 802ac7c2e5e8..5334e309f17f 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -201,9 +201,13 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { + int wlen = skb_network_offset(skb); + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - if (skb_cow_head(skb, sizeof(struct iphdr))) + wlen += sizeof(struct iphdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) @@ -211,7 +215,9 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; case htons(ETH_P_IPV6): - if (skb_cow_head(skb, sizeof(struct ipv6hdr))) + wlen += sizeof(struct ipv6hdr); + if (!pskb_may_pull(skb, wlen) || + skb_try_make_writable(skb, wlen)) goto drop; skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) -- cgit v1.2.3 From 6bd845d1cf98b45c634baacb8381436dad3c2dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 17 Mar 2017 17:20:48 +0100 Subject: qmi_wwan: add Dell DW5811e MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a Dell branded Sierra Wireless EM7455. It is operating in MBIM mode by default, but can be configured to provide two QMI/RMNET functions. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 805674550683..f8d55aa058ec 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -925,6 +925,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81b1, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ + {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ + {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ {QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */ -- cgit v1.2.3 From 13e2d5187f6b965ba3556caedb914baf81b98ed2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Mar 2017 23:52:35 +0300 Subject: bna: integer overflow bug in debugfs We could allocate less memory than intended because we do: bnad->regdata = kzalloc(len << 2, GFP_KERNEL); The shift can overflow leading to a crash. This is debugfs code so the impact is very small. Fixes: 7afc5dbde091 ("bna: Add debugfs interface.") Signed-off-by: Dan Carpenter Acked-by: Rasesh Mody Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c index 05c1c1dd7751..cebfe3bd086e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c +++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c @@ -325,7 +325,7 @@ bnad_debugfs_write_regrd(struct file *file, const char __user *buf, return PTR_ERR(kern_buf); rc = sscanf(kern_buf, "%x:%x", &addr, &len); - if (rc < 2) { + if (rc < 2 || len > UINT_MAX >> 2) { netdev_warn(bnad->netdev, "failed to read user buffer\n"); kfree(kern_buf); return -EINVAL; -- cgit v1.2.3 From 3dc857f0e8fc22610a59cbb346ba62c6e921863f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 17 Mar 2017 16:07:11 -0700 Subject: net: vrf: Reset rt6i_idev in local dst after put The VRF driver takes a reference to the inet6_dev on the VRF device for its rt6_local dst when handling local traffic through the VRF device as a loopback. When the device is deleted the driver does a put on the idev but does not reset rt6i_idev in the rt6_info struct. When the dst is destroyed, dst_destroy calls ip6_dst_destroy which does a second put for what is essentially the same reference causing it to be prematurely freed. Reset rt6i_idev after the put in the vrf driver. Fixes: b4869aa2f881e ("net: vrf: ipv6 support for local traffic to local addresses") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index fea687f35b5a..d6988db1930d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -462,8 +462,10 @@ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) } if (rt6_local) { - if (rt6_local->rt6i_idev) + if (rt6_local->rt6i_idev) { in6_dev_put(rt6_local->rt6i_idev); + rt6_local->rt6i_idev = NULL; + } dst = &rt6_local->dst; dev_put(dst->dev); -- cgit v1.2.3 From 486a43db2e26b87125b5629e1ade516f90833934 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 18 Mar 2017 19:12:22 +0800 Subject: sctp: remove temporary variable confirm from sctp_packet_transmit Commit c86a773c7802 ("sctp: add dst_pending_confirm flag") introduced a temporary variable "confirm" in sctp_packet_transmit. But it broke the rule that longer lines should be above shorter ones. Besides, this variable is not necessary, so this patch is to just remove it and use tp->dst_pending_confirm directly. Fixes: c86a773c7802 ("sctp: add dst_pending_confirm flag") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/output.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 71ce6b945dcb..1224421036b3 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -546,7 +546,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) struct sctp_association *asoc = tp->asoc; struct sctp_chunk *chunk, *tmp; int pkt_count, gso = 0; - int confirm; struct dst_entry *dst; struct sk_buff *head; struct sctphdr *sh; @@ -625,13 +624,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) asoc->peer.last_sent_to = tp; } head->ignore_df = packet->ipfragok; - confirm = tp->dst_pending_confirm; - if (confirm) + if (tp->dst_pending_confirm) skb_set_dst_pending_confirm(head, 1); /* neighbour should be confirmed on successful transmission or * positive error */ - if (tp->af_specific->sctp_xmit(head, tp) >= 0 && confirm) + if (tp->af_specific->sctp_xmit(head, tp) >= 0 && + tp->dst_pending_confirm) tp->dst_pending_confirm = 0; out: -- cgit v1.2.3 From 1f904495b79003cd3d881de8731377d48fcbc7e3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 18 Mar 2017 19:27:23 +0800 Subject: sctp: define dst_pending_confirm as a bit in sctp_transport As tp->dst_pending_confirm's value can only be set 0 or 1, this patch is to change to define it as a bit instead of __u32. Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 07a0b128625a..4f645198e9bd 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -753,6 +753,8 @@ struct sctp_transport { /* Is the Path MTU update pending on this tranport */ pmtu_pending:1, + dst_pending_confirm:1, /* need to confirm neighbour */ + /* Has this transport moved the ctsn since we last sacked */ sack_generation:1; u32 dst_cookie; @@ -806,8 +808,6 @@ struct sctp_transport { __u32 burst_limited; /* Holds old cwnd when max.burst is applied */ - __u32 dst_pending_confirm; /* need to confirm neighbour */ - /* Destination */ struct dst_entry *dst; /* Source address. */ -- cgit v1.2.3 From 23bb09cfbe04076ef647da3889a5a5ab6cbe6f15 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 18 Mar 2017 20:03:59 +0800 Subject: sctp: out_qlen should be updated when pruning unsent queue This patch is to fix the issue that sctp_prsctp_prune_sent forgot to update q->out_qlen when removing a chunk from unsent queue. Fixes: 8dbdf1f5b09c ("sctp: implement prsctp PRIO policy") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index db352e5d61f8..025ccff67072 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -382,17 +382,18 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, } static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, - struct sctp_sndrcvinfo *sinfo, - struct list_head *queue, int msg_len) + struct sctp_sndrcvinfo *sinfo, int msg_len) { + struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; - list_for_each_entry_safe(chk, temp, queue, list) { + list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; list_del_init(&chk->list); + q->out_qlen -= chk->skb->len; asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; @@ -431,9 +432,7 @@ void sctp_prsctp_prune(struct sctp_association *asoc, return; } - sctp_prsctp_prune_unsent(asoc, sinfo, - &asoc->outqueue.out_chunk_list, - msg_len); + sctp_prsctp_prune_unsent(asoc, sinfo, msg_len); } /* Mark all the eligible packets on a transport for retransmission. */ -- cgit v1.2.3 From ff010472fb75670cb5c08671e820eeea3af59c87 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 21 Mar 2017 11:36:06 +0530 Subject: cpufreq: Restore policy min/max limits on CPU online On CPU online the cpufreq core restores the previous governor (or the previous "policy" setting for ->setpolicy drivers), but it does not restore the min/max limits at the same time, which is confusing, inconsistent and real pain for users who set the limits and then suspend/resume the system (using full suspend), in which case the limits are reset on all CPUs except for the boot one. Fix this by making cpufreq_online() restore the limits when an inactive policy is brought online. The commit log and patch are inspired from Rafael's earlier work. Reported-by: Rafael J. Wysocki Signed-off-by: Viresh Kumar Cc: 4.3+ # 4.3+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index b8ff617d449d..5dbdd261aa73 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1184,6 +1184,9 @@ static int cpufreq_online(unsigned int cpu) for_each_cpu(j, policy->related_cpus) per_cpu(cpufreq_cpu_data, j) = policy; write_unlock_irqrestore(&cpufreq_driver_lock, flags); + } else { + policy->min = policy->user_policy.min; + policy->max = policy->user_policy.max; } if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { -- cgit v1.2.3 From 8605330aac5a5785630aec8f64378a54891937cc Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 18 Mar 2017 17:02:59 -0400 Subject: tcp: fix SCM_TIMESTAMPING_OPT_STATS for normal skbs __sock_recv_timestamp can be called for both normal skbs (for receive timestamps) and for skbs on the error queue (for transmit timestamps). Commit 1c885808e456 (tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING) assumes any skb passed to __sock_recv_timestamp are from the error queue, containing OPT_STATS in the content of the skb. This results in accessing invalid memory or generating junk data. To fix this, set skb->pkt_type to PACKET_OUTGOING for packets on the error queue. This is safe because on the receive path on local sockets skb->pkt_type is never set to PACKET_OUTGOING. With that, copy OPT_STATS from a packet, only if its pkt_type is PACKET_OUTGOING. Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING") Reported-by: JongHwan Kim Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 10 ++++++++++ net/socket.c | 13 ++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cd4ba8c6b609..b1fbd1958eb6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3694,6 +3694,15 @@ static void sock_rmem_free(struct sk_buff *skb) atomic_sub(skb->truesize, &sk->sk_rmem_alloc); } +static void skb_set_err_queue(struct sk_buff *skb) +{ + /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING. + * So, it is safe to (mis)use it to mark skbs on the error queue. + */ + skb->pkt_type = PACKET_OUTGOING; + BUILD_BUG_ON(PACKET_OUTGOING == 0); +} + /* * Note: We dont mem charge error packets (no sk_forward_alloc changes) */ @@ -3707,6 +3716,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_rmem_free; atomic_add(skb->truesize, &sk->sk_rmem_alloc); + skb_set_err_queue(skb); /* before exiting rcu section, make sure dst is refcounted */ skb_dst_force(skb); diff --git a/net/socket.c b/net/socket.c index e034fe4164be..692d6989d2c2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -652,6 +652,16 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +static bool skb_is_err_queue(const struct sk_buff *skb) +{ + /* pkt_type of skbs enqueued on the error queue are set to + * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do + * in recvmsg, since skbs received on a local socket will never + * have a pkt_type of PACKET_OUTGOING. + */ + return skb->pkt_type == PACKET_OUTGOING; +} + /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ @@ -695,7 +705,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(tss), &tss); - if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) + if (skb_is_err_queue(skb) && skb->len && + (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, skb->len, skb->data); } -- cgit v1.2.3 From 4ef1b2869447411ad3ef91ad7d4891a83c1a509a Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Sat, 18 Mar 2017 17:03:00 -0400 Subject: tcp: mark skbs with SCM_TIMESTAMPING_OPT_STATS SOF_TIMESTAMPING_OPT_STATS can be enabled and disabled while packets are collected on the error queue. So, checking SOF_TIMESTAMPING_OPT_STATS in sk->sk_tsflags is not enough to safely assume that the skb contains OPT_STATS data. Add a bit in sock_exterr_skb to indicate whether the skb contains opt_stats data. Fixes: 1c885808e456 ("tcp: SOF_TIMESTAMPING_OPT_STATS option for SO_TIMESTAMPING") Reported-by: JongHwan Kim Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/errqueue.h | 2 ++ net/core/skbuff.c | 17 +++++++++++------ net/socket.c | 2 +- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h index 9ca23fcfb5d7..6fdfc884fdeb 100644 --- a/include/linux/errqueue.h +++ b/include/linux/errqueue.h @@ -20,6 +20,8 @@ struct sock_exterr_skb { struct sock_extended_err ee; u16 addr_offset; __be16 port; + u8 opt_stats:1, + unused:7; }; #endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b1fbd1958eb6..9f781092fda9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3793,16 +3793,20 @@ EXPORT_SYMBOL(skb_clone_sk); static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, - int tstype) + int tstype, + bool opt_stats) { struct sock_exterr_skb *serr; int err; + BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); + serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = tstype; + serr->opt_stats = opt_stats; if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) { serr->ee.ee_data = skb_shinfo(skb)->tskey; if (sk->sk_protocol == IPPROTO_TCP && @@ -3843,7 +3847,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, */ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false); sock_put(sk); } } @@ -3854,7 +3858,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; - bool tsonly; + bool tsonly, opt_stats = false; if (!sk) return; @@ -3867,9 +3871,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, #ifdef CONFIG_INET if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && - sk->sk_type == SOCK_STREAM) + sk->sk_type == SOCK_STREAM) { skb = tcp_get_timestamping_opt_stats(sk); - else + opt_stats = true; + } else #endif skb = alloc_skb(0, GFP_ATOMIC); } else { @@ -3888,7 +3893,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, else skb->tstamp = ktime_get_real(); - __skb_complete_tx_timestamp(skb, sk, tstype); + __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); diff --git a/net/socket.c b/net/socket.c index 692d6989d2c2..985ef06792d6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -706,7 +706,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, SCM_TIMESTAMPING, sizeof(tss), &tss); if (skb_is_err_queue(skb) && skb->len && - (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS)) + SKB_EXT_ERR(skb)->opt_stats) put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, skb->len, skb->data); } -- cgit v1.2.3 From e8f1f34a344d060eaf1918089369c4c1172a153b Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Sun, 19 Mar 2017 23:03:14 -0700 Subject: selftests/bpf: fix broken build, take 2 Merge of 'linux-kselftest-4.11-rc1': 1. Partially removed use of 'test_objs' target, breaking force rebuild of BPFOBJ, introduced in commit d498f8719a09 ("bpf: Rebuild bpf.o for any dependency update"). Update target so dependency on BPFOBJ is restored. 2. Introduced commit 2047f1d8ba28 ("selftests: Fix the .c linking rule") which fixes order of LDLIBS. Commit d02d8986a768 ("bpf: Always test unprivileged programs") added libcap dependency into CFLAGS. Use LDLIBS instead to fix linking of test_verifier. 3. Introduced commit d83c3ba0b926 ("selftests: Fix selftests build to just build, not run tests"). Reordering the Makefile allows us to remove the 'all' target. Tested both: selftests/bpf$ make and selftests$ make TARGETS=bpf on Ubuntu 16.04.2. Signed-off-by: Zi Shen Lim Acked-by: Daniel Borkmann Tested-by: Daniel Borkmann Acked-by: Alexei Starovoitov Tested-by: Alexei Starovoitov Acked-by: Shuah Khan Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/Makefile | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 67531f47781b..6a1ad58cb66f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,22 +1,23 @@ LIBDIR := ../../../lib -BPFOBJ := $(LIBDIR)/bpf/bpf.o +BPFDIR := $(LIBDIR)/bpf -CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ) +CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR) +LDLIBS += -lcap TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map TEST_PROGS := test_kmod.sh -all: $(TEST_GEN_PROGS) +include ../lib.mk + +BPFOBJ := $(OUTPUT)/bpf.o + +$(TEST_GEN_PROGS): $(BPFOBJ) -.PHONY: all clean force +.PHONY: force # force a rebuild of BPFOBJ when its dependencies are updated force: $(BPFOBJ): force - $(MAKE) -C $(dir $(BPFOBJ)) - -$(test_objs): $(BPFOBJ) - -include ../lib.mk + $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ -- cgit v1.2.3 From 4071898bf0f4d79ff353db327af2a15123272548 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sun, 19 Mar 2017 09:19:57 -0700 Subject: net: qmi_wwan: Add USB IDs for MDM6600 modem on Motorola Droid 4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gets qmicli working with the MDM6600 modem. Cc: Bjørn Mork Reviewed-by: Sebastian Reichel Tested-by: Sebastian Reichel Signed-off-by: Tony Lindgren Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index f8d55aa058ec..156f7f85e486 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -580,6 +580,10 @@ static const struct usb_device_id products[] = { USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69), .driver_info = (unsigned long)&qmi_wwan_info, }, + { /* Motorola Mapphone devices with MDM6600 */ + USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff), + .driver_info = (unsigned long)&qmi_wwan_info, + }, /* 2. Combined interface devices matching on class+protocol */ { /* Huawei E367 and possibly others in "Windows mode" */ -- cgit v1.2.3 From bc2d4b62db67f817b09c782219996630e9c2f5e2 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 22 Mar 2017 12:35:31 +0800 Subject: drm/i915/gvt: Use force single submit flag to distinguish gvt request from i915 request In my previous Commit ab9da627906a ("drm/i915: make context status notifier head be per engine") rely on scheduler->current_workload[x] to distinguish gvt spacial request from i915 request. But this is not always true since no synchronization between workload_thread and lrc irq handler. lrc irq handler workload_thread ---- ---- pick i915 requests; intel_vgpu_submit_execlist(); current_workload[x] = xxx; shadow_context_status_change(); Then current_workload[x] is not null but current request is of i915 self. So instead we check ctx flag CONTEXT_FORCE_SINGLE_SUBMISSION. Only gvt request set this flag and always set. v2: Reverse the order of multi-condition 'if' statement. Fixes: ab9da6279 ("drm/i915: make context status notifier head be per engine") Signed-off-by: Changbin Du Reviewed-by: Yulei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 39a83eb7aecc..d0c04155f7d5 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -127,6 +127,11 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return 0; } +static inline bool is_gvt_request(struct drm_i915_gem_request *req) +{ + return i915_gem_context_force_single_submission(req->ctx); +} + static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { @@ -139,7 +144,7 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_vgpu_workload *workload = scheduler->current_workload[req->engine->id]; - if (unlikely(!workload)) + if (!is_gvt_request(req) || unlikely(!workload)) return NOTIFY_OK; switch (action) { -- cgit v1.2.3 From ad4830051aac0b967add82ac168f49edf11f01a0 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 21 Mar 2017 18:16:47 -0500 Subject: x86/platform/uv: Fix calculation of Global Physical Address The calculation of the global physical address (GPA) on UV4 is incorrect. The gnode_extra/upper global offset should only be applied for fixed address space systems (UV1..3). Tested-by: John Estabrook Signed-off-by: Mike Travis Cc: Dimitri Sivanich Cc: John Estabrook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170321231646.667689538@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 8 +++++--- arch/x86/kernel/apic/x2apic_uv_x.c | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 72e8300b1e8a..9cffb44a3cf5 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -485,15 +485,17 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) if (paddr < uv_hub_info->lowmem_remap_top) paddr |= uv_hub_info->lowmem_remap_base; - paddr |= uv_hub_info->gnode_upper; - if (m_val) + + if (m_val) { + paddr |= uv_hub_info->gnode_upper; paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift); - else + } else { paddr |= uv_soc_phys_ram_to_nasid(paddr) << uv_hub_info->gpa_shift; + } return paddr; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e9f8f8cdd570..86f20cc0a65e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1105,7 +1105,8 @@ void __init uv_init_hub_info(struct uv_hub_info_s *hi) node_id.v = uv_read_local_mmr(UVH_NODE_ID); uv_cpuid.gnode_shift = max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val); hi->gnode_extra = (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1; - hi->gnode_upper = (unsigned long)hi->gnode_extra << mn.m_val; + if (mn.m_val) + hi->gnode_upper = (u64)hi->gnode_extra << mn.m_val; if (uv_gp_table) { hi->global_mmr_base = uv_gp_table->mmr_base; -- cgit v1.2.3 From dd7406dd334a98ada3ff5371847a3eeb4ba16313 Mon Sep 17 00:00:00 2001 From: Alex Hemme Date: Tue, 7 Mar 2017 14:38:29 -0500 Subject: hwmon: (max31790) Set correct PWM value Traced fans not spinning to incorrect PWM value being written. The passed in value was written instead of the calulated value. Fixes: 54187ff9d766 ("hwmon: (max31790) Convert to use new hwmon registration API") Signed-off-by: Alex Hemme Signed-off-by: Guenter Roeck --- drivers/hwmon/max31790.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c index c1b9275978f9..281491cca510 100644 --- a/drivers/hwmon/max31790.c +++ b/drivers/hwmon/max31790.c @@ -311,7 +311,7 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel, data->pwm[channel] = val << 8; err = i2c_smbus_write_word_swapped(client, MAX31790_REG_PWMOUT(channel), - val); + data->pwm[channel]); break; case hwmon_pwm_enable: fan_config = data->fan_config[channel]; -- cgit v1.2.3 From 8358378b22518d92424597503d3c1cd302a490b6 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 12 Mar 2017 06:18:58 -0700 Subject: hwmon: (it87) Avoid registering the same chip on both SIO addresses IT8705F is known to respond on both SIO addresses. Registering it twice may result in system lockups. Reported-by: Russell King Fixes: e84bd9535e2b ("hwmon: (it87) Add support for second Super-IO chip") Signed-off-by: Guenter Roeck --- drivers/hwmon/it87.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index efb01c247e2d..4dfc7238313e 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -3198,7 +3198,7 @@ static int __init sm_it87_init(void) { int sioaddr[2] = { REG_2E, REG_4E }; struct it87_sio_data sio_data; - unsigned short isa_address; + unsigned short isa_address[2]; bool found = false; int i, err; @@ -3208,15 +3208,29 @@ static int __init sm_it87_init(void) for (i = 0; i < ARRAY_SIZE(sioaddr); i++) { memset(&sio_data, 0, sizeof(struct it87_sio_data)); - isa_address = 0; - err = it87_find(sioaddr[i], &isa_address, &sio_data); - if (err || isa_address == 0) + isa_address[i] = 0; + err = it87_find(sioaddr[i], &isa_address[i], &sio_data); + if (err || isa_address[i] == 0) continue; + /* + * Don't register second chip if its ISA address matches + * the first chip's ISA address. + */ + if (i && isa_address[i] == isa_address[0]) + break; - err = it87_device_add(i, isa_address, &sio_data); + err = it87_device_add(i, isa_address[i], &sio_data); if (err) goto exit_dev_unregister; + found = true; + + /* + * IT8705F may respond on both SIO addresses. + * Stop probing after finding one. + */ + if (sio_data.type == it87) + break; } if (!found) { -- cgit v1.2.3 From a5023a99393dab276069cd60dad3e61d57720fda Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Fri, 17 Mar 2017 00:28:56 +0100 Subject: hwmon: Add missing HWMON_T_ALARM Unfortunately the HWMON_T_ALARM define was missing, although the associated entry was present in hwmon_temp_attributes. This is needed to convert drivers to the new interface which use channel based alarms. Signed-off-by: Peter Huewe Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 78d59dba563e..88b673749121 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -88,6 +88,7 @@ enum hwmon_temp_attributes { #define HWMON_T_CRIT_HYST BIT(hwmon_temp_crit_hyst) #define HWMON_T_EMERGENCY BIT(hwmon_temp_emergency) #define HWMON_T_EMERGENCY_HYST BIT(hwmon_temp_emergency_hyst) +#define HWMON_T_ALARM BIT(hwmon_temp_alarm) #define HWMON_T_MIN_ALARM BIT(hwmon_temp_min_alarm) #define HWMON_T_MAX_ALARM BIT(hwmon_temp_max_alarm) #define HWMON_T_CRIT_ALARM BIT(hwmon_temp_crit_alarm) -- cgit v1.2.3 From d82c0d12c92705ef468683c9b7a8298dd61ed191 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Cerri Date: Mon, 13 Mar 2017 12:14:58 -0300 Subject: s390/decompressor: fix initrd corruption caused by bss clear Reorder the operations in decompress_kernel() to ensure initrd is moved to a safe location before the bss section is zeroed. During decompression bss can overlap with the initrd and this can corrupt the initrd contents depending on the size of the compressed kernel (which affects where the initrd is placed by the bootloader) and the size of the bss section of the decompressor. Also use the correct initrd size when checking for overlaps with parmblock. Fixes: 06c0dd72aea3 ([S390] fix boot failures with compressed kernels) Cc: stable@vger.kernel.org Reviewed-by: Joy Latten Reviewed-by: Vineetha HariPai Signed-off-by: Marcelo Henrique Cerri Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/misc.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index fa95041fa9f6..33ca29333e18 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -141,31 +141,34 @@ static void check_ipl_parmblock(void *start, unsigned long size) unsigned long decompress_kernel(void) { - unsigned long output_addr; - unsigned char *output; + void *output, *kernel_end; - output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL; - check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start); - memset(&_bss, 0, &_ebss - &_bss); - free_mem_ptr = (unsigned long)&_end; - free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; - output = (unsigned char *) output_addr; + output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE); + kernel_end = output + SZ__bss_start; + check_ipl_parmblock((void *) 0, (unsigned long) kernel_end); #ifdef CONFIG_BLK_DEV_INITRD /* * Move the initrd right behind the end of the decompressed - * kernel image. + * kernel image. This also prevents initrd corruption caused by + * bss clearing since kernel_end will always be located behind the + * current bss section.. */ - if (INITRD_START && INITRD_SIZE && - INITRD_START < (unsigned long) output + SZ__bss_start) { - check_ipl_parmblock(output + SZ__bss_start, - INITRD_START + INITRD_SIZE); - memmove(output + SZ__bss_start, - (void *) INITRD_START, INITRD_SIZE); - INITRD_START = (unsigned long) output + SZ__bss_start; + if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) { + check_ipl_parmblock(kernel_end, INITRD_SIZE); + memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = (unsigned long) kernel_end; } #endif + /* + * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be + * initialized afterwards since they reside in bss. + */ + memset(&_bss, 0, &_ebss - &_bss); + free_mem_ptr = (unsigned long) &_end; + free_mem_end_ptr = free_mem_ptr + HEAP_SIZE; + puts("Uncompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error); puts("Ok, booting the kernel.\n"); -- cgit v1.2.3 From 0861b5a754fd5539f527b445aaa61538185c8264 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 11:02:36 +0100 Subject: s390/smp: fix ipl from cpu with non-zero address Commit af51160ebd3c ("s390/smp: initialize cpu_present_mask in setup_arch") initializes the cpu_present_mask much earlier than before. However the cpu detection code relies on the fact that iff logical cpu 0 is marked present then also the corresponding physical cpu address within the pcpu_devices array slot is valid. Since commit 44fd22992cb7 ("[PATCH] Register the boot-cpu in the cpu maps earlier") this assumption is not true anymore. The patch marks logical cpu 0 as present in common code without that architecture code had a chance to setup the logical to physical map. With that change the cpu detection code assumes that the physical cpu address of cpu 0 is also 0, which is not necessarily true. Subsequently the physical cpu address of the ipl cpu will be mapped to a different logical cpu. If that cpu is brought online later the ipl cpu will send itself an initial cpu reset sigp signal. This in turn completely resets the ipl cpu and the system stops working. A dump of such a system looks like a "store status" has been forgotten. But actually the kernel itself removed all traces which would allow to easily tell what went wrong. To fix this initialize the logical to physical cpu address already in smp_setup_processor_id(). In addition remove the initialization of the cpu_present_mask and cpu_online_mask for cpu 0, since that has already been done. Also add a sanity check, just in case common code will be changed again... The problem can be easily reproduced within a z/VM guest: > chcpu -d 0 > vmcp ipl Fixes: af51160ebd3c ("s390/smp: initialize cpu_present_mask in setup_arch") Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 47a973b5b4f1..5dab859b0d54 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -909,13 +909,11 @@ void __init smp_prepare_boot_cpu(void) { struct pcpu *pcpu = pcpu_devices; + WARN_ON(!cpu_present(0) || !cpu_online(0)); pcpu->state = CPU_STATE_CONFIGURED; - pcpu->address = stap(); pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix(); S390_lowcore.percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); - set_cpu_present(0, true); - set_cpu_online(0, true); } void __init smp_cpus_done(unsigned int max_cpus) @@ -924,6 +922,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_setup_processor_id(void) { + pcpu_devices[0].address = stap(); S390_lowcore.cpu_nr = 0; S390_lowcore.spinlock_lockval = arch_spin_lockval(0); } -- cgit v1.2.3 From ca681ec860d6533b8debda4f6ab2e70e6d519d89 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 15 Mar 2017 10:58:07 +0100 Subject: s390/pkey: Fix wrong handling of secure key with old MKVP When a secure key with an old Master Key Verification Pattern was given to the pkey_findcard function, there was no responsible card found because only the current MKVP of each card was compared. With this fix also the old MKVP values are considered and so a matching card able to handle the key is reported back to the caller. Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/pkey_api.c | 53 +++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 40f1136f5568..058db724b5a2 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -572,6 +572,12 @@ int pkey_sec2protkey(u16 cardnr, u16 domain, rc = -EIO; goto out; } + if (prepcblk->ccp_rscode != 0) { + DEBUG_WARN( + "pkey_sec2protkey unwrap secure key warning, card response %d/%d\n", + (int) prepcblk->ccp_rtcode, + (int) prepcblk->ccp_rscode); + } /* process response cprb param block */ prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX); @@ -761,9 +767,10 @@ out: } /* - * Fetch just the mkvp value via query_crypto_facility from adapter. + * Fetch the current and old mkvp values via + * query_crypto_facility from adapter. */ -static int fetch_mkvp(u16 cardnr, u16 domain, u64 *mkvp) +static int fetch_mkvp(u16 cardnr, u16 domain, u64 mkvp[2]) { int rc, found = 0; size_t rlen, vlen; @@ -779,9 +786,10 @@ static int fetch_mkvp(u16 cardnr, u16 domain, u64 *mkvp) rc = query_crypto_facility(cardnr, domain, "STATICSA", rarray, &rlen, varray, &vlen); if (rc == 0 && rlen > 8*8 && vlen > 184+8) { - if (rarray[64] == '2') { + if (rarray[8*8] == '2') { /* current master key state is valid */ - *mkvp = *((u64 *)(varray + 184)); + mkvp[0] = *((u64 *)(varray + 184)); + mkvp[1] = *((u64 *)(varray + 172)); found = 1; } } @@ -796,14 +804,14 @@ struct mkvp_info { struct list_head list; u16 cardnr; u16 domain; - u64 mkvp; + u64 mkvp[2]; }; /* a list with mkvp_info entries */ static LIST_HEAD(mkvp_list); static DEFINE_SPINLOCK(mkvp_list_lock); -static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp) +static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 mkvp[2]) { int rc = -ENOENT; struct mkvp_info *ptr; @@ -812,7 +820,7 @@ static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp) list_for_each_entry(ptr, &mkvp_list, list) { if (ptr->cardnr == cardnr && ptr->domain == domain) { - *mkvp = ptr->mkvp; + memcpy(mkvp, ptr->mkvp, 2 * sizeof(u64)); rc = 0; break; } @@ -822,7 +830,7 @@ static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp) return rc; } -static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp) +static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp[2]) { int found = 0; struct mkvp_info *ptr; @@ -831,7 +839,7 @@ static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp) list_for_each_entry(ptr, &mkvp_list, list) { if (ptr->cardnr == cardnr && ptr->domain == domain) { - ptr->mkvp = mkvp; + memcpy(ptr->mkvp, mkvp, 2 * sizeof(u64)); found = 1; break; } @@ -844,7 +852,7 @@ static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp) } ptr->cardnr = cardnr; ptr->domain = domain; - ptr->mkvp = mkvp; + memcpy(ptr->mkvp, mkvp, 2 * sizeof(u64)); list_add(&ptr->list, &mkvp_list); } spin_unlock_bh(&mkvp_list_lock); @@ -888,8 +896,8 @@ int pkey_findcard(const struct pkey_seckey *seckey, struct secaeskeytoken *t = (struct secaeskeytoken *) seckey; struct zcrypt_device_matrix *device_matrix; u16 card, dom; - u64 mkvp; - int i, rc; + u64 mkvp[2]; + int i, rc, oi = -1; /* mkvp must not be zero */ if (t->mkvp == 0) @@ -910,14 +918,14 @@ int pkey_findcard(const struct pkey_seckey *seckey, device_matrix->device[i].functions & 0x04) { /* an enabled CCA Coprocessor card */ /* try cached mkvp */ - if (mkvp_cache_fetch(card, dom, &mkvp) == 0 && - t->mkvp == mkvp) { + if (mkvp_cache_fetch(card, dom, mkvp) == 0 && + t->mkvp == mkvp[0]) { if (!verify) break; /* verify: fetch mkvp from adapter */ - if (fetch_mkvp(card, dom, &mkvp) == 0) { + if (fetch_mkvp(card, dom, mkvp) == 0) { mkvp_cache_update(card, dom, mkvp); - if (t->mkvp == mkvp) + if (t->mkvp == mkvp[0]) break; } } @@ -936,14 +944,21 @@ int pkey_findcard(const struct pkey_seckey *seckey, card = AP_QID_CARD(device_matrix->device[i].qid); dom = AP_QID_QUEUE(device_matrix->device[i].qid); /* fresh fetch mkvp from adapter */ - if (fetch_mkvp(card, dom, &mkvp) == 0) { + if (fetch_mkvp(card, dom, mkvp) == 0) { mkvp_cache_update(card, dom, mkvp); - if (t->mkvp == mkvp) + if (t->mkvp == mkvp[0]) break; + if (t->mkvp == mkvp[1] && oi < 0) + oi = i; } } + if (i >= MAX_ZDEV_ENTRIES && oi >= 0) { + /* old mkvp matched, use this card then */ + card = AP_QID_CARD(device_matrix->device[oi].qid); + dom = AP_QID_QUEUE(device_matrix->device[oi].qid); + } } - if (i < MAX_ZDEV_ENTRIES) { + if (i < MAX_ZDEV_ENTRIES || oi >= 0) { if (pcardnr) *pcardnr = card; if (pdomain) -- cgit v1.2.3 From 26a37ab319a26d330bab298770d692bb9c852aff Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 Mar 2017 14:40:30 -0700 Subject: x86/mce: Fix copy/paste error in exception table entries Back in commit: 92b0729c34cab ("x86/mm, x86/mce: Add memcpy_mcsafe()") ... I made a copy/paste error setting up the exception table entries and ended up with two for label .L_cache_w3 and none for .L_cache_w2. This means that if we take a machine check on: .L_cache_w2: movq 2*8(%rsi), %r10 then we don't have an exception table entry for this instruction and we can't recover. Fix: s/3/2/ Signed-off-by: Tony Luck Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 92b0729c34cab ("x86/mm, x86/mce: Add memcpy_mcsafe()") Link: http://lkml.kernel.org/r/1490046030-25862-1-git-send-email-tony.luck@intel.com Signed-off-by: Ingo Molnar --- arch/x86/lib/memcpy_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 779782f58324..9a53a06e5a3e 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) -- cgit v1.2.3 From de288e36fe33f7e06fa272bc8e2f85aa386d99aa Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Mon, 13 Mar 2017 14:11:32 +0200 Subject: usb: dwc3: gadget: delay unmap of bounced requests In the case of bounced ep0 requests, we must delay DMA operation until after ->complete() otherwise we might overwrite contents of req->buf. This caused problems with RNDIS gadget. Signed-off-by: Janusz Dziedzic Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 0d75158e43fe..79e7a3480d51 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -171,6 +171,7 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, int status) { struct dwc3 *dwc = dep->dwc; + unsigned int unmap_after_complete = false; req->started = false; list_del(&req->list); @@ -180,11 +181,19 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, if (req->request.status == -EINPROGRESS) req->request.status = status; - if (dwc->ep0_bounced && dep->number <= 1) + /* + * NOTICE we don't want to unmap before calling ->complete() if we're + * dealing with a bounced ep0 request. If we unmap it here, we would end + * up overwritting the contents of req->buf and this could confuse the + * gadget driver. + */ + if (dwc->ep0_bounced && dep->number <= 1) { dwc->ep0_bounced = false; - - usb_gadget_unmap_request_by_dev(dwc->sysdev, - &req->request, req->direction); + unmap_after_complete = true; + } else { + usb_gadget_unmap_request_by_dev(dwc->sysdev, + &req->request, req->direction); + } trace_dwc3_gadget_giveback(req); @@ -192,6 +201,10 @@ void dwc3_gadget_giveback(struct dwc3_ep *dep, struct dwc3_request *req, usb_gadget_giveback_request(&dep->endpoint, &req->request); spin_lock(&dwc->lock); + if (unmap_after_complete) + usb_gadget_unmap_request_by_dev(dwc->sysdev, + &req->request, req->direction); + if (dep->number > 1) pm_runtime_put(dwc->dev); } -- cgit v1.2.3 From 74098c4ac782afd71b35ac56f9536ae2f5cd7da7 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 14 Mar 2017 12:09:56 +0100 Subject: usb: gadget: acm: fix endianness in notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gadget code exports the bitfield for serial status changes over the wire in its internal endianness. The fix is to convert to little endian before sending it over the wire. Signed-off-by: Oliver Neukum Tested-by: 家瑋 CC: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_acm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_acm.c b/drivers/usb/gadget/function/f_acm.c index a30766ca4226..5e3828d9dac7 100644 --- a/drivers/usb/gadget/function/f_acm.c +++ b/drivers/usb/gadget/function/f_acm.c @@ -535,13 +535,15 @@ static int acm_notify_serial_state(struct f_acm *acm) { struct usb_composite_dev *cdev = acm->port.func.config->cdev; int status; + __le16 serial_state; spin_lock(&acm->lock); if (acm->notify_req) { dev_dbg(&cdev->gadget->dev, "acm ttyGS%d serial state %04x\n", acm->port_num, acm->serial_state); + serial_state = cpu_to_le16(acm->serial_state); status = acm_cdc_notify(acm, USB_CDC_NOTIFY_SERIAL_STATE, - 0, &acm->serial_state, sizeof(acm->serial_state)); + 0, &serial_state, sizeof(acm->serial_state)); } else { acm->pending = true; status = 0; -- cgit v1.2.3 From 09424c50b7dff40cb30011c09114404a4656e023 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 8 Mar 2017 16:05:43 +0200 Subject: usb: gadget: f_uvc: Fix SuperSpeed companion descriptor's wBytesPerInterval The streaming_maxburst module parameter is 0 offset (0..15) so we must add 1 while using it for wBytesPerInterval calculation for the SuperSpeed companion descriptor. Without this host uvcvideo driver will always see the wrong wBytesPerInterval for SuperSpeed uvc gadget and may not find a suitable video interface endpoint. e.g. for streaming_maxburst = 0 case it will always fail as wBytePerInterval was evaluating to 0. Cc: stable@vger.kernel.org Reviewed-by: Laurent Pinchart Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uvc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 29b41b5dee04..c7689d05356c 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -625,7 +625,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) uvc_ss_streaming_comp.bMaxBurst = opts->streaming_maxburst; uvc_ss_streaming_comp.wBytesPerInterval = cpu_to_le16(max_packet_size * max_packet_mult * - opts->streaming_maxburst); + (opts->streaming_maxburst + 1)); /* Allocate endpoints. */ ep = usb_ep_autoconfig(cdev->gadget, &uvc_control_ep); -- cgit v1.2.3 From 16bb05d98c904a4f6c5ce7e2d992299f794acbf2 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 8 Mar 2017 16:05:44 +0200 Subject: usb: gadget: f_uvc: Sanity check wMaxPacketSize for SuperSpeed As per USB3.0 Specification "Table 9-20. Standard Endpoint Descriptor", for interrupt and isochronous endpoints, wMaxPacketSize must be set to 1024 if the endpoint defines bMaxBurst to be greater than zero. Reviewed-by: Laurent Pinchart Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uvc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index c7689d05356c..f8a1881609a2 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -594,6 +594,14 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U); opts->streaming_maxburst = min(opts->streaming_maxburst, 15U); + /* For SS, wMaxPacketSize has to be 1024 if bMaxBurst is not 0 */ + if (opts->streaming_maxburst && + (opts->streaming_maxpacket % 1024) != 0) { + opts->streaming_maxpacket = roundup(opts->streaming_maxpacket, 1024); + INFO(cdev, "overriding streaming_maxpacket to %d\n", + opts->streaming_maxpacket); + } + /* Fill in the FS/HS/SS Video Streaming specific descriptors from the * module parameters. * -- cgit v1.2.3 From 1f459262b0e1649a1e5ad12fa4c66eb76c2220ce Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 10 Mar 2017 15:39:32 -0600 Subject: usb: gadget: udc: remove pointer dereference after free Remove pointer dereference after free. Addresses-Coverity-ID: 1091173 Acked-by: Michal Nazarewicz Signed-off-by: Gustavo A. R. Silva Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/pch_udc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index a97da645c1b9..8a365aad66fe 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -1523,7 +1523,6 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev, td = phys_to_virt(addr); addr2 = (dma_addr_t)td->next; pci_pool_free(dev->data_requests, td, addr); - td->next = 0x00; addr = addr2; } req->chain_len = 1; -- cgit v1.2.3 From 25cd9721c2b16ee0d775e36ec3af31f392003f80 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Tue, 31 Jan 2017 18:12:31 +0100 Subject: usb: gadget: f_hid: fix: Don't access hidg->req without spinlock held hidg->req should be accessed only with write_spinlock held as it is set to NULL when we get disabled by host. Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_hid.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 89b48bcc377a..5eea44823ca0 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -367,7 +367,7 @@ try_again: count = min_t(unsigned, count, hidg->report_length); spin_unlock_irqrestore(&hidg->write_spinlock, flags); - status = copy_from_user(hidg->req->buf, buffer, count); + status = copy_from_user(req->buf, buffer, count); if (status != 0) { ERROR(hidg->func.config->cdev, @@ -378,9 +378,9 @@ try_again: spin_lock_irqsave(&hidg->write_spinlock, flags); - /* we our function has been disabled by host */ + /* when our function has been disabled by host */ if (!hidg->req) { - free_ep_req(hidg->in_ep, hidg->req); + free_ep_req(hidg->in_ep, req); /* * TODO * Should we fail with error here? @@ -394,7 +394,7 @@ try_again: req->complete = f_hidg_req_complete; req->context = hidg; - status = usb_ep_queue(hidg->in_ep, hidg->req, GFP_ATOMIC); + status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC); if (status < 0) { ERROR(hidg->func.config->cdev, "usb_ep_queue error on int endpoint %zd\n", status); -- cgit v1.2.3 From 90400c5884b89a5db232049721ef4dc1ab2f1f1c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Feb 2017 14:35:54 +0200 Subject: extcon: int3496: Rename GPIO pins in accordance with binding Update GPIO pin names in extcon-intel-int3496.c driver to follow the existing extcon binding. Signed-off-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index 38eb6cab938f..81713bf7487e 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -105,13 +105,13 @@ static int int3496_probe(struct platform_device *pdev) return data->usb_id_irq; } - data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus en", + data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus", INT3496_GPIO_VBUS_EN, GPIOD_ASIS); if (IS_ERR(data->gpio_vbus_en)) dev_info(dev, "can't request VBUS EN GPIO\n"); - data->gpio_usb_mux = devm_gpiod_get_index(dev, "usb mux", + data->gpio_usb_mux = devm_gpiod_get_index(dev, "mux", INT3496_GPIO_USB_MUX, GPIOD_ASIS); if (IS_ERR(data->gpio_usb_mux)) -- cgit v1.2.3 From 8cb2cbae45ae46b1e1be16950670d5c5d4408474 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Feb 2017 14:35:55 +0200 Subject: extcon: int3496: Add GPIO ACPI mapping table In order to make GPIO ACPI library stricter prepare users of gpiod_get_index() to correctly behave when there no mapping is provided by firmware. Here we add explicit mapping between _CRS GpioIo() resources and their names used in the driver. Signed-off-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- Documentation/extcon/intel-int3496.txt | 5 +++++ drivers/extcon/extcon-intel-int3496.c | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/Documentation/extcon/intel-int3496.txt b/Documentation/extcon/intel-int3496.txt index af0b366c25b7..8155dbc7fad3 100644 --- a/Documentation/extcon/intel-int3496.txt +++ b/Documentation/extcon/intel-int3496.txt @@ -20,3 +20,8 @@ Index 1: The output gpio for enabling Vbus output from the device to the otg Index 2: The output gpio for muxing of the data pins between the USB host and the USB peripheral controller, write 1 to mux to the peripheral controller + +There is a mapping between indices and GPIO connection IDs as follows + id index 0 + vbus index 1 + mux index 2 diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index 81713bf7487e..22a529eb1b1a 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -45,6 +45,17 @@ static const unsigned int int3496_cable[] = { EXTCON_NONE, }; +static const struct acpi_gpio_params id_gpios = { INT3496_GPIO_USB_ID, 0, false }; +static const struct acpi_gpio_params vbus_gpios = { INT3496_GPIO_VBUS_EN, 0, false }; +static const struct acpi_gpio_params mux_gpios = { INT3496_GPIO_USB_MUX, 0, false }; + +static const struct acpi_gpio_mapping acpi_int3496_default_gpios[] = { + { "id-gpios", &id_gpios, 1 }, + { "vbus-gpios", &vbus_gpios, 1 }, + { "mux-gpios", &mux_gpios, 1 }, + { }, +}; + static void int3496_do_usb_id(struct work_struct *work) { struct int3496_data *data = @@ -83,6 +94,13 @@ static int int3496_probe(struct platform_device *pdev) struct int3496_data *data; int ret; + ret = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), + acpi_int3496_default_gpios); + if (ret) { + dev_err(dev, "can't add GPIO ACPI mapping\n"); + return ret; + } + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -154,6 +172,8 @@ static int int3496_remove(struct platform_device *pdev) devm_free_irq(&pdev->dev, data->usb_id_irq, data); cancel_delayed_work_sync(&data->work); + acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pdev->dev)); + return 0; } -- cgit v1.2.3 From 059c7874b87575e48d2c7702849ff56017059195 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 2 Mar 2017 18:10:10 +0000 Subject: extcon: int3496: Add dependency on X86 as it's Intel specific Add dependency on X86 so it doesn't show up on other arches and add a option for compile test so it still gets build coverage. Signed-off-by: Peter Robinson Signed-off-by: Chanwoo Choi --- drivers/extcon/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig index 96bbae579c0b..fc09c76248b4 100644 --- a/drivers/extcon/Kconfig +++ b/drivers/extcon/Kconfig @@ -44,7 +44,7 @@ config EXTCON_GPIO config EXTCON_INTEL_INT3496 tristate "Intel INT3496 ACPI device extcon driver" - depends on GPIOLIB && ACPI + depends on GPIOLIB && ACPI && (X86 || COMPILE_TEST) help Say Y here to enable extcon support for USB OTG ports controlled by an Intel INT3496 ACPI device. -- cgit v1.2.3 From 408c5b41d215b317ab58c98b959454407ee4a53d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Mar 2017 21:52:08 +0100 Subject: extcon: int3496: Use gpiod_get instead of gpiod_get_index Now that we've an acpi mapping table we should be using gpiod_get instead of gpiod_get_index. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index 22a529eb1b1a..00f0e60a00ce 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -108,9 +108,7 @@ static int int3496_probe(struct platform_device *pdev) data->dev = dev; INIT_DELAYED_WORK(&data->work, int3496_do_usb_id); - data->gpio_usb_id = devm_gpiod_get_index(dev, "id", - INT3496_GPIO_USB_ID, - GPIOD_IN); + data->gpio_usb_id = devm_gpiod_get(dev, "id", GPIOD_IN); if (IS_ERR(data->gpio_usb_id)) { ret = PTR_ERR(data->gpio_usb_id); dev_err(dev, "can't request USB ID GPIO: %d\n", ret); @@ -123,15 +121,11 @@ static int int3496_probe(struct platform_device *pdev) return data->usb_id_irq; } - data->gpio_vbus_en = devm_gpiod_get_index(dev, "vbus", - INT3496_GPIO_VBUS_EN, - GPIOD_ASIS); + data->gpio_vbus_en = devm_gpiod_get(dev, "vbus", GPIOD_ASIS); if (IS_ERR(data->gpio_vbus_en)) dev_info(dev, "can't request VBUS EN GPIO\n"); - data->gpio_usb_mux = devm_gpiod_get_index(dev, "mux", - INT3496_GPIO_USB_MUX, - GPIOD_ASIS); + data->gpio_usb_mux = devm_gpiod_get(dev, "mux", GPIOD_ASIS); if (IS_ERR(data->gpio_usb_mux)) dev_info(dev, "can't request USB MUX GPIO\n"); -- cgit v1.2.3 From 70216fd937fea79c20705400540fa48f86ddf1c5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 13 Mar 2017 12:28:34 +0100 Subject: extcon: int3496: Set the id pin to direction-input if necessary With the new more strict ACPI gpio code the dsdt's IoRestriction flags are honored on gpiod_get, but in some dsdt's it is wrong, so explicitly call gpiod_direction_input on the id gpio if necessary. This fixes the following errors when the int3496 code is used together with the new more strict ACPI gpio code: [ 2382.484415] gpio gpiochip1: (INT33FF:01): gpiochip_lock_as_irq: tried to flag a GPIO set as output for IRQ [ 2382.484425] gpio gpiochip1: (INT33FF:01): unable to lock HW IRQ 3 for IRQ [ 2382.484429] genirq: Failed to request resources for INT3496:00 (irq 174) on irqchip chv-gpio [ 2382.484518] intel-int3496 INT3496:00: can't request IRQ for USB ID GPIO: -22 [ 2382.500359] intel-int3496: probe of INT3496:00 failed with error -22 Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-intel-int3496.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/extcon/extcon-intel-int3496.c b/drivers/extcon/extcon-intel-int3496.c index 00f0e60a00ce..9d17984bbbd4 100644 --- a/drivers/extcon/extcon-intel-int3496.c +++ b/drivers/extcon/extcon-intel-int3496.c @@ -113,6 +113,9 @@ static int int3496_probe(struct platform_device *pdev) ret = PTR_ERR(data->gpio_usb_id); dev_err(dev, "can't request USB ID GPIO: %d\n", ret); return ret; + } else if (gpiod_get_direction(data->gpio_usb_id) != GPIOF_DIR_IN) { + dev_warn(dev, FW_BUG "USB ID GPIO not in input mode, fixing\n"); + gpiod_direction_input(data->gpio_usb_id); } data->usb_id_irq = gpiod_to_irq(data->gpio_usb_id); -- cgit v1.2.3 From db8466c581cca1a08b505f1319c3ecd246f16fa8 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 21 Mar 2017 14:52:25 +0000 Subject: MIPS: IRQ Stack: Unwind IRQ stack onto task stack When the separate IRQ stack was introduced, stack unwinding only proceeded as far as the top of the IRQ stack, leading to kernel backtraces being less useful, lacking the trace of what was interrupted. Fix this by providing a means for the kernel to unwind the IRQ stack onto the interrupted task stack. The processor state is saved to the kernel task stack on interrupt. The IRQ_STACK_START macro reserves an unsigned long at the top of the IRQ stack where the interrupted task stack pointer can be saved. After the active stack is switched to the IRQ stack, save the interrupted tasks stack pointer to the reserved location. Fix the stack unwinding code to look for the frame being the top of the IRQ stack and if so get the next frame from the saved location. The existing test does not work with the separate stack since the ra is no longer pointed at ret_from_{irq,exception}. The test to stop unwinding the stack 32 bytes from the top of a stack must be modified to allow unwinding to continue up to the location of the saved task stack pointer when on the IRQ stack. The low / high marks of the stack are set depending on whether the sp is on an irq stack or not. Signed-off-by: Matt Redfearn Cc: Paolo Bonzini Cc: Marcin Nowakowski Cc: Masanari Iida Cc: Chris Metcalf Cc: James Hogan Cc: Paul Burton Cc: Ingo Molnar Cc: Jason A. Donenfeld Cc: Andrew Morton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/15788/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/irq.h | 15 +++++++++++ arch/mips/kernel/asm-offsets.c | 1 + arch/mips/kernel/genex.S | 8 ++++-- arch/mips/kernel/process.c | 56 ++++++++++++++++++++++++++++-------------- 4 files changed, 60 insertions(+), 20 deletions(-) diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 956db6e201d1..ddd1c918103b 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -18,9 +18,24 @@ #include #define IRQ_STACK_SIZE THREAD_SIZE +#define IRQ_STACK_START (IRQ_STACK_SIZE - sizeof(unsigned long)) extern void *irq_stack[NR_CPUS]; +/* + * The highest address on the IRQ stack contains a dummy frame put down in + * genex.S (handle_int & except_vec_vi_handler) which is structured as follows: + * + * top ------------ + * | task sp | <- irq_stack[cpu] + IRQ_STACK_START + * ------------ + * | | <- First frame of IRQ context + * ------------ + * + * task sp holds a copy of the task stack pointer where the struct pt_regs + * from exception entry can be found. + */ + static inline bool on_irq_stack(int cpu, unsigned long sp) { unsigned long low = (unsigned long)irq_stack[cpu]; diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index bb5c5d34ba81..a670c0c11875 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -102,6 +102,7 @@ void output_thread_info_defines(void) DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); + DEFINE(_IRQ_STACK_START, IRQ_STACK_START); BLANK(); } diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 2ac6c2625c13..ae810da4d499 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -215,9 +215,11 @@ NESTED(handle_int, PT_SIZE, sp) beq t0, t1, 2f /* Switch to IRQ stack */ - li t1, _IRQ_STACK_SIZE + li t1, _IRQ_STACK_START PTR_ADD sp, t0, t1 + /* Save task's sp on IRQ stack so that unwinding can follow it */ + LONG_S s1, 0(sp) 2: jal plat_irq_dispatch @@ -325,9 +327,11 @@ NESTED(except_vec_vi_handler, 0, sp) beq t0, t1, 2f /* Switch to IRQ stack */ - li t1, _IRQ_STACK_SIZE + li t1, _IRQ_STACK_START PTR_ADD sp, t0, t1 + /* Save task's sp on IRQ stack so that unwinding can follow it */ + LONG_S s1, 0(sp) 2: jalr v0 diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index fb6b6b650719..b68e10fc453d 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -488,31 +488,52 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page, unsigned long pc, unsigned long *ra) { + unsigned long low, high, irq_stack_high; struct mips_frame_info info; unsigned long size, ofs; + struct pt_regs *regs; int leaf; - extern void ret_from_irq(void); - extern void ret_from_exception(void); if (!stack_page) return 0; /* - * If we reached the bottom of interrupt context, - * return saved pc in pt_regs. + * IRQ stacks start at IRQ_STACK_START + * task stacks at THREAD_SIZE - 32 */ - if (pc == (unsigned long)ret_from_irq || - pc == (unsigned long)ret_from_exception) { - struct pt_regs *regs; - if (*sp >= stack_page && - *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) { - regs = (struct pt_regs *)*sp; - pc = regs->cp0_epc; - if (!user_mode(regs) && __kernel_text_address(pc)) { - *sp = regs->regs[29]; - *ra = regs->regs[31]; - return pc; - } + low = stack_page; + if (!preemptible() && on_irq_stack(raw_smp_processor_id(), *sp)) { + high = stack_page + IRQ_STACK_START; + irq_stack_high = high; + } else { + high = stack_page + THREAD_SIZE - 32; + irq_stack_high = 0; + } + + /* + * If we reached the top of the interrupt stack, start unwinding + * the interrupted task stack. + */ + if (unlikely(*sp == irq_stack_high)) { + unsigned long task_sp = *(unsigned long *)*sp; + + /* + * Check that the pointer saved in the IRQ stack head points to + * something within the stack of the current task + */ + if (!object_is_on_stack((void *)task_sp)) + return 0; + + /* + * Follow pointer to tasks kernel stack frame where interrupted + * state was saved. + */ + regs = (struct pt_regs *)task_sp; + pc = regs->cp0_epc; + if (!user_mode(regs) && __kernel_text_address(pc)) { + *sp = regs->regs[29]; + *ra = regs->regs[31]; + return pc; } return 0; } @@ -533,8 +554,7 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page, if (leaf < 0) return 0; - if (*sp < stack_page || - *sp + info.frame_size > stack_page + THREAD_SIZE - 32) + if (*sp < low || *sp + info.frame_size > high) return 0; if (leaf) -- cgit v1.2.3 From 5003ae1e735e6bfe4679d9bed6846274f322e77e Mon Sep 17 00:00:00 2001 From: Koos Vriezen Date: Wed, 1 Mar 2017 21:02:50 +0100 Subject: iommu/vt-d: Fix NULL pointer dereference in device_to_iommu The function device_to_iommu() in the Intel VT-d driver lacks a NULL-ptr check, resulting in this oops at boot on some platforms: BUG: unable to handle kernel NULL pointer dereference at 00000000000007ab IP: [] device_to_iommu+0x11a/0x1a0 PGD 0 [...] Call Trace: ? find_or_alloc_domain.constprop.29+0x1a/0x300 ? dw_dma_probe+0x561/0x580 [dw_dmac_core] ? __get_valid_domain_for_dev+0x39/0x120 ? __intel_map_single+0x138/0x180 ? intel_alloc_coherent+0xb6/0x120 ? sst_hsw_dsp_init+0x173/0x420 [snd_soc_sst_haswell_pcm] ? mutex_lock+0x9/0x30 ? kernfs_add_one+0xdb/0x130 ? devres_add+0x19/0x60 ? hsw_pcm_dev_probe+0x46/0xd0 [snd_soc_sst_haswell_pcm] ? platform_drv_probe+0x30/0x90 ? driver_probe_device+0x1ed/0x2b0 ? __driver_attach+0x8f/0xa0 ? driver_probe_device+0x2b0/0x2b0 ? bus_for_each_dev+0x55/0x90 ? bus_add_driver+0x110/0x210 ? 0xffffffffa11ea000 ? driver_register+0x52/0xc0 ? 0xffffffffa11ea000 ? do_one_initcall+0x32/0x130 ? free_vmap_area_noflush+0x37/0x70 ? kmem_cache_alloc+0x88/0xd0 ? do_init_module+0x51/0x1c4 ? load_module+0x1ee9/0x2430 ? show_taint+0x20/0x20 ? kernel_read_file+0xfd/0x190 ? SyS_finit_module+0xa3/0xb0 ? do_syscall_64+0x4a/0xb0 ? entry_SYSCALL64_slow_path+0x25/0x25 Code: 78 ff ff ff 4d 85 c0 74 ee 49 8b 5a 10 0f b6 9b e0 00 00 00 41 38 98 e0 00 00 00 77 da 0f b6 eb 49 39 a8 88 00 00 00 72 ce eb 8f <41> f6 82 ab 07 00 00 04 0f 85 76 ff ff ff 0f b6 4d 08 88 0e 49 RIP [] device_to_iommu+0x11a/0x1a0 RSP CR2: 00000000000007ab ---[ end trace 16f974b6d58d0aad ]--- Add the missing pointer check. Fixes: 1c387188c60f53b338c20eee32db055dfe022a9b ("iommu/vt-d: Fix IOMMU lookup for SR-IOV Virtual Functions") Signed-off-by: Koos Vriezen Cc: stable@vger.kernel.org # 4.8.15+ Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 238ad3447712..91d60493b57c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -916,7 +916,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf * which we used for the IOMMU lookup. Strictly speaking * we could do this for all PCI devices; we only need to * get the BDF# from the scope table for ACPI matches. */ - if (pdev->is_virtfn) + if (pdev && pdev->is_virtfn) goto got_pdev; *bus = drhd->devices[i].bus; -- cgit v1.2.3 From 2c422257550f123049552b39f7af6e3428a60f43 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 21 Mar 2017 13:32:37 +0100 Subject: netfilter: nfnl_cthelper: fix runtime expectation policy updates We only allow runtime updates of expectation policies for timeout and maximum number of expectations, otherwise reject the update. Signed-off-by: Pablo Neira Ayuso Acked-by: Liping Zhang --- net/netfilter/nfnetlink_cthelper.c | 86 +++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 3cd41d105407..90f291e27eb1 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -255,6 +255,89 @@ err: return ret; } +static int +nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy, + struct nf_conntrack_expect_policy *new_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, + nfnl_cthelper_expect_pol); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name)) + return -EBUSY; + + new_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + new_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static int nfnl_cthelper_update_policy_all(struct nlattr *tb[], + struct nf_conntrack_helper *helper) +{ + struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1]; + struct nf_conntrack_expect_policy *policy; + int i, err; + + /* Check first that all policy attributes are well-formed, so we don't + * leave things in inconsistent state on errors. + */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + + if (!tb[NFCTH_POLICY_SET + i]) + return -EINVAL; + + err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i], + &new_policy[i], + tb[NFCTH_POLICY_SET + i]); + if (err < 0) + return err; + } + /* Now we can safely update them. */ + for (i = 0; i < helper->expect_class_max + 1; i++) { + policy = (struct nf_conntrack_expect_policy *) + &helper->expect_policy[i]; + policy->max_expected = new_policy->max_expected; + policy->timeout = new_policy->timeout; + } + + return 0; +} + +static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1]; + unsigned int class_max; + int err; + + err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (err < 0) + return err; + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + if (helper->expect_class_max + 1 != class_max) + return -EBUSY; + + return nfnl_cthelper_update_policy_all(tb, helper); +} + static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) @@ -265,8 +348,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[], return -EBUSY; if (tb[NFCTH_POLICY]) { - ret = nfnl_cthelper_parse_expect_policy(helper, - tb[NFCTH_POLICY]); + ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) return ret; } -- cgit v1.2.3 From f83bf8da1135ca635aac8f062cad3f001fcf3a26 Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Tue, 21 Mar 2017 15:07:10 +0800 Subject: netfilter: nfnl_cthelper: Fix memory leak We have memory leaks of nf_conntrack_helper & expect_policy. Signed-off-by: Jeffy Chen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 90f291e27eb1..2b987d2a77bc 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -216,7 +216,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) - goto err; + goto err1; strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); @@ -247,10 +247,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], ret = nf_conntrack_helper_register(helper); if (ret < 0) - goto err; + goto err2; return 0; -err: +err2: + kfree(helper->expect_policy); +err1: kfree(helper); return ret; } @@ -696,6 +698,8 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, found = true; nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(cur); } } /* Make sure we return success if we flush and there is no helpers */ @@ -759,6 +763,8 @@ static void __exit nfnl_cthelper_exit(void) continue; nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(cur); } } } -- cgit v1.2.3 From 6e2e0eea072f6f82ac0afbcfa8dc38dcc3a29fa4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 9 Feb 2017 13:09:08 -0200 Subject: [media] coda/imx-vdoa: platform_driver should not be const The device driver platform is actually written to during registration, for setting the owner field, so platform_driver_register() does not take a const pointer: drivers/media/platform/coda/imx-vdoa.c: In function 'vdoa_driver_init': drivers/media/platform/coda/imx-vdoa.c:333:213: error: passing argument 1 of '__platform_driver_register' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] module_platform_driver(vdoa_driver); In file included from drivers/media/platform/coda/imx-vdoa.c:22:0: include/linux/platform_device.h:199:12: note: expected 'struct platform_driver *' but argument is of type 'const struct platform_driver *' extern int __platform_driver_register(struct platform_driver *, ^~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/media/platform/coda/imx-vdoa.c: In function 'vdoa_driver_exit': drivers/media/platform/coda/imx-vdoa.c:333:626: error: passing argument 1 of 'platform_driver_unregister' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] Remove the modifier again. Fixes: d2fe28feaebb ("[media] coda/imx-vdoa: constify structs") Signed-off-by: Arnd Bergmann Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/coda/imx-vdoa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/coda/imx-vdoa.c b/drivers/media/platform/coda/imx-vdoa.c index 67fd8ffa60a4..669a4c82f1ff 100644 --- a/drivers/media/platform/coda/imx-vdoa.c +++ b/drivers/media/platform/coda/imx-vdoa.c @@ -321,7 +321,7 @@ static const struct of_device_id vdoa_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, vdoa_dt_ids); -static const struct platform_driver vdoa_driver = { +static struct platform_driver vdoa_driver = { .probe = vdoa_probe, .remove = vdoa_remove, .driver = { -- cgit v1.2.3 From 389e3f0d57e6ab50c207bee5ea2c4a5982a029c2 Mon Sep 17 00:00:00 2001 From: Shailendra Verma Date: Fri, 2 Dec 2016 02:48:01 -0200 Subject: [media] bdisp: Clean up file handle in open() error path The File handle is not yet added in the vdev list.So no need to call v4l2_fh_del(&ctx->fh)if it fails to create control. Signed-off-by: Shailendra Verma Reviewed-by: Fabien Dessenne Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/sti/bdisp/bdisp-v4l2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c index 823608112d89..7918b928f058 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c +++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c @@ -632,8 +632,8 @@ static int bdisp_open(struct file *file) error_ctrls: bdisp_ctrls_delete(ctx); -error_fh: v4l2_fh_del(&ctx->fh); +error_fh: v4l2_fh_exit(&ctx->fh); bdisp_hw_free_nodes(ctx); mem_ctx: -- cgit v1.2.3 From 24a47426066c8ba16a4db1b20a41d63187281195 Mon Sep 17 00:00:00 2001 From: Thibault Saunier Date: Wed, 1 Feb 2017 18:05:21 -0200 Subject: [media] exynos-gsc: Do not swap cb/cr for semi planar formats In the case of semi planar formats cb and cr are in the same plane in memory, meaning that will be set to 'cb' whatever the format is, and whatever the (packed) order of those components are. Suggested-by: Nicolas Dufresne Signed-off-by: Thibault Saunier Signed-off-by: Javier Martinez Canillas Acked-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos-gsc/gsc-core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c index cbb03768f5d7..0f0c389f8897 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.c +++ b/drivers/media/platform/exynos-gsc/gsc-core.c @@ -861,9 +861,7 @@ int gsc_prepare_addr(struct gsc_ctx *ctx, struct vb2_buffer *vb, if ((frame->fmt->pixelformat == V4L2_PIX_FMT_VYUY) || (frame->fmt->pixelformat == V4L2_PIX_FMT_YVYU) || - (frame->fmt->pixelformat == V4L2_PIX_FMT_NV61) || (frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420) || - (frame->fmt->pixelformat == V4L2_PIX_FMT_NV21) || (frame->fmt->pixelformat == V4L2_PIX_FMT_YVU420M)) swap(addr->cb, addr->cr); -- cgit v1.2.3 From 95a49603707d982b25d17c5b70e220a05556a2f9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Mar 2017 10:14:43 +0800 Subject: blk-mq: don't complete un-started request in timeout handler When iterating busy requests in timeout handler, if the STARTED flag of one request isn't set, that means the request is being processed in block layer or driver, and isn't submitted to hardware yet. In current implementation of blk_mq_check_expired(), if the request queue becomes dying, un-started requests are handled as being completed/freed immediately. This way is wrong, and can cause rq corruption or double allocation[1][2], when doing I/O and removing&resetting NVMe device at the sametime. This patch fixes several issues reported by Yi Zhang. [1]. oops log 1 [ 581.789754] ------------[ cut here ]------------ [ 581.789758] kernel BUG at block/blk-mq.c:374! [ 581.789760] invalid opcode: 0000 [#1] SMP [ 581.789761] Modules linked in: vfat fat ipmi_ssif intel_rapl sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm nvme irqbypass crct10dif_pclmul nvme_core crc32_pclmul ghash_clmulni_intel intel_cstate ipmi_si mei_me ipmi_devintf intel_uncore sg ipmi_msghandler intel_rapl_perf iTCO_wdt mei iTCO_vendor_support mxm_wmi lpc_ich dcdbas shpchp pcspkr acpi_power_meter wmi nfsd auth_rpcgss nfs_acl lockd dm_multipath grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm ahci libahci crc32c_intel tg3 libata megaraid_sas i2c_core ptp fjes pps_core dm_mirror dm_region_hash dm_log dm_mod [ 581.789796] CPU: 1 PID: 1617 Comm: kworker/1:1H Not tainted 4.10.0.bz1420297+ #4 [ 581.789797] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.2.5 09/06/2016 [ 581.789804] Workqueue: kblockd blk_mq_timeout_work [ 581.789806] task: ffff8804721c8000 task.stack: ffffc90006ee4000 [ 581.789809] RIP: 0010:blk_mq_end_request+0x58/0x70 [ 581.789810] RSP: 0018:ffffc90006ee7d50 EFLAGS: 00010202 [ 581.789811] RAX: 0000000000000001 RBX: ffff8802e4195340 RCX: ffff88028e2f4b88 [ 581.789812] RDX: 0000000000001000 RSI: 0000000000001000 RDI: 0000000000000000 [ 581.789813] RBP: ffffc90006ee7d60 R08: 0000000000000003 R09: ffff88028e2f4b00 [ 581.789814] R10: 0000000000001000 R11: 0000000000000001 R12: 00000000fffffffb [ 581.789815] R13: ffff88042abe5780 R14: 000000000000002d R15: ffff88046fbdff80 [ 581.789817] FS: 0000000000000000(0000) GS:ffff88047fc00000(0000) knlGS:0000000000000000 [ 581.789818] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 581.789819] CR2: 00007f64f403a008 CR3: 000000014d078000 CR4: 00000000001406e0 [ 581.789820] Call Trace: [ 581.789825] blk_mq_check_expired+0x76/0x80 [ 581.789828] bt_iter+0x45/0x50 [ 581.789830] blk_mq_queue_tag_busy_iter+0xdd/0x1f0 [ 581.789832] ? blk_mq_rq_timed_out+0x70/0x70 [ 581.789833] ? blk_mq_rq_timed_out+0x70/0x70 [ 581.789840] ? __switch_to+0x140/0x450 [ 581.789841] blk_mq_timeout_work+0x88/0x170 [ 581.789845] process_one_work+0x165/0x410 [ 581.789847] worker_thread+0x137/0x4c0 [ 581.789851] kthread+0x101/0x140 [ 581.789853] ? rescuer_thread+0x3b0/0x3b0 [ 581.789855] ? kthread_park+0x90/0x90 [ 581.789860] ret_from_fork+0x2c/0x40 [ 581.789861] Code: 48 85 c0 74 0d 44 89 e6 48 89 df ff d0 5b 41 5c 5d c3 48 8b bb 70 01 00 00 48 85 ff 75 0f 48 89 df e8 7d f0 ff ff 5b 41 5c 5d c3 <0f> 0b e8 71 f0 ff ff 90 eb e9 0f 1f 40 00 66 2e 0f 1f 84 00 00 [ 581.789882] RIP: blk_mq_end_request+0x58/0x70 RSP: ffffc90006ee7d50 [ 581.789889] ---[ end trace bcaf03d9a14a0a70 ]--- [2]. oops log2 [ 6984.857362] BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 [ 6984.857372] IP: nvme_queue_rq+0x6e6/0x8cd [nvme] [ 6984.857373] PGD 0 [ 6984.857374] [ 6984.857376] Oops: 0000 [#1] SMP [ 6984.857379] Modules linked in: ipmi_ssif vfat fat intel_rapl sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ipmi_si iTCO_wdt iTCO_vendor_support mxm_wmi ipmi_devintf intel_cstate sg dcdbas intel_uncore mei_me intel_rapl_perf mei pcspkr lpc_ich ipmi_msghandler shpchp acpi_power_meter wmi nfsd auth_rpcgss dm_multipath nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect crc32c_intel sysimgblt fb_sys_fops ttm nvme drm nvme_core ahci libahci i2c_core tg3 libata ptp megaraid_sas pps_core fjes dm_mirror dm_region_hash dm_log dm_mod [ 6984.857416] CPU: 7 PID: 1635 Comm: kworker/7:1H Not tainted 4.10.0-2.el7.bz1420297.x86_64 #1 [ 6984.857417] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.2.5 09/06/2016 [ 6984.857427] Workqueue: kblockd blk_mq_run_work_fn [ 6984.857429] task: ffff880476e3da00 task.stack: ffffc90002e90000 [ 6984.857432] RIP: 0010:nvme_queue_rq+0x6e6/0x8cd [nvme] [ 6984.857433] RSP: 0018:ffffc90002e93c50 EFLAGS: 00010246 [ 6984.857434] RAX: 0000000000000000 RBX: ffff880275646600 RCX: 0000000000001000 [ 6984.857435] RDX: 0000000000000fff RSI: 00000002fba2a000 RDI: ffff8804734e6950 [ 6984.857436] RBP: ffffc90002e93d30 R08: 0000000000002000 R09: 0000000000001000 [ 6984.857437] R10: 0000000000001000 R11: 0000000000000000 R12: ffff8804741d8000 [ 6984.857438] R13: 0000000000000040 R14: ffff880475649f80 R15: ffff8804734e6780 [ 6984.857439] FS: 0000000000000000(0000) GS:ffff88047fcc0000(0000) knlGS:0000000000000000 [ 6984.857440] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6984.857442] CR2: 0000000000000010 CR3: 0000000001c09000 CR4: 00000000001406e0 [ 6984.857443] Call Trace: [ 6984.857451] ? mempool_free+0x2b/0x80 [ 6984.857455] ? bio_free+0x4e/0x60 [ 6984.857459] blk_mq_dispatch_rq_list+0xf5/0x230 [ 6984.857462] blk_mq_process_rq_list+0x133/0x170 [ 6984.857465] __blk_mq_run_hw_queue+0x8c/0xa0 [ 6984.857467] blk_mq_run_work_fn+0x12/0x20 [ 6984.857473] process_one_work+0x165/0x410 [ 6984.857475] worker_thread+0x137/0x4c0 [ 6984.857478] kthread+0x101/0x140 [ 6984.857480] ? rescuer_thread+0x3b0/0x3b0 [ 6984.857481] ? kthread_park+0x90/0x90 [ 6984.857489] ret_from_fork+0x2c/0x40 [ 6984.857490] Code: 8b bd 70 ff ff ff 89 95 50 ff ff ff 89 8d 58 ff ff ff 44 89 95 60 ff ff ff e8 b7 dd 12 e1 8b 95 50 ff ff ff 48 89 85 68 ff ff ff <4c> 8b 48 10 44 8b 58 18 8b 8d 58 ff ff ff 44 8b 95 60 ff ff ff [ 6984.857511] RIP: nvme_queue_rq+0x6e6/0x8cd [nvme] RSP: ffffc90002e93c50 [ 6984.857512] CR2: 0000000000000010 [ 6984.895359] ---[ end trace 2d7ceb528432bf83 ]--- Cc: stable@vger.kernel.org Reported-by: Yi Zhang Tested-by: Yi Zhang Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a4546f060e80..08a49c69738b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -697,17 +697,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, { struct blk_mq_timeout_data *data = priv; - if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) { - /* - * If a request wasn't started before the queue was - * marked dying, kill it here or it'll go unnoticed. - */ - if (unlikely(blk_queue_dying(rq->q))) { - rq->errors = -EIO; - blk_mq_end_request(rq, rq->errors); - } + if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) return; - } if (time_after_eq(jiffies, rq->deadline)) { if (!blk_mark_rq_complete(rq)) -- cgit v1.2.3 From 7d2aa6b814476a2e2794960f844344519246df72 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 20 Mar 2017 10:17:56 +0100 Subject: iommu/exynos: Block SYSMMU while invalidating FLPD cache Documentation specifies that SYSMMU should be in blocked state while performing TLB/FLPD cache invalidation, so add needed calls to sysmmu_block/unblock. Fixes: 66a7ed84b345d ("iommu/exynos: Apply workaround of caching fault page table entries") CC: stable@vger.kernel.org # v4.10+ Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index a7e0821c9967..32d43f1994e4 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -512,7 +512,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, spin_lock_irqsave(&data->lock, flags); if (data->active && data->version >= MAKE_MMU_VER(3, 3)) { clk_enable(data->clk_master); - __sysmmu_tlb_invalidate_entry(data, iova, 1); + if (sysmmu_block(data)) { + __sysmmu_tlb_invalidate_entry(data, iova, 1); + sysmmu_unblock(data); + } clk_disable(data->clk_master); } spin_unlock_irqrestore(&data->lock, flags); -- cgit v1.2.3 From cd37a296a9f890586665bb8974a8b17ee2f17d6d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 20 Mar 2017 10:17:57 +0100 Subject: iommu/exynos: Workaround FLPD cache flush issues for SYSMMU v5 For some unknown reasons, in some cases, FLPD cache invalidation doesn't work properly with SYSMMU v5 controllers found in Exynos5433 SoCs. This can be observed by a firmware crash during initialization phase of MFC video decoder available in the mentioned SoCs when IOMMU support is enabled. To workaround this issue perform a full TLB/FLPD invalidation in case of replacing any first level page descriptors in case of SYSMMU v5. Fixes: 740a01eee9ada ("iommu/exynos: Add support for v5 SYSMMU") CC: stable@vger.kernel.org # v4.10+ Signed-off-by: Marek Szyprowski Tested-by: Andrzej Hajda Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 32d43f1994e4..c01bfcdb2383 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -513,7 +513,10 @@ static void sysmmu_tlb_invalidate_flpdcache(struct sysmmu_drvdata *data, if (data->active && data->version >= MAKE_MMU_VER(3, 3)) { clk_enable(data->clk_master); if (sysmmu_block(data)) { - __sysmmu_tlb_invalidate_entry(data, iova, 1); + if (data->version >= MAKE_MMU_VER(5, 0)) + __sysmmu_tlb_invalidate(data); + else + __sysmmu_tlb_invalidate_entry(data, iova, 1); sysmmu_unblock(data); } clk_disable(data->clk_master); -- cgit v1.2.3 From 9d3a4de4cb8db8e71730e36736272ef041836f68 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Mar 2017 17:00:16 +0000 Subject: iommu: Disambiguate MSI region types The introduction of reserved regions has left a couple of rough edges which we could do with sorting out sooner rather than later. Since we are not yet addressing the potential dynamic aspect of software-managed reservations and presenting them at arbitrary fixed addresses, it is incongruous that we end up displaying hardware vs. software-managed MSI regions to userspace differently, especially since ARM-based systems may actually require one or the other, or even potentially both at once, (which iommu-dma currently has no hope of dealing with at all). Let's resolve the former user-visible inconsistency ASAP before the ABI has been baked into a kernel release, in a way that also lays the groundwork for the latter shortcoming to be addressed by follow-up patches. For clarity, rename the software-managed type to IOMMU_RESV_SW_MSI, use IOMMU_RESV_MSI to describe the hardware type, and document everything a little bit. Since the x86 MSI remapping hardware falls squarely under this meaning of IOMMU_RESV_MSI, apply that type to their regions as well, so that we tell the same story to userspace across all platforms. Secondly, as the various region types require quite different handling, and it really makes little sense to ever try combining them, convert the bitfield-esque #defines to a plain enum in the process before anyone gets the wrong impression. Fixes: d30ddcaa7b02 ("iommu: Add a new type field in iommu_resv_region") Reviewed-by: Eric Auger CC: Alex Williamson CC: David Woodhouse CC: kvm@vger.kernel.org Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/intel-iommu.c | 2 +- drivers/iommu/iommu.c | 5 +++-- drivers/vfio/vfio_iommu_type1.c | 7 +++---- include/linux/iommu.h | 18 +++++++++++++----- 7 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 98940d1392cb..b17536d6e69b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3202,7 +3202,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, region = iommu_alloc_resv_region(MSI_RANGE_START, MSI_RANGE_END - MSI_RANGE_START + 1, - 0, IOMMU_RESV_RESERVED); + 0, IOMMU_RESV_MSI); if (!region) return; list_add_tail(®ion->list, head); diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 5806a6acc94e..591bb96047c9 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1888,7 +1888,7 @@ static void arm_smmu_get_resv_regions(struct device *dev, int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, - prot, IOMMU_RESV_MSI); + prot, IOMMU_RESV_SW_MSI); if (!region) return; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index abf6496843a6..b493c99e17f7 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1608,7 +1608,7 @@ static void arm_smmu_get_resv_regions(struct device *dev, int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, - prot, IOMMU_RESV_MSI); + prot, IOMMU_RESV_SW_MSI); if (!region) return; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 91d60493b57c..d412a313a372 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5249,7 +5249,7 @@ static void intel_iommu_get_resv_regions(struct device *device, reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, - 0, IOMMU_RESV_RESERVED); + 0, IOMMU_RESV_MSI); if (!reg) return; list_add_tail(®->list, head); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8ea14f41a979..3b67144dead2 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -72,6 +72,7 @@ static const char * const iommu_group_resv_type_string[] = { [IOMMU_RESV_DIRECT] = "direct", [IOMMU_RESV_RESERVED] = "reserved", [IOMMU_RESV_MSI] = "msi", + [IOMMU_RESV_SW_MSI] = "msi", }; #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ @@ -1743,8 +1744,8 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list) } struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start, - size_t length, - int prot, int type) + size_t length, int prot, + enum iommu_resv_type type) { struct iommu_resv_region *region; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c26fa1f3ed86..32d2633092a3 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1182,8 +1182,7 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, return NULL; } -static bool vfio_iommu_has_resv_msi(struct iommu_group *group, - phys_addr_t *base) +static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) { struct list_head group_resv_regions; struct iommu_resv_region *region, *next; @@ -1192,7 +1191,7 @@ static bool vfio_iommu_has_resv_msi(struct iommu_group *group, INIT_LIST_HEAD(&group_resv_regions); iommu_get_group_resv_regions(group, &group_resv_regions); list_for_each_entry(region, &group_resv_regions, list) { - if (region->type & IOMMU_RESV_MSI) { + if (region->type == IOMMU_RESV_SW_MSI) { *base = region->start; ret = true; goto out; @@ -1283,7 +1282,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_domain; - resv_msi = vfio_iommu_has_resv_msi(iommu_group, &resv_msi_base); + resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base); INIT_LIST_HEAD(&domain->group_list); list_add(&group->next, &domain->group_list); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6a6de187ddc0..2e4de0deee53 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -125,9 +125,16 @@ enum iommu_attr { }; /* These are the possible reserved region types */ -#define IOMMU_RESV_DIRECT (1 << 0) -#define IOMMU_RESV_RESERVED (1 << 1) -#define IOMMU_RESV_MSI (1 << 2) +enum iommu_resv_type { + /* Memory regions which must be mapped 1:1 at all times */ + IOMMU_RESV_DIRECT, + /* Arbitrary "never map this or give it to a device" address ranges */ + IOMMU_RESV_RESERVED, + /* Hardware MSI region (untranslated) */ + IOMMU_RESV_MSI, + /* Software-managed MSI translation window */ + IOMMU_RESV_SW_MSI, +}; /** * struct iommu_resv_region - descriptor for a reserved memory region @@ -142,7 +149,7 @@ struct iommu_resv_region { phys_addr_t start; size_t length; int prot; - int type; + enum iommu_resv_type type; }; #ifdef CONFIG_IOMMU_API @@ -288,7 +295,8 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list); extern void iommu_put_resv_regions(struct device *dev, struct list_head *list); extern int iommu_request_dm_for_dev(struct device *dev); extern struct iommu_resv_region * -iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, int type); +iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot, + enum iommu_resv_type type); extern int iommu_get_group_resv_regions(struct iommu_group *group, struct list_head *head); -- cgit v1.2.3 From afd0e5a876703accb95894f23317a13e2c49b523 Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Wed, 22 Mar 2017 17:08:25 +0530 Subject: arm64: kaslr: Fix up the kernel image alignment If kernel image extends across alignment boundary, existing code increases the KASLR offset by size of kernel image. The offset is masked after resizing. There are cases, where after masking, we may still have kernel image extending across boundary. This eventually results in only 2MB block getting mapped while creating the page tables. This results in data aborts while accessing unmapped regions during second relocation (with kaslr offset) in __primary_switch. To fix this problem, round up the kernel image size, by swapper block size, before adding it for correction. For example consider below case, where kernel image still crosses 1GB alignment boundary, after masking the offset, which is fixed by rounding up kernel image size. SWAPPER_TABLE_SHIFT = 30 Swapper using section maps with section size 2MB. CONFIG_PGTABLE_LEVELS = 3 VA_BITS = 39 _text : 0xffffff8008080000 _end : 0xffffff800aa1b000 offset : 0x1f35600000 mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1) (_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7c (_end + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d offset after existing correction (before mask) = 0x1f37f9b000 (_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d (_end + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d offset (after mask) = 0x1f37e00000 (_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7c (_end + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d new offset w/ rounding up = 0x1f38000000 (_text + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d (_end + offset) >> SWAPPER_TABLE_SHIFT = 0x3fffffe7d Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Cc: Reviewed-by: Ard Biesheuvel Signed-off-by: Neeraj Upadhyay Signed-off-by: Srinivas Ramana Signed-off-by: Will Deacon --- arch/arm64/kernel/kaslr.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 769f24ef628c..d7e90d97f5c4 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -131,11 +131,15 @@ u64 __init kaslr_early_init(u64 dt_phys, u64 modulo_offset) /* * The kernel Image should not extend across a 1GB/32MB/512MB alignment * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this - * happens, increase the KASLR offset by the size of the kernel image. + * happens, increase the KASLR offset by the size of the kernel image + * rounded up by SWAPPER_BLOCK_SIZE. */ if ((((u64)_text + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT) != - (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) - offset = (offset + (u64)(_end - _text)) & mask; + (((u64)_end + offset + modulo_offset) >> SWAPPER_TABLE_SHIFT)) { + u64 kimg_sz = _end - _text; + offset = (offset + round_up(kimg_sz, SWAPPER_BLOCK_SIZE)) + & mask; + } if (IS_ENABLED(CONFIG_KASAN)) /* -- cgit v1.2.3 From f0c0cb99f74c03e2407ea553f6d46eb611e262b5 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Tue, 21 Mar 2017 16:51:19 -0400 Subject: arm64: dts: NS2: Add dma-coherent to relevant DT entries Cache related issues with DMA rings and performance issues related to caching are being caused by not properly setting the "dma-coherent" flag in the device tree entries. Adding it here to correct the issue. Signed-off-by: Jon Mason Fixes: fd5e5dd56 ("arm64: dts: Add PCIe0 and PCIe4 DT nodes for NS2") Fixes: dddc3c9d7 ("arm64: dts: NS2: add AMAC ethernet support") Fixes: e79249143 ("arm64: dts: Add Broadcom Northstar2 device tree entries for PDC driver") Fixes: ac9aae00f ("arm64: dts: Add SATA3 AHCI and SATA3 PHY DT nodes for NS2") Fixes: efc877676 ("arm64: dts: Add SDHCI DT node for NS2") Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/ns2.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index 9f9e203c09c5..bcb03fc32665 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -114,6 +114,7 @@ pcie0: pcie@20020000 { compatible = "brcm,iproc-pcie"; reg = <0 0x20020000 0 0x1000>; + dma-coherent; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; @@ -144,6 +145,7 @@ pcie4: pcie@50020000 { compatible = "brcm,iproc-pcie"; reg = <0 0x50020000 0 0x1000>; + dma-coherent; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; @@ -174,6 +176,7 @@ pcie8: pcie@60c00000 { compatible = "brcm,iproc-pcie-paxc"; reg = <0 0x60c00000 0 0x1000>; + dma-coherent; linux,pci-domain = <8>; bus-range = <0x0 0x1>; @@ -203,6 +206,7 @@ <0x61030000 0x100>; reg-names = "amac_base", "idm_base", "nicpm_base"; interrupts = ; + dma-coherent; phy-handle = <&gphy0>; phy-mode = "rgmii"; status = "disabled"; @@ -213,6 +217,7 @@ reg = <0x612c0000 0x445>; /* PDC FS0 regs */ interrupts = ; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -222,6 +227,7 @@ reg = <0x612e0000 0x445>; /* PDC FS1 regs */ interrupts = ; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -231,6 +237,7 @@ reg = <0x61300000 0x445>; /* PDC FS2 regs */ interrupts = ; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -240,6 +247,7 @@ reg = <0x61320000 0x445>; /* PDC FS3 regs */ interrupts = ; #mbox-cells = <1>; + dma-coherent; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; }; @@ -644,6 +652,7 @@ sata: ahci@663f2000 { compatible = "brcm,iproc-ahci", "generic-ahci"; reg = <0x663f2000 0x1000>; + dma-coherent; reg-names = "ahci"; interrupts = ; #address-cells = <1>; @@ -667,6 +676,7 @@ compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x66420000 0x100>; interrupts = ; + dma-coherent; bus-width = <8>; clocks = <&genpll_sw BCM_NS2_GENPLL_SW_SDIO_CLK>; status = "disabled"; @@ -676,6 +686,7 @@ compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x66430000 0x100>; interrupts = ; + dma-coherent; bus-width = <8>; clocks = <&genpll_sw BCM_NS2_GENPLL_SW_SDIO_CLK>; status = "disabled"; -- cgit v1.2.3 From a05d4fd9176003e0c1f9c3d083f4dac19fd346ab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Mar 2017 19:25:56 -0400 Subject: cgroup, net_cls: iterate the fds of only the tasks which are being migrated The net_cls controller controls the classid field of each socket which is associated with the cgroup. Because the classid is per-socket attribute, when a task migrates to another cgroup or the configured classid of the cgroup changes, the controller needs to walk all sockets and update the classid value, which was implemented by 3b13758f51de ("cgroups: Allow dynamically changing net_classid"). While the approach is not scalable, migrating tasks which have a lot of fds attached to them is rare and the cost is born by the ones initiating the operations. However, for simplicity, both the migration and classid config change paths call update_classid() which scans all fds of all tasks in the target css. This is an overkill for the migration path which only needs to cover a much smaller subset of tasks which are actually getting migrated in. On cgroup v1, this can lead to unexpected scalability issues when one tries to migrate a task or process into a net_cls cgroup which already contains a lot of fds. Even if the migration traget doesn't have many to get scanned, update_classid() ends up scanning all fds in the target cgroup which can be extremely numerous. Unfortunately, on cgroup v2 which doesn't use net_cls, the problem is even worse. Before bfc2cf6f61fc ("cgroup: call subsys->*attach() only for subsystems which are actually affected by migration"), cgroup core would call the ->css_attach callback even for controllers which don't see actual migration to a different css. As net_cls is always disabled but still mounted on cgroup v2, whenever a process is migrated on the cgroup v2 hierarchy, net_cls sees identity migration from root to root and cgroup core used to call ->css_attach callback for those. The net_cls ->css_attach ends up calling update_classid() on the root net_cls css to which all processes on the system belong to as the controller isn't used. This makes any cgroup v2 migration O(total_number_of_fds_on_the_system) which is horrible and easily leads to noticeable stalls triggering RCU stall warnings and so on. The worst symptom is already fixed in upstream by bfc2cf6f61fc ("cgroup: call subsys->*attach() only for subsystems which are actually affected by migration"); however, backporting that commit is too invasive and we want to avoid other cases too. This patch updates net_cls's cgrp_attach() to iterate fds of only the processes which are actually getting migrated. This removes the surprising migration cost which is dependent on the total number of fds in the target cgroup. As this leaves write_classid() the only user of update_classid(), open-code the helper into write_classid(). Reported-by: David Goode Fixes: 3b13758f51de ("cgroups: Allow dynamically changing net_classid") Cc: stable@vger.kernel.org # v4.4+ Cc: Nina Schiff Cc: David S. Miller Signed-off-by: Tejun Heo Signed-off-by: David S. Miller --- net/core/netclassid_cgroup.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 6ae56037bb13..029a61ac6cdd 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -71,27 +71,17 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) return 0; } -static void update_classid(struct cgroup_subsys_state *css, void *v) +static void cgrp_attach(struct cgroup_taskset *tset) { - struct css_task_iter it; + struct cgroup_subsys_state *css; struct task_struct *p; - css_task_iter_start(css, &it); - while ((p = css_task_iter_next(&it))) { + cgroup_taskset_for_each(p, css, tset) { task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, v); + iterate_fd(p->files, 0, update_classid_sock, + (void *)(unsigned long)css_cls_state(css)->classid); task_unlock(p); } - css_task_iter_end(&it); -} - -static void cgrp_attach(struct cgroup_taskset *tset) -{ - struct cgroup_subsys_state *css; - - cgroup_taskset_first(tset, &css); - update_classid(css, - (void *)(unsigned long)css_cls_state(css)->classid); } static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) @@ -103,12 +93,22 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, u64 value) { struct cgroup_cls_state *cs = css_cls_state(css); + struct css_task_iter it; + struct task_struct *p; cgroup_sk_alloc_disable(); cs->classid = (u32)value; - update_classid(css, (void *)(unsigned long)cs->classid); + css_task_iter_start(css, &it); + while ((p = css_task_iter_next(&it))) { + task_lock(p); + iterate_fd(p->files, 0, update_classid_sock, + (void *)(unsigned long)cs->classid); + task_unlock(p); + } + css_task_iter_end(&it); + return 0; } -- cgit v1.2.3 From 210c4f70b4c630b27f0840c8043c138c955edc9e Mon Sep 17 00:00:00 2001 From: hayeswang Date: Mon, 20 Mar 2017 16:13:44 +0800 Subject: r8152: set the RMS of RTL8153 according to the mtu Set the received maximum size (RMS) according to the mtu size. It is unnecessary to receive a packet which is more than the size we could transmit. Besides, this could let the rx buffer be used effectively. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index bb3eedd07fbe..525c25817013 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2899,7 +2899,8 @@ static void r8153_first_init(struct r8152 *tp) rtl_rx_vlan_en(tp, tp->netdev->features & NETIF_F_HW_VLAN_CTAG_RX); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS); + ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_JUMBO); ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR0); @@ -2951,7 +2952,8 @@ static void r8153_enter_oob(struct r8152 *tp) usleep_range(1000, 2000); } - ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, RTL8153_RMS); + ocp_data = tp->netdev->mtu + VLAN_ETH_HLEN + CRC_SIZE; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, ocp_data); ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TEREDO_CFG); ocp_data &= ~TEREDO_WAKE_MASK; @@ -4201,8 +4203,14 @@ static int rtl8152_change_mtu(struct net_device *dev, int new_mtu) dev->mtu = new_mtu; - if (netif_running(dev) && netif_carrier_ok(dev)) - r8153_set_rx_early_size(tp); + if (netif_running(dev)) { + u32 rms = new_mtu + VLAN_ETH_HLEN + CRC_SIZE; + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, rms); + + if (netif_carrier_ok(dev)) + r8153_set_rx_early_size(tp); + } mutex_unlock(&tp->control); -- cgit v1.2.3 From b20cb60e2b865638459e6ec82ad3536d3734e555 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Mon, 20 Mar 2017 16:13:45 +0800 Subject: r8152: fix the rx early size of RTL8153 revert commit a59e6d815226 ("r8152: correct the rx early size") and fix the rx early size as (rx buffer size - rx packet size - rx desc size - alignment) / 4 Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 525c25817013..0b1b9188625d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "8" +#define NET_VERSION "9" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -501,6 +501,8 @@ enum rtl_register_content { #define RTL8153_RMS RTL8153_MAX_PACKET #define RTL8152_TX_TIMEOUT (5 * HZ) #define RTL8152_NAPI_WEIGHT 64 +#define rx_reserved_size(x) ((x) + VLAN_ETH_HLEN + CRC_SIZE + \ + sizeof(struct rx_desc) + RX_ALIGN) /* rtl8152 flags */ enum rtl8152_flags { @@ -2253,8 +2255,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp) static void r8153_set_rx_early_size(struct r8152 *tp) { - u32 mtu = tp->netdev->mtu; - u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8; + u32 ocp_data = (agg_buf_sz - rx_reserved_size(tp->netdev->mtu)) / 4; ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data); } -- cgit v1.2.3 From be9ca0d33c850192198c22518eeb1f41401268e8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Mar 2017 09:52:50 +0100 Subject: cpsw/netcp: work around reverse cpts dependency The dependency is reversed: cpsw and netcp call into cpts, but cpts depends on the other two in Kconfig. This can lead to cpts being a loadable module and its callers built-in: drivers/net/ethernet/ti/cpsw.o: In function `cpsw_remove': cpsw.c:(.text.cpsw_remove+0xd0): undefined reference to `cpts_release' drivers/net/ethernet/ti/cpsw.o: In function `cpsw_rx_handler': cpsw.c:(.text.cpsw_rx_handler+0x2dc): undefined reference to `cpts_rx_timestamp' drivers/net/ethernet/ti/cpsw.o: In function `cpsw_tx_handler': cpsw.c:(.text.cpsw_tx_handler+0x7c): undefined reference to `cpts_tx_timestamp' drivers/net/ethernet/ti/cpsw.o: In function `cpsw_ndo_stop': As a workaround, I'm introducing another Kconfig symbol to control the compilation of cpts, while making the actual module controlled by a silent symbol that is =y when necessary. Fixes: 6246168b4a38 ("net: ethernet: ti: netcp: add support of cpts") Signed-off-by: Arnd Bergmann Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/Kconfig | 8 +++++++- drivers/net/ethernet/ti/Makefile | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 296c8efd0038..d923890a9fda 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -74,7 +74,7 @@ config TI_CPSW will be called cpsw. config TI_CPTS - tristate "TI Common Platform Time Sync (CPTS) Support" + bool "TI Common Platform Time Sync (CPTS) Support" depends on TI_CPSW || TI_KEYSTONE_NETCP imply PTP_1588_CLOCK ---help--- @@ -83,6 +83,12 @@ config TI_CPTS The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the driver offers a PTP Hardware Clock. +config TI_CPTS_MOD + tristate + depends on TI_CPTS + default y if TI_CPSW=y || TI_KEYSTONE_NETCP=y + default m + config TI_KEYSTONE_NETCP tristate "TI Keystone NETCP Core Support" select TI_CPSW_ALE diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 1e7c10bf8713..10e6b0ce51ba 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o -obj-$(CONFIG_TI_CPTS) += cpts.o +obj-$(CONFIG_TI_CPTS_MOD) += cpts.o obj-$(CONFIG_TI_CPSW) += ti_cpsw.o ti_cpsw-y := cpsw.o -- cgit v1.2.3 From 07fef3623407444e51c12ea57cd91df38c1069e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Mar 2017 09:58:33 +0100 Subject: cpsw/netcp: cpts depends on posix_timers With posix timers having become optional, we get a build error with the cpts time sync option of the CPSW driver: drivers/net/ethernet/ti/cpts.c: In function 'cpts_find_ts': drivers/net/ethernet/ti/cpts.c:291:23: error: implicit declaration of function 'ptp_classify_raw';did you mean 'ptp_classifier_init'? [-Werror=implicit-function-declaration] This adds a hard dependency on PTP_CLOCK to avoid the problem, as building it without PTP support makes no sense anyway. Fixes: baa73d9e478f ("posix-timers: Make them configurable") Cc: Nicolas Pitre Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Acked-by: Nicolas Pitre Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index d923890a9fda..9e631952b86f 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -76,7 +76,7 @@ config TI_CPSW config TI_CPTS bool "TI Common Platform Time Sync (CPTS) Support" depends on TI_CPSW || TI_KEYSTONE_NETCP - imply PTP_1588_CLOCK + depends on PTP_1588_CLOCK ---help--- This driver supports the Common Platform Time Sync unit of the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem. -- cgit v1.2.3 From 1511949c61ec63e4b646c34d602ac6990b38ce30 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 20 Mar 2017 17:46:27 +0800 Subject: sctp: declare struct sctp_stream before using it sctp_stream_free uses struct sctp_stream as a param, but struct sctp_stream is defined after it's declaration. This patch is to declare struct sctp_stream before sctp_stream_free. Fixes: a83863174a61 ("sctp: prepare asoc stream for stream reconf") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 4f645198e9bd..592decebac75 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -83,6 +83,7 @@ struct sctp_bind_addr; struct sctp_ulpq; struct sctp_ep_common; struct crypto_shash; +struct sctp_stream; #include -- cgit v1.2.3 From 581947787eaf1ad801959d00b42b9d0131aacb6a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 20 Mar 2017 18:00:28 +0800 Subject: sctp: remove useless err from sctp_association_init This patch is to remove the unnecessary temporary variable 'err' from sctp_association_init. Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/associola.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 2a6835b4562b..0439a1a68367 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -71,9 +71,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a { struct net *net = sock_net(sk); struct sctp_sock *sp; - int i; sctp_paramhdr_t *p; - int err; + int i; /* Retrieve the SCTP per socket area. */ sp = sctp_sk((struct sock *)sk); @@ -264,8 +263,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); - err = sctp_auth_asoc_copy_shkeys(ep, asoc, gfp); - if (err) + if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp)) goto fail_init; asoc->active_key_id = ep->active_key_id; -- cgit v1.2.3 From 557d054c01da0337ca81de9e9d9206d57245b57e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 21 Mar 2017 10:47:49 +0100 Subject: tipc: fix nametbl deadlock at tipc_nametbl_unsubscribe Until now, tipc_nametbl_unsubscribe() is called at subscriptions reference count cleanup. Usually the subscriptions cleanup is called at subscription timeout or at subscription cancel or at subscriber delete. We have ignored the possibility of this being called from other locations, which causes deadlock as we try to grab the tn->nametbl_lock while holding it already. CPU1: CPU2: ---------- ---------------- tipc_nametbl_publish spin_lock_bh(&tn->nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <> CPU1: CPU2: ---------- ---------------- tipc_nametbl_stop spin_lock_bh(&tn->nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <> In this commit, we advance the calling of tipc_nametbl_unsubscribe() from the refcount cleanup to the intended callers. Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") Reported-by: John Thompson Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller --- net/tipc/subscr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65d0894..271cd66e4b3b 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); spin_unlock_bh(&subscriber->lock); @@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; + tipc_nametbl_unsubscribe(sub); tipc_subscrp_get(sub); spin_unlock_bh(&subscriber->lock); tipc_subscrp_delete(sub); -- cgit v1.2.3 From 1f30a86c58093046dc3e49c23d2618894e098f7a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 21 Mar 2017 15:59:12 +0200 Subject: net/mlx5: Add missing entries for set/query rate limit commands The switch cases for the rate limit set and query commands were missing, which could get us wrong under fw error or driver reset flow, fix that. Fixes: 1466cc5b23d1 ('net/mlx5: Rate limit tables support') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index caa837e5e2b9..a380353a78c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -361,6 +361,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: + case MLX5_CMD_OP_SET_RATE_LIMIT: + case MLX5_CMD_OP_QUERY_RATE_LIMIT: case MLX5_CMD_OP_ALLOC_PD: case MLX5_CMD_OP_ALLOC_UAR: case MLX5_CMD_OP_CONFIG_INT_MODERATION: @@ -497,6 +499,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); MLX5_COMMAND_STR_CASE(ALLOC_PD); MLX5_COMMAND_STR_CASE(DEALLOC_PD); MLX5_COMMAND_STR_CASE(ALLOC_UAR); -- cgit v1.2.3 From d85cdccbb3fe9a632ec9d0f4e4526c8c84fc3523 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 21 Mar 2017 15:59:13 +0200 Subject: net/mlx5e: Change the TC offload rule add/del code path to be per NIC or E-Switch Refactor the code to deal with add/del TC rules to have handler per NIC/E-switch offloading use case, and push the latter into the e-switch code. This provides better separation and is to be used in down-stream patch for applying a fix. Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode") Signed-off-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 59 ++++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 5 ++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 14 +++++ 3 files changed, 58 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 79481f4cf264..2825b5665456 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -133,6 +133,23 @@ err_create_ft: return rule; } +static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_fc *counter = NULL; + + if (!IS_ERR(flow->rule)) { + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); + } + + if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; + } +} + static struct mlx5_flow_handle * mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, @@ -149,7 +166,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) { + struct mlx5e_tc_flow *flow); + +static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->attr); + + mlx5_eswitch_del_vlan_action(esw, flow->attr); + + if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); +} + +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ struct list_head *next = flow->encap.next; list_del(&flow->encap); @@ -173,25 +207,10 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_fc *counter = NULL; - - if (!IS_ERR(flow->rule)) { - counter = mlx5_flow_rule_counter(flow->rule); - mlx5_del_flow_rules(flow->rule); - mlx5_fc_destroy(priv->mdev, counter); - } - - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - mlx5_eswitch_del_vlan_action(esw, flow->attr); - if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) - mlx5e_detach_encap(priv, flow); - } - - if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { - mlx5_destroy_flow_table(priv->fs.tc.t); - priv->fs.tc.t = NULL; - } + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + mlx5e_tc_del_fdb_flow(priv, flow); + else + mlx5e_tc_del_nic_flow(priv, flow); } static void parse_vxlan_attr(struct mlx5_flow_spec *spec, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 5b78883d5654..9227a83a97e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -271,6 +271,11 @@ struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr); +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr); + struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4f5b0d47d5f3..bfabefe20ac0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -97,6 +97,20 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; } +void +mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_fc *counter = NULL; + + if (!IS_ERR(rule)) { + counter = mlx5_flow_rule_counter(rule); + mlx5_del_flow_rules(rule); + mlx5_fc_destroy(esw->dev, counter); + } +} + static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) { struct mlx5_eswitch_rep *rep; -- cgit v1.2.3 From 375f51e2b5b7b9a42b3139aea519cbb1bfc5d6ef Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 21 Mar 2017 15:59:14 +0200 Subject: net/mlx5: E-Switch, Don't allow changing inline mode when flows are configured Changing the eswitch inline mode can potentially cause already configured flows not to match the policy. E.g. set policy L4, add some L4 rules, set policy to L2 --> bad! Hence we disallow it. Keep track of how many offloaded rules are now set and refuse inline mode changes if this isn't zero. Fixes: bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode") Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9227a83a97e3..ad329b1680b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -209,6 +209,7 @@ struct mlx5_esw_offload { struct mlx5_eswitch_rep *vport_reps; DECLARE_HASHTABLE(encap_tbl, 8); u8 inline_mode; + u64 num_flows; }; struct mlx5_eswitch { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bfabefe20ac0..307ec6c5fd3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -93,6 +93,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, spec, &flow_act, dest, i); if (IS_ERR(rule)) mlx5_fc_destroy(esw->dev, counter); + else + esw->offloads.num_flows++; return rule; } @@ -108,6 +110,7 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, counter = mlx5_flow_rule_counter(rule); mlx5_del_flow_rules(rule); mlx5_fc_destroy(esw->dev, counter); + esw->offloads.num_flows--; } } @@ -922,6 +925,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) return -EOPNOTSUPP; + if (esw->offloads.num_flows > 0) { + esw_warn(dev, "Can't set inline mode when flows are configured\n"); + return -EOPNOTSUPP; + } + err = esw_inline_mode_from_devlink(mode, &mlx5_mode); if (err) goto out; -- cgit v1.2.3 From 09c91ddf2cd33489c2c14edfef43ae38d412888e Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 21 Mar 2017 15:59:15 +0200 Subject: net/mlx5e: Use the proper UAPI values when offloading TC vlan actions Currently we use the non UAPI values and we miss erring on the modify action which is not supported, fix that. Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads') Signed-off-by: Or Gerlitz Reported-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2825b5665456..9c13abaf3885 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1131,14 +1131,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } if (is_tcf_vlan(a)) { - if (tcf_vlan_action(a) == VLAN_F_POP) { + if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - } else if (tcf_vlan_action(a) == VLAN_F_PUSH) { + } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) return -EOPNOTSUPP; attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; attr->vlan = tcf_vlan_push_vid(a); + } else { /* action is TCA_VLAN_ACT_MODIFY */ + return -EOPNOTSUPP; } continue; } -- cgit v1.2.3 From 1ad9a00ae0efc2e9337148d6c382fad3d27bf99a Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 21 Mar 2017 15:59:16 +0200 Subject: net/mlx5e: Avoid supporting udp tunnel port ndo for VF reps This was added to allow the TC offloading code to identify offloading encap/decap vxlan rules. The VF reps are effectively related to the same mlx5 PCI device as the PF. Since the kernel invokes the (say) delete ndo for each netdev, the FW erred on multiple vxlan dst port deletes when the port was deleted from the system. We fix that by keeping the registration to be carried out only by the PF. Since the PF serves as the uplink device, the VF reps will look up a port there and realize if they are ok to offload that. Tested: ip link add vxlan1 type vxlan id 44 dev ens5f0 dstport 9999 ip link set vxlan1 up ip link del dev vxlan1 Fixes: 4a25730eb202 ('net/mlx5e: Add ndo_udp_tunnel_add to VF representors') Signed-off-by: Paul Blakey Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 +++++++-- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f6a6ded204f6..dc52053128bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -928,10 +928,6 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); -void mlx5e_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti); -void mlx5e_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti); int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8ef64c4db2c2..66c133757a5e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3100,8 +3100,8 @@ static int mlx5e_get_vf_stats(struct net_device *dev, vf_stats); } -void mlx5e_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +static void mlx5e_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3114,8 +3114,8 @@ void mlx5e_add_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1); } -void mlx5e_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +static void mlx5e_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) { struct mlx5e_priv *priv = netdev_priv(netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2c864574a9d5..f621373bd7a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -393,8 +393,6 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, .ndo_get_stats64 = mlx5e_rep_get_stats, - .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, - .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, .ndo_has_offload_stats = mlx5e_has_offload_stats, .ndo_get_offload_stats = mlx5e_get_offload_stats, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9c13abaf3885..fade7233dac5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -267,12 +267,15 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS, f->mask); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw); + struct mlx5e_priv *up_priv = netdev_priv(up_dev); /* Full udp dst port must be given */ if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) goto vxlan_match_offload_err; - if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) && + if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) parse_vxlan_attr(spec, f); else { @@ -995,6 +998,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw); + struct mlx5e_priv *up_priv = netdev_priv(up_dev); unsigned short family = ip_tunnel_info_af(tun_info); struct ip_tunnel_key *key = &tun_info->key; struct mlx5_encap_entry *e; @@ -1015,7 +1020,7 @@ vxlan_encap_offload_err: return -EOPNOTSUPP; } - if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) && + if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { tunnel_type = MLX5_HEADER_TYPE_VXLAN; } else { -- cgit v1.2.3 From 5f40b4ed975c26016cf41953b7510fe90718e21c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 21 Mar 2017 15:59:17 +0200 Subject: net/mlx5: Increase number of max QPs in default profile With ConnectX-4 sharing SRQs from the same space as QPs, we hit a limit preventing some applications to allocate needed QPs amount. Double the size to 256K. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e2bd600d19de..60154a175bd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -87,7 +87,7 @@ static struct mlx5_profile profile[] = { [2] = { .mask = MLX5_PROF_MASK_QP_SIZE | MLX5_PROF_MASK_MR_CACHE, - .log_max_qp = 17, + .log_max_qp = 18, .mr_cache[0] = { .size = 500, .limit = 250 -- cgit v1.2.3 From d3a4e4da54c7adb420d5f48e89be913b14bdeff1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 21 Mar 2017 15:59:18 +0200 Subject: net/mlx5e: Count GSO packets correctly TX packets statistics ('tx_packets' counter) used to count GSO packets as one, even though it contains multiple segments. This patch will increment the counter by the number of segments, and align the driver with the behavior of other drivers in the stack. Note that no information is lost in this patch due to 'tx_tso_packets' counter existence. Before, ethtool showed: $ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets" tx_packets: 61340 tx_tso_packets: 60954 tx_packets_phy: 2451115 Now, we will see the more logical statistics: $ ethtool -S ens6 | egrep "tx_packets|tx_tso_packets" tx_packets: 2451115 tx_tso_packets: 60954 tx_packets_phy: 2451115 Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Gal Pressman Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index f193128bac4b..57f5e2d7ebd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -274,15 +274,18 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->stats.tso_bytes += skb->len - ihs; } + sq->stats.packets += skb_shinfo(skb)->gso_segs; num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; } else { bf = sq->bf_budget && !skb->xmit_more && !skb_shinfo(skb)->nr_frags; ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); + sq->stats.packets++; num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } + sq->stats.bytes += num_bytes; wi->num_bytes = num_bytes; ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; @@ -381,8 +384,6 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) if (bf) sq->bf_budget--; - sq->stats.packets++; - sq->stats.bytes += num_bytes; return NETDEV_TX_OK; dma_unmap_wqe_err: -- cgit v1.2.3 From 8ab7e2ae15d84ba758b2c8c6f4075722e9bd2a08 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 21 Mar 2017 15:59:19 +0200 Subject: net/mlx5e: Count LRO packets correctly RX packets statistics ('rx_packets' counter) used to count LRO packets as one, even though it contains multiple segments. This patch will increment the counter by the number of segments, and align the driver with the behavior of other drivers in the stack. Note that no information is lost in this patch due to 'rx_lro_packets' counter existence. Before, ethtool showed: $ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets" rx_packets: 435277 rx_lro_packets: 35847 rx_packets_phy: 1935066 Now, we will see the more logical statistics: $ ethtool -S ens6 | egrep "rx_packets|rx_lro_packets" rx_packets: 1935066 rx_lro_packets: 35847 rx_packets_phy: 1935066 Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Gal Pressman Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3d371688fbbb..bafcb349a50c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -601,6 +601,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (lro_num_seg > 1) { mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + /* Subtract one since we already counted this as one + * "regular" packet in mlx5e_complete_rx_cqe() + */ + rq->stats.packets += lro_num_seg - 1; rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; } -- cgit v1.2.3 From ac23d3cac1f339febd95c403a245ae072dfd0e84 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 21 Mar 2017 11:44:25 -0400 Subject: fjes: Do not load fjes driver if system does not have extended socket device. The fjes driver is used only by FUJITSU servers and almost of all servers in the world never use it. But currently if ACPI PNP0C02 is defined in the ACPI table, the following message is always shown: "FUJITSU Extended Socket Network Device Driver - version 1.2 - Copyright (c) 2015 FUJITSU LIMITED" The message makes users confused because there is no reason that the message is shown in other vendor servers. To avoid the confusion, the patch adds a check that the server has a extended socket device or not. Signed-off-by: Yasuaki Ishimatsu CC: Taku Izumi Signed-off-by: David S. Miller --- drivers/net/fjes/fjes_main.c | 52 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index c4b3c4b77a9c..7b589649ab46 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -45,6 +45,8 @@ MODULE_DESCRIPTION("FUJITSU Extended Socket Network Device Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +#define ACPI_MOTHERBOARD_RESOURCE_HID "PNP0C02" + static int fjes_request_irq(struct fjes_adapter *); static void fjes_free_irq(struct fjes_adapter *); @@ -78,7 +80,7 @@ static void fjes_rx_irq(struct fjes_adapter *, int); static int fjes_poll(struct napi_struct *, int); static const struct acpi_device_id fjes_acpi_ids[] = { - {"PNP0C02", 0}, + {ACPI_MOTHERBOARD_RESOURCE_HID, 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, fjes_acpi_ids); @@ -115,18 +117,17 @@ static struct resource fjes_resource[] = { }, }; -static int fjes_acpi_add(struct acpi_device *device) +static bool is_extended_socket_device(struct acpi_device *device) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL}; char str_buf[sizeof(FJES_ACPI_SYMBOL) + 1]; - struct platform_device *plat_dev; union acpi_object *str; acpi_status status; int result; status = acpi_evaluate_object(device->handle, "_STR", NULL, &buffer); if (ACPI_FAILURE(status)) - return -ENODEV; + return false; str = buffer.pointer; result = utf16s_to_utf8s((wchar_t *)str->string.pointer, @@ -136,10 +137,21 @@ static int fjes_acpi_add(struct acpi_device *device) if (strncmp(FJES_ACPI_SYMBOL, str_buf, strlen(FJES_ACPI_SYMBOL)) != 0) { kfree(buffer.pointer); - return -ENODEV; + return false; } kfree(buffer.pointer); + return true; +} + +static int fjes_acpi_add(struct acpi_device *device) +{ + struct platform_device *plat_dev; + acpi_status status; + + if (!is_extended_socket_device(device)) + return -ENODEV; + status = acpi_walk_resources(device->handle, METHOD_NAME__CRS, fjes_get_acpi_resource, fjes_resource); if (ACPI_FAILURE(status)) @@ -1473,11 +1485,41 @@ static void fjes_watch_unshare_task(struct work_struct *work) } } +static acpi_status +acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level, + void *context, void **return_value) +{ + struct acpi_device *device; + bool *found = context; + int result; + + result = acpi_bus_get_device(obj_handle, &device); + if (result) + return AE_OK; + + if (strcmp(acpi_device_hid(device), ACPI_MOTHERBOARD_RESOURCE_HID)) + return AE_OK; + + if (!is_extended_socket_device(device)) + return AE_OK; + + *found = true; + return AE_CTRL_TERMINATE; +} + /* fjes_init_module - Driver Registration Routine */ static int __init fjes_init_module(void) { + bool found = false; int result; + acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, + acpi_find_extended_socket_device, NULL, &found, + NULL); + + if (!found) + return -ENODEV; + pr_info("%s - version %s - %s\n", fjes_driver_string, fjes_driver_version, fjes_copyright); -- cgit v1.2.3 From 2b396d302650f1ebb770ed758ddcf5a64328ffd5 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 21 Mar 2017 11:46:35 -0400 Subject: fjes: Do not load fjes driver if extended socket device is not power on. The extended device socket cannot turn on/off while system is running. So when system boots up and the device is not power on, the fjes driver does not need be loaded. To check the status of the device, the patch adds ACPI _STA method check. Signed-off-by: Yasuaki Ishimatsu CC: Taku Izumi Signed-off-by: David S. Miller --- drivers/net/fjes/fjes_main.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 7b589649ab46..ae48c809bac9 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -144,6 +144,24 @@ static bool is_extended_socket_device(struct acpi_device *device) return true; } +static int acpi_check_extended_socket_status(struct acpi_device *device) +{ + unsigned long long sta; + acpi_status status; + + status = acpi_evaluate_integer(device->handle, "_STA", NULL, &sta); + if (ACPI_FAILURE(status)) + return -ENODEV; + + if (!((sta & ACPI_STA_DEVICE_PRESENT) && + (sta & ACPI_STA_DEVICE_ENABLED) && + (sta & ACPI_STA_DEVICE_UI) && + (sta & ACPI_STA_DEVICE_FUNCTIONING))) + return -ENODEV; + + return 0; +} + static int fjes_acpi_add(struct acpi_device *device) { struct platform_device *plat_dev; @@ -152,6 +170,9 @@ static int fjes_acpi_add(struct acpi_device *device) if (!is_extended_socket_device(device)) return -ENODEV; + if (acpi_check_extended_socket_status(device)) + return -ENODEV; + status = acpi_walk_resources(device->handle, METHOD_NAME__CRS, fjes_get_acpi_resource, fjes_resource); if (ACPI_FAILURE(status)) @@ -1503,6 +1524,9 @@ acpi_find_extended_socket_device(acpi_handle obj_handle, u32 level, if (!is_extended_socket_device(device)) return AE_OK; + if (acpi_check_extended_socket_status(device)) + return AE_OK; + *found = true; return AE_CTRL_TERMINATE; } -- cgit v1.2.3 From d515684d78148884d5fc425ba904c50f03844020 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 21 Mar 2017 17:14:27 +0100 Subject: ipv6: make sure to initialize sockc.tsflags before first use In the case udp_sk(sk)->pending is AF_INET6, udpv6_sendmsg() would jump to do_append_data, skipping the initialization of sockc.tsflags. Fix the problem by moving sockc.tsflags initialization earlier. The bug was detected with KMSAN. Fixes: c14ac9451c34 ("sock: enable timestamping using control messages") Signed-off-by: Alexander Potapenko Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4e4c401e3bc6..e28082f0a307 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1035,6 +1035,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = -1; ipc6.tclass = -1; ipc6.dontfrag = -1; + sockc.tsflags = sk->sk_tsflags; /* destination address check */ if (sin6) { @@ -1159,7 +1160,6 @@ do_udp_sendmsg: fl6.flowi6_mark = sk->sk_mark; fl6.flowi6_uid = sk->sk_uid; - sockc.tsflags = sk->sk_tsflags; if (msg->msg_controllen) { opt = &opt_space; -- cgit v1.2.3 From 31739eae738ccbe8b9d627c3f2251017ca03f4d2 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 21 Mar 2017 14:01:06 -0700 Subject: net: bcmgenet: remove bcmgenet_internal_phy_setup() Commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset") removed the bcmgenet_mii_reset() function from bcmgenet_power_up() and bcmgenet_internal_phy_setup() functions. In so doing it broke the reset of the internal PHY devices used by the GENETv1-GENETv3 which required this reset before the UniMAC was enabled. It also broke the internal GPHY devices used by the GENETv4 because the config_init that installed the AFE workaround was no longer occurring after the reset of the GPHY performed by bcmgenet_phy_power_set() in bcmgenet_internal_phy_setup(). In addition the code in bcmgenet_internal_phy_setup() related to the "enable APD" comment goes with the bcmgenet_mii_reset() so it should have also been removed. Commit bd4060a6108b ("net: bcmgenet: Power on integrated GPHY in bcmgenet_power_up()") moved the bcmgenet_phy_power_set() call to the bcmgenet_power_up() function, but failed to remove it from the bcmgenet_internal_phy_setup() function. Had it done so, the bcmgenet_internal_phy_setup() function would have been empty and could have been removed at that time. Commit 5dbebbb44a6a ("net: bcmgenet: Software reset EPHY after power on") was submitted to correct the functional problems introduced by commit 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset"). It was included in v4.4 and made available on 4.3-stable. Unfortunately, it didn't fully revert the commit because this bcmgenet_mii_reset() doesn't apply the soft reset to the internal GPHY used by GENETv4 like the previous one did. This prevents the restoration of the AFE work- arounds for internal GPHY devices after the bcmgenet_phy_power_set() in bcmgenet_internal_phy_setup(). This commit takes the alternate approach of removing the unnecessary bcmgenet_internal_phy_setup() function which shouldn't have been in v4.3 so that when bcmgenet_mii_reset() was restored it should have only gone into bcmgenet_power_up(). This will avoid the problems while also removing the redundancy (and hopefully some of the confusion). Fixes: 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index e87607621e62..2f9281936f0e 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -220,20 +220,6 @@ void bcmgenet_phy_power_set(struct net_device *dev, bool enable) udelay(60); } -static void bcmgenet_internal_phy_setup(struct net_device *dev) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - u32 reg; - - /* Power up PHY */ - bcmgenet_phy_power_set(dev, true); - /* enable APD */ - reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); - reg |= EXT_PWR_DN_EN_LD; - bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); - bcmgenet_mii_reset(dev); -} - static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { u32 reg; @@ -281,7 +267,6 @@ int bcmgenet_mii_config(struct net_device *dev) if (priv->internal_phy) { phy_name = "internal PHY"; - bcmgenet_internal_phy_setup(dev); } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { phy_name = "MoCA"; bcmgenet_moca_phy_setup(priv); -- cgit v1.2.3 From dd1ef79120e1600cb48320cf80a612ee6510110c Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Tue, 21 Mar 2017 15:07:48 -0700 Subject: enic: update enic maintainers update enic maintainers Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 078c38217daa..c45c02bc6082 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3216,7 +3216,6 @@ F: drivers/platform/chrome/ CISCO VIC ETHERNET NIC DRIVER M: Christian Benvenuti -M: Sujith Sankar M: Govindarajulu Varadarajan <_govind@gmx.com> M: Neel Patel S: Supported -- cgit v1.2.3 From 8c290e60fa2a51806159522331c9ed41252a8fb3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 21 Mar 2017 19:05:04 -0700 Subject: bpf: fix hashmap extra_elems logic In both kmalloc and prealloc mode the bpf_map_update_elem() is using per-cpu extra_elems to do atomic update when the map is full. There are two issues with it. The logic can be misused, since it allows max_entries+num_cpus elements to be present in the map. And alloc_extra_elems() at map creation time can fail percpu alloc for large map values with a warn: WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60 illegal size (32824) or align (8) for percpu allocation The fixes for both of these issues are different for kmalloc and prealloc modes. For prealloc mode allocate extra num_possible_cpus elements and store their pointers into extra_elems array instead of actual elements. Hence we can use these hidden(spare) elements not only when the map is full but during bpf_map_update_elem() that replaces existing element too. That also improves performance, since pcpu_freelist_pop/push is avoided. Unfortunately this approach cannot be used for kmalloc mode which needs to kfree elements after rcu grace period. Therefore switch it back to normal kmalloc even when full and old element exists like it was prior to commit 6c9059817432 ("bpf: pre-allocate hash map elements"). Add tests to check for over max_entries and large map values. Reported-by: Dave Jones Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- kernel/bpf/hashtab.c | 144 ++++++++++++++++---------------- tools/testing/selftests/bpf/test_maps.c | 29 ++++++- 2 files changed, 97 insertions(+), 76 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index afe5bab376c9..361a69dfe543 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -30,18 +30,12 @@ struct bpf_htab { struct pcpu_freelist freelist; struct bpf_lru lru; }; - void __percpu *extra_elems; + struct htab_elem *__percpu *extra_elems; atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ }; -enum extra_elem_state { - HTAB_NOT_AN_EXTRA_ELEM = 0, - HTAB_EXTRA_ELEM_FREE, - HTAB_EXTRA_ELEM_USED -}; - /* each htab element is struct htab_elem + key + value */ struct htab_elem { union { @@ -56,7 +50,6 @@ struct htab_elem { }; union { struct rcu_head rcu; - enum extra_elem_state state; struct bpf_lru_node lru_node; }; u32 hash; @@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab) htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; } +static bool htab_is_prealloc(const struct bpf_htab *htab) +{ + return !(htab->map.map_flags & BPF_F_NO_PREALLOC); +} + static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, void __percpu *pptr) { @@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, static int prealloc_init(struct bpf_htab *htab) { + u32 num_entries = htab->map.max_entries; int err = -ENOMEM, i; - htab->elems = bpf_map_area_alloc(htab->elem_size * - htab->map.max_entries); + if (!htab_is_percpu(htab) && !htab_is_lru(htab)) + num_entries += num_possible_cpus(); + + htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries); if (!htab->elems) return -ENOMEM; if (!htab_is_percpu(htab)) goto skip_percpu_elems; - for (i = 0; i < htab->map.max_entries; i++) { + for (i = 0; i < num_entries; i++) { u32 size = round_up(htab->map.value_size, 8); void __percpu *pptr; @@ -166,11 +167,11 @@ skip_percpu_elems: if (htab_is_lru(htab)) bpf_lru_populate(&htab->lru, htab->elems, offsetof(struct htab_elem, lru_node), - htab->elem_size, htab->map.max_entries); + htab->elem_size, num_entries); else pcpu_freelist_populate(&htab->freelist, htab->elems + offsetof(struct htab_elem, fnode), - htab->elem_size, htab->map.max_entries); + htab->elem_size, num_entries); return 0; @@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab) static int alloc_extra_elems(struct bpf_htab *htab) { - void __percpu *pptr; + struct htab_elem *__percpu *pptr, *l_new; + struct pcpu_freelist_node *l; int cpu; - pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN); + pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8, + GFP_USER | __GFP_NOWARN); if (!pptr) return -ENOMEM; for_each_possible_cpu(cpu) { - ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state = - HTAB_EXTRA_ELEM_FREE; + l = pcpu_freelist_pop(&htab->freelist); + /* pop will succeed, since prealloc_init() + * preallocated extra num_possible_cpus elements + */ + l_new = container_of(l, struct htab_elem, fnode); + *per_cpu_ptr(pptr, cpu) = l_new; } htab->extra_elems = pptr; return 0; @@ -342,25 +349,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) raw_spin_lock_init(&htab->buckets[i].lock); } - if (!percpu && !lru) { - /* lru itself can remove the least used element, so - * there is no need for an extra elem during map_update. - */ - err = alloc_extra_elems(htab); - if (err) - goto free_buckets; - } - if (prealloc) { err = prealloc_init(htab); if (err) - goto free_extra_elems; + goto free_buckets; + + if (!percpu && !lru) { + /* lru itself can remove the least used element, so + * there is no need for an extra elem during map_update. + */ + err = alloc_extra_elems(htab); + if (err) + goto free_prealloc; + } } return &htab->map; -free_extra_elems: - free_percpu(htab->extra_elems); +free_prealloc: + prealloc_destroy(htab); free_buckets: bpf_map_area_free(htab->buckets); free_htab: @@ -575,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head) static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) { - if (l->state == HTAB_EXTRA_ELEM_USED) { - l->state = HTAB_EXTRA_ELEM_FREE; - return; - } - - if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) { + if (htab_is_prealloc(htab)) { pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); @@ -610,47 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, bool percpu, bool onallcpus, - bool old_elem_exists) + struct htab_elem *old_elem) { u32 size = htab->map.value_size; - bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC); - struct htab_elem *l_new; + bool prealloc = htab_is_prealloc(htab); + struct htab_elem *l_new, **pl_new; void __percpu *pptr; - int err = 0; if (prealloc) { - struct pcpu_freelist_node *l; + if (old_elem) { + /* if we're updating the existing element, + * use per-cpu extra elems to avoid freelist_pop/push + */ + pl_new = this_cpu_ptr(htab->extra_elems); + l_new = *pl_new; + *pl_new = old_elem; + } else { + struct pcpu_freelist_node *l; - l = pcpu_freelist_pop(&htab->freelist); - if (!l) - err = -E2BIG; - else + l = pcpu_freelist_pop(&htab->freelist); + if (!l) + return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); - } else { - if (atomic_inc_return(&htab->count) > htab->map.max_entries) { - atomic_dec(&htab->count); - err = -E2BIG; - } else { - l_new = kmalloc(htab->elem_size, - GFP_ATOMIC | __GFP_NOWARN); - if (!l_new) - return ERR_PTR(-ENOMEM); } - } - - if (err) { - if (!old_elem_exists) - return ERR_PTR(err); - - /* if we're updating the existing element and the hash table - * is full, use per-cpu extra elems - */ - l_new = this_cpu_ptr(htab->extra_elems); - if (l_new->state != HTAB_EXTRA_ELEM_FREE) - return ERR_PTR(-E2BIG); - l_new->state = HTAB_EXTRA_ELEM_USED; } else { - l_new->state = HTAB_NOT_AN_EXTRA_ELEM; + if (atomic_inc_return(&htab->count) > htab->map.max_entries) + if (!old_elem) { + /* when map is full and update() is replacing + * old element, it's ok to allocate, since + * old element will be freed immediately. + * Otherwise return an error + */ + atomic_dec(&htab->count); + return ERR_PTR(-E2BIG); + } + l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); + if (!l_new) + return ERR_PTR(-ENOMEM); } memcpy(l_new->key, key, key_size); @@ -731,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, goto err; l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, - !!l_old); + l_old); if (IS_ERR(l_new)) { /* all pre-allocated elements are in use or memory exhausted */ ret = PTR_ERR(l_new); @@ -744,7 +742,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { hlist_nulls_del_rcu(&l_old->hash_node); - free_htab_elem(htab, l_old); + if (!htab_is_prealloc(htab)) + free_htab_elem(htab, l_old); } ret = 0; err: @@ -856,7 +855,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, value, onallcpus); } else { l_new = alloc_htab_elem(htab, key, value, key_size, - hash, true, onallcpus, false); + hash, true, onallcpus, NULL); if (IS_ERR(l_new)) { ret = PTR_ERR(l_new); goto err; @@ -1024,8 +1023,7 @@ static void delete_all_elements(struct bpf_htab *htab) hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { hlist_nulls_del_rcu(&l->hash_node); - if (l->state != HTAB_EXTRA_ELEM_USED) - htab_elem_free(htab, l); + htab_elem_free(htab, l); } } } @@ -1045,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map) * not have executed. Wait for them. */ rcu_barrier(); - if (htab->map.map_flags & BPF_F_NO_PREALLOC) + if (!htab_is_prealloc(htab)) delete_all_elements(htab); else prealloc_destroy(htab); diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index cada17ac00b8..a0aa2009b0e0 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -80,8 +80,9 @@ static void test_hashmap(int task, void *data) assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0); key = 2; assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); - key = 1; - assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); + key = 3; + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + errno == E2BIG); /* Check that key = 0 doesn't exist. */ key = 0; @@ -110,6 +111,24 @@ static void test_hashmap(int task, void *data) close(fd); } +static void test_hashmap_sizes(int task, void *data) +{ + int fd, i, j; + + for (i = 1; i <= 512; i <<= 1) + for (j = 1; j <= 1 << 18; j <<= 1) { + fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, + 2, map_flags); + if (fd < 0) { + printf("Failed to create hashmap key=%d value=%d '%s'\n", + i, j, strerror(errno)); + exit(1); + } + close(fd); + usleep(10); /* give kernel time to destroy */ + } +} + static void test_hashmap_percpu(int task, void *data) { unsigned int nr_cpus = bpf_num_possible_cpus(); @@ -317,7 +336,10 @@ static void test_arraymap_percpu(int task, void *data) static void test_arraymap_percpu_many_keys(void) { unsigned int nr_cpus = bpf_num_possible_cpus(); - unsigned int nr_keys = 20000; + /* nr_keys is not too large otherwise the test stresses percpu + * allocator more than anything else + */ + unsigned int nr_keys = 2000; long values[nr_cpus]; int key, fd, i; @@ -419,6 +441,7 @@ static void test_map_stress(void) { run_parallel(100, test_hashmap, NULL); run_parallel(100, test_hashmap_percpu, NULL); + run_parallel(100, test_hashmap_sizes, NULL); run_parallel(100, test_arraymap, NULL); run_parallel(100, test_arraymap_percpu, NULL); -- cgit v1.2.3 From c64c0b3cac4c5b8cb093727d2c19743ea3965c0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Mar 2017 19:22:28 -0700 Subject: ipv4: provide stronger user input validation in nl_fib_input() Alexander reported a KMSAN splat caused by reads of uninitialized field (tb_id_in) from user provided struct fib_result_nl It turns out nl_fib_input() sanity tests on user input is a bit wrong : User can pretend nlh->nlmsg_len is big enough, but provide at sendmsg() time a too small buffer. Reported-by: Alexander Potapenko Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 42bfd08109dd..8f2133ffc2ff 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1083,7 +1083,8 @@ static void nl_fib_input(struct sk_buff *skb) net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); - if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || + if (skb->len < nlmsg_total_size(sizeof(*frn)) || + skb->len < nlh->nlmsg_len || nlmsg_len(nlh) < sizeof(*frn)) return; -- cgit v1.2.3 From a97e50cc4cb67e1e7bff56f6b41cda62ca832336 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 22 Mar 2017 13:08:08 +0100 Subject: socket, bpf: fix sk_filter use after free in sk_clone_lock In sk_clone_lock(), we create a new socket and inherit most of the parent's members via sock_copy() which memcpy()'s various sections. Now, in case the parent socket had a BPF socket filter attached, then newsk->sk_filter points to the same instance as the original sk->sk_filter. sk_filter_charge() is then called on the newsk->sk_filter to take a reference and should that fail due to hitting max optmem, we bail out and release the newsk instance. The issue is that commit 278571baca2a ("net: filter: simplify socket charging") wrongly combined the dismantle path with the failure path of xfrm_sk_clone_policy(). This means, even when charging failed, we call sk_free_unlock_clone() on the newsk, which then still points to the same sk_filter as the original sk. Thus, sk_free_unlock_clone() calls into __sk_destruct() eventually where it tests for present sk_filter and calls sk_filter_uncharge() on it, which potentially lets sk_omem_alloc wrap around and releases the eBPF prog and sk_filter structure from the (still intact) parent. Fix it by making sure that when sk_filter_charge() failed, we reset newsk->sk_filter back to NULL before passing to sk_free_unlock_clone(), so that we don't mess with the parents sk_filter. Only if xfrm_sk_clone_policy() fails, we did reach the point where either the parent's filter was NULL and as a result newsk's as well or where we previously had a successful sk_filter_charge(), thus for that case, we do need sk_filter_uncharge() to release the prior taken reference on sk_filter. Fixes: 278571baca2a ("net: filter: simplify socket charging") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/core/sock.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/sock.c b/net/core/sock.c index acb0d4137499..2c4f574168fb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1544,6 +1544,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) is_charged = sk_filter_charge(newsk, filter); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { + /* We need to make sure that we don't uncharge the new + * socket if we couldn't charge it in the first place + * as otherwise we uncharge the parent's filter. + */ + if (!is_charged) + RCU_INIT_POINTER(newsk->sk_filter, NULL); sk_free_unlock_clone(newsk); newsk = NULL; goto out; -- cgit v1.2.3 From 1d2a6a5e4bf2921531071fcff8538623dce74efa Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 22 Mar 2017 16:08:33 +0100 Subject: genetlink: fix counting regression on ctrl_dumpfamily() Commit 2ae0f17df1cd ("genetlink: use idr to track families") replaced if (++n < fams_to_skip) continue; into: if (n++ < fams_to_skip) continue; This subtle change cause that on retry ctrl_dumpfamily() call we omit one family that failed to do ctrl_fill_info() on previous call, because cb->args[0] = n number counts also family that failed to do ctrl_fill_info(). Patch fixes the problem and avoid confusion in the future just decrease n counter when ctrl_fill_info() fail. User visible problem caused by this bug is failure to get access to some genetlink family i.e. nl80211. However problem is reproducible only if number of registered genetlink families is big enough to cause second call of ctrl_dumpfamily(). Cc: Xose Vazquez Perez Cc: Larry Finger Cc: Johannes Berg Fixes: 2ae0f17df1cd ("genetlink: use idr to track families") Signed-off-by: Stanislaw Gruszka Acked-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index fb6e10fdb217..92e0981f7404 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -783,8 +783,10 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - skb, CTRL_CMD_NEWFAMILY) < 0) + skb, CTRL_CMD_NEWFAMILY) < 0) { + n--; break; + } } cb->args[0] = n; -- cgit v1.2.3 From 15bb7745e94a665caf42bfaabf0ce062845b533b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Mar 2017 08:10:21 -0700 Subject: tcp: initialize icsk_ack.lrcvtime at session start time icsk_ack.lrcvtime has a 0 value at socket creation time. tcpi_last_data_recv can have bogus value if no payload is ever received. This patch initializes icsk_ack.lrcvtime for active sessions in tcp_finish_connect(), and for passive sessions in tcp_create_openreq_child() Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_minisocks.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 39c393cc0fd3..c43119726a62 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5541,6 +5541,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct inet_connection_sock *icsk = inet_csk(sk); tcp_set_state(sk, TCP_ESTABLISHED); + icsk->icsk_ack.lrcvtime = tcp_time_stamp; if (skb) { icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -5759,7 +5760,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - icsk->icsk_ack.lrcvtime = tcp_time_stamp; tcp_enter_quickack_mode(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7e16243cdb58..65c0f3d13eca 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -460,6 +460,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; + newicsk->icsk_ack.lrcvtime = tcp_time_stamp; newtp->packets_out = 0; newtp->retrans_out = 0; -- cgit v1.2.3 From ec4fbd64751de18729eaa816ec69e4b504b5a7a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Mar 2017 08:57:15 -0700 Subject: inet: frag: release spinlock before calling icmp_send() Dmitry reported a lockdep splat [1] (false positive) that we can fix by releasing the spinlock before calling icmp_send() from ip_expire() This is a false positive because sending an ICMP message can not possibly re-enter the IP frag engine. [1] [ INFO: possible circular locking dependency detected ] 4.10.0+ #29 Not tainted ------------------------------------------------------- modprobe/12392 is trying to acquire lock: (_xmit_ETHER#2){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (_xmit_ETHER#2){+.-...}, at: [] __netif_tx_lock include/linux/netdevice.h:3486 [inline] (_xmit_ETHER#2){+.-...}, at: [] sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180 but task is already holding lock: (&(&q->lock)->rlock){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] (&(&q->lock)->rlock){+.-...}, at: [] ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(&q->lock)->rlock){+.-...}: validate_chain kernel/locking/lockdep.c:2267 [inline] __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] ip_defrag+0x3a2/0x4130 net/ipv4/ip_fragment.c:669 ip_check_defrag+0x4e3/0x8b0 net/ipv4/ip_fragment.c:713 packet_rcv_fanout+0x282/0x800 net/packet/af_packet.c:1459 deliver_skb net/core/dev.c:1834 [inline] dev_queue_xmit_nit+0x294/0xa90 net/core/dev.c:1890 xmit_one net/core/dev.c:2903 [inline] dev_hard_start_xmit+0x16b/0xab0 net/core/dev.c:2923 sch_direct_xmit+0x31f/0x6d0 net/sched/sch_generic.c:182 __dev_xmit_skb net/core/dev.c:3092 [inline] __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 neigh_resolve_output+0x6b9/0xb10 net/core/neighbour.c:1308 neigh_output include/net/neighbour.h:478 [inline] ip_finish_output2+0x8b8/0x15a0 net/ipv4/ip_output.c:228 ip_do_fragment+0x1d93/0x2720 net/ipv4/ip_output.c:672 ip_fragment.constprop.54+0x145/0x200 net/ipv4/ip_output.c:545 ip_finish_output+0x82d/0xe10 net/ipv4/ip_output.c:314 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512 raw_sendmsg+0x26de/0x3a00 net/ipv4/raw.c:655 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 ___sys_sendmsg+0x4a3/0x9f0 net/socket.c:1985 __sys_sendmmsg+0x25c/0x750 net/socket.c:2075 SYSC_sendmmsg net/socket.c:2106 [inline] SyS_sendmmsg+0x35/0x60 net/socket.c:2101 do_syscall_64+0x2e8/0x930 arch/x86/entry/common.c:281 return_from_SYSCALL_64+0x0/0x7a -> #0 (_xmit_ETHER#2){+.-...}: check_prev_add kernel/locking/lockdep.c:1830 [inline] check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940 validate_chain kernel/locking/lockdep.c:2267 [inline] __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] __netif_tx_lock include/linux/netdevice.h:3486 [inline] sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180 __dev_xmit_skb net/core/dev.c:3092 [inline] __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 neigh_hh_output include/net/neighbour.h:468 [inline] neigh_output include/net/neighbour.h:476 [inline] ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228 ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512 icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394 icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754 ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239 call_timer_fn+0x241/0x820 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 [inline] __run_timers+0x960/0xcf0 kernel/time/timer.c:1601 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:657 [inline] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707 __read_once_size include/linux/compiler.h:254 [inline] atomic_read arch/x86/include/asm/atomic.h:26 [inline] rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline] __rcu_is_watching kernel/rcu/tree.c:1133 [inline] rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147 rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293 radix_tree_deref_slot include/linux/radix-tree.h:238 [inline] filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335 do_fault_around mm/memory.c:3231 [inline] do_read_fault mm/memory.c:3265 [inline] do_fault+0xbd5/0x2080 mm/memory.c:3370 handle_pte_fault mm/memory.c:3600 [inline] __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714 handle_mm_fault+0x1e2/0x480 mm/memory.c:3751 __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397 do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460 page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&(&q->lock)->rlock); lock(_xmit_ETHER#2); lock(&(&q->lock)->rlock); lock(_xmit_ETHER#2); *** DEADLOCK *** 10 locks held by modprobe/12392: #0: (&mm->mmap_sem){++++++}, at: [] __do_page_fault+0x2b8/0xb60 arch/x86/mm/fault.c:1336 #1: (rcu_read_lock){......}, at: [] filemap_map_pages+0x1e6/0x1570 mm/filemap.c:2324 #2: (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] #2: (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [] pte_alloc_one_map mm/memory.c:2944 [inline] #2: (&(ptlock_ptr(page))->rlock#2){+.+...}, at: [] alloc_set_pte+0x13b8/0x1b90 mm/memory.c:3072 #3: (((&q->timer))){+.-...}, at: [] lockdep_copy_map include/linux/lockdep.h:175 [inline] #3: (((&q->timer))){+.-...}, at: [] call_timer_fn+0x1c2/0x820 kernel/time/timer.c:1258 #4: (&(&q->lock)->rlock){+.-...}, at: [] spin_lock include/linux/spinlock.h:299 [inline] #4: (&(&q->lock)->rlock){+.-...}, at: [] ip_expire+0x51/0x6c0 net/ipv4/ip_fragment.c:201 #5: (rcu_read_lock){......}, at: [] ip_expire+0x1b3/0x6c0 net/ipv4/ip_fragment.c:216 #6: (slock-AF_INET){+.-...}, at: [] spin_trylock include/linux/spinlock.h:309 [inline] #6: (slock-AF_INET){+.-...}, at: [] icmp_xmit_lock net/ipv4/icmp.c:219 [inline] #6: (slock-AF_INET){+.-...}, at: [] icmp_send+0x803/0x1c80 net/ipv4/icmp.c:681 #7: (rcu_read_lock_bh){......}, at: [] ip_finish_output2+0x2c1/0x15a0 net/ipv4/ip_output.c:198 #8: (rcu_read_lock_bh){......}, at: [] __dev_queue_xmit+0x23e/0x1e60 net/core/dev.c:3324 #9: (dev->qdisc_running_key ?: &qdisc_running_key){+.....}, at: [] dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 stack backtrace: CPU: 0 PID: 12392 Comm: modprobe Not tainted 4.10.0+ #29 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:52 print_circular_bug+0x307/0x3b0 kernel/locking/lockdep.c:1204 check_prev_add kernel/locking/lockdep.c:1830 [inline] check_prevs_add+0xa8f/0x19f0 kernel/locking/lockdep.c:1940 validate_chain kernel/locking/lockdep.c:2267 [inline] __lock_acquire+0x2149/0x3430 kernel/locking/lockdep.c:3340 lock_acquire+0x2a1/0x630 kernel/locking/lockdep.c:3755 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x33/0x50 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:299 [inline] __netif_tx_lock include/linux/netdevice.h:3486 [inline] sch_direct_xmit+0x282/0x6d0 net/sched/sch_generic.c:180 __dev_xmit_skb net/core/dev.c:3092 [inline] __dev_queue_xmit+0x13e5/0x1e60 net/core/dev.c:3358 dev_queue_xmit+0x17/0x20 net/core/dev.c:3423 neigh_hh_output include/net/neighbour.h:468 [inline] neigh_output include/net/neighbour.h:476 [inline] ip_finish_output2+0xf6c/0x15a0 net/ipv4/ip_output.c:228 ip_finish_output+0xa29/0xe10 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip_output+0x1f0/0x7a0 net/ipv4/ip_output.c:404 dst_output include/net/dst.h:486 [inline] ip_local_out+0x95/0x170 net/ipv4/ip_output.c:124 ip_send_skb+0x3c/0xc0 net/ipv4/ip_output.c:1492 ip_push_pending_frames+0x64/0x80 net/ipv4/ip_output.c:1512 icmp_push_reply+0x372/0x4d0 net/ipv4/icmp.c:394 icmp_send+0x156c/0x1c80 net/ipv4/icmp.c:754 ip_expire+0x40e/0x6c0 net/ipv4/ip_fragment.c:239 call_timer_fn+0x241/0x820 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 [inline] __run_timers+0x960/0xcf0 kernel/time/timer.c:1601 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:657 [inline] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:707 RIP: 0010:__read_once_size include/linux/compiler.h:254 [inline] RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline] RIP: 0010:rcu_dynticks_curr_cpu_in_eqs kernel/rcu/tree.c:350 [inline] RIP: 0010:__rcu_is_watching kernel/rcu/tree.c:1133 [inline] RIP: 0010:rcu_is_watching+0x83/0x110 kernel/rcu/tree.c:1147 RSP: 0000:ffff8801c391f120 EFLAGS: 00000a03 ORIG_RAX: ffffffffffffff10 RAX: dffffc0000000000 RBX: ffff8801c391f148 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 000055edd4374000 RDI: ffff8801dbe1ae0c RBP: ffff8801c391f1a0 R08: 0000000000000002 R09: 0000000000000000 R10: dffffc0000000000 R11: 0000000000000002 R12: 1ffff10038723e25 R13: ffff8801dbe1ae00 R14: ffff8801c391f680 R15: dffffc0000000000 rcu_read_lock_held+0x87/0xc0 kernel/rcu/update.c:293 radix_tree_deref_slot include/linux/radix-tree.h:238 [inline] filemap_map_pages+0x6d4/0x1570 mm/filemap.c:2335 do_fault_around mm/memory.c:3231 [inline] do_read_fault mm/memory.c:3265 [inline] do_fault+0xbd5/0x2080 mm/memory.c:3370 handle_pte_fault mm/memory.c:3600 [inline] __handle_mm_fault+0x1062/0x2cb0 mm/memory.c:3714 handle_mm_fault+0x1e2/0x480 mm/memory.c:3751 __do_page_fault+0x4f6/0xb60 arch/x86/mm/fault.c:1397 do_page_fault+0x54/0x70 arch/x86/mm/fault.c:1460 page_fault+0x28/0x30 arch/x86/entry/entry_64.S:1011 RIP: 0033:0x7f83172f2786 RSP: 002b:00007fffe859ae80 EFLAGS: 00010293 RAX: 000055edd4373040 RBX: 00007f83175111c8 RCX: 000055edd4373238 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00007f8317510970 RBP: 00007fffe859afd0 R08: 0000000000000009 R09: 0000000000000000 R10: 0000000000000064 R11: 0000000000000000 R12: 000055edd4373040 R13: 0000000000000000 R14: 00007fffe859afe8 R15: 0000000000000000 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index bbe7f72db9c1..b3cdeec85f1f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -198,6 +198,7 @@ static void ip_expire(unsigned long arg) qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); + rcu_read_lock(); spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) @@ -207,7 +208,7 @@ static void ip_expire(unsigned long arg) __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if (!inet_frag_evicting(&qp->q)) { - struct sk_buff *head = qp->q.fragments; + struct sk_buff *clone, *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -216,32 +217,40 @@ static void ip_expire(unsigned long arg) if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; - rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) - goto out_rcu_unlock; + goto out; + /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) - goto out_rcu_unlock; + goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (frag_expire_skip_icmp(qp->user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) - goto out_rcu_unlock; + goto out; + + clone = skb_clone(head, GFP_ATOMIC); /* Send an ICMP "Fragment Reassembly Timeout" message. */ - icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); -out_rcu_unlock: - rcu_read_unlock(); + if (clone) { + spin_unlock(&qp->q.lock); + icmp_send(clone, ICMP_TIME_EXCEEDED, + ICMP_EXC_FRAGTIME, 0); + consume_skb(clone); + goto out_rcu_unlock; + } } out: spin_unlock(&qp->q.lock); +out_rcu_unlock: + rcu_read_unlock(); ipq_put(qp); } -- cgit v1.2.3 From 6e9e6cc8f4e4f2cd67931510c9f39abf3d9e0d3b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 20 Mar 2017 15:31:10 -0700 Subject: Bluetooth: btqcomsmd: fix compile-test dependency compile-testing fails when QCOM_SMD is a loadable module: drivers/bluetooth/built-in.o: In function `btqcomsmd_send': btqca.c:(.text+0xa8): undefined reference to `qcom_smd_send' drivers/bluetooth/built-in.o: In function `btqcomsmd_probe': btqca.c:(.text+0x3ec): undefined reference to `qcom_wcnss_open_channel' btqca.c:(.text+0x46c): undefined reference to `qcom_smd_set_drvdata' This clarifies the dependency to allow compile-testing only when SMD is completely disabled, otherwise the dependency on QCOM_SMD will make sure we can link against it. Fixes: e27ee2b16bad ("Bluetooth: btqcomsmd: Allow driver to build if COMPILE_TEST is enabled") Signed-off-by: Arnd Bergmann [bjorn: Restructure and clarify dependency to QCOM_WCNSS_CTRL] Signed-off-by: Bjorn Andersson Acked-by: Marcel Holtmann Signed-off-by: David S. Miller --- drivers/bluetooth/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index c2c14a12713b..08e054507d0b 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -344,7 +344,8 @@ config BT_WILINK config BT_QCOMSMD tristate "Qualcomm SMD based HCI support" - depends on (QCOM_SMD && QCOM_WCNSS_CTRL) || COMPILE_TEST + depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on QCOM_WCNSS_CTRL || (COMPILE_TEST && QCOM_WCNSS_CTRL=n) select BT_QCA help Qualcomm SMD based HCI driver. -- cgit v1.2.3 From c04ca616eed02b9abe7afd311382c3ed5eef5c40 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 22 Mar 2017 12:10:02 +0300 Subject: sfc: cleanup a condition in efx_udp_tunnel_del() Presumably if there is an "add" function, there is also a "del" function. But it causes a static checker warning because it looks like a common cut and paste bug. Signed-off-by: Dan Carpenter Acked-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 334bcc6df6b2..50d28261b6b9 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2404,7 +2404,7 @@ static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *t tnl.type = (u16)efx_tunnel_type; tnl.port = ti->port; - if (efx->type->udp_tnl_add_port) + if (efx->type->udp_tnl_del_port) (void)efx->type->udp_tnl_del_port(efx, tnl); } -- cgit v1.2.3 From f43feef4e6acde10857fcbfdede790d6b3f2c71d Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Wed, 22 Mar 2017 17:25:27 -0500 Subject: amd-xgbe: Fix the ECC-related bit position definitions The ECC bit positions that describe whether the ECC interrupt is for Tx, Rx or descriptor memory and whether the it is a single correctable or double detected error were defined in incorrectly (reversed order). Fix the bit position definitions for these settings so that the proper ECC handling is performed. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-common.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 86f1626816ff..127adbeefb10 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -984,29 +984,29 @@ #define XP_ECC_CNT1_DESC_DED_WIDTH 8 #define XP_ECC_CNT1_DESC_SEC_INDEX 0 #define XP_ECC_CNT1_DESC_SEC_WIDTH 8 -#define XP_ECC_IER_DESC_DED_INDEX 0 +#define XP_ECC_IER_DESC_DED_INDEX 5 #define XP_ECC_IER_DESC_DED_WIDTH 1 -#define XP_ECC_IER_DESC_SEC_INDEX 1 +#define XP_ECC_IER_DESC_SEC_INDEX 4 #define XP_ECC_IER_DESC_SEC_WIDTH 1 -#define XP_ECC_IER_RX_DED_INDEX 2 +#define XP_ECC_IER_RX_DED_INDEX 3 #define XP_ECC_IER_RX_DED_WIDTH 1 -#define XP_ECC_IER_RX_SEC_INDEX 3 +#define XP_ECC_IER_RX_SEC_INDEX 2 #define XP_ECC_IER_RX_SEC_WIDTH 1 -#define XP_ECC_IER_TX_DED_INDEX 4 +#define XP_ECC_IER_TX_DED_INDEX 1 #define XP_ECC_IER_TX_DED_WIDTH 1 -#define XP_ECC_IER_TX_SEC_INDEX 5 +#define XP_ECC_IER_TX_SEC_INDEX 0 #define XP_ECC_IER_TX_SEC_WIDTH 1 -#define XP_ECC_ISR_DESC_DED_INDEX 0 +#define XP_ECC_ISR_DESC_DED_INDEX 5 #define XP_ECC_ISR_DESC_DED_WIDTH 1 -#define XP_ECC_ISR_DESC_SEC_INDEX 1 +#define XP_ECC_ISR_DESC_SEC_INDEX 4 #define XP_ECC_ISR_DESC_SEC_WIDTH 1 -#define XP_ECC_ISR_RX_DED_INDEX 2 +#define XP_ECC_ISR_RX_DED_INDEX 3 #define XP_ECC_ISR_RX_DED_WIDTH 1 -#define XP_ECC_ISR_RX_SEC_INDEX 3 +#define XP_ECC_ISR_RX_SEC_INDEX 2 #define XP_ECC_ISR_RX_SEC_WIDTH 1 -#define XP_ECC_ISR_TX_DED_INDEX 4 +#define XP_ECC_ISR_TX_DED_INDEX 1 #define XP_ECC_ISR_TX_DED_WIDTH 1 -#define XP_ECC_ISR_TX_SEC_INDEX 5 +#define XP_ECC_ISR_TX_SEC_INDEX 0 #define XP_ECC_ISR_TX_SEC_WIDTH 1 #define XP_I2C_MUTEX_BUSY_INDEX 31 #define XP_I2C_MUTEX_BUSY_WIDTH 1 -- cgit v1.2.3 From 68c386590375b2aea5a3154f17882a30170707bf Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 02:20:39 +0300 Subject: net:ethernet:aquantia: Fix for RX checksum offload. Since AQC-100/107/108 chips supports hardware checksums for RX we should indicate this via NETIF_F_RXCSUM flag. v1->v2: 'Signed-off-by' tag added. Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h | 1 + drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h index 1093ea18823a..0592a0330cf0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0_internal.h @@ -137,6 +137,7 @@ static struct aq_hw_caps_s hw_atl_a0_hw_caps_ = { .tx_rings = HW_ATL_A0_TX_RINGS, .rx_rings = HW_ATL_A0_RX_RINGS, .hw_features = NETIF_F_HW_CSUM | + NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_TSO, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index 8bdee3ddd5a0..f3957e930340 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -188,6 +188,7 @@ static struct aq_hw_caps_s hw_atl_b0_hw_caps_ = { .tx_rings = HW_ATL_B0_TX_RINGS, .rx_rings = HW_ATL_B0_RX_RINGS, .hw_features = NETIF_F_HW_CSUM | + NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_TSO | -- cgit v1.2.3 From 71fdb70eb48784c1f28cdf2e67c4c587dd7f2594 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 13 Mar 2017 13:46:21 +0100 Subject: sched/clock: Fix clear_sched_clock_stable() preempt wobbly Paul reported a problems with clear_sched_clock_stable(). Since we run all of __clear_sched_clock_stable() from workqueue context, there's a preempt problem. Solve it by only running the static_key_disable() from workqueue. Reported-by: Paul E. McKenney Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20170313124621.GA3328@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/clock.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index a08795e21628..fec0f58c8dee 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -141,7 +141,14 @@ static void __set_sched_clock_stable(void) tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); } -static void __clear_sched_clock_stable(struct work_struct *work) +static void __sched_clock_work(struct work_struct *work) +{ + static_branch_disable(&__sched_clock_stable); +} + +static DECLARE_WORK(sched_clock_work, __sched_clock_work); + +static void __clear_sched_clock_stable(void) { struct sched_clock_data *scd = this_scd(); @@ -160,11 +167,11 @@ static void __clear_sched_clock_stable(struct work_struct *work) scd->tick_gtod, gtod_offset, scd->tick_raw, raw_offset); - static_branch_disable(&__sched_clock_stable); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); -} -static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable); + if (sched_clock_stable()) + schedule_work(&sched_clock_work); +} void clear_sched_clock_stable(void) { @@ -173,7 +180,7 @@ void clear_sched_clock_stable(void) smp_mb(); /* matches sched_clock_init_late() */ if (sched_clock_running == 2) - schedule_work(&sched_clock_work); + __clear_sched_clock_stable(); } void sched_clock_init_late(void) -- cgit v1.2.3 From 698eff6355f735d46d1b7113df8b422874cd7988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Mar 2017 12:48:18 +0100 Subject: sched/clock, x86/perf: Fix "perf test tsc" People reported that commit: 5680d8094ffa ("sched/clock: Provide better clock continuity") broke "perf test tsc". That commit added another offset to the reported clock value; so take that into account when computing the provided offset values. Reported-by: Adrian Hunter Reported-by: Arnaldo Carvalho de Melo Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity") Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 9 ++++++--- arch/x86/include/asm/timer.h | 2 ++ arch/x86/kernel/tsc.c | 4 ++-- include/linux/sched/clock.h | 13 +++++++------ kernel/sched/clock.c | 22 +++++++++++----------- 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2aa1ad194db2..580b60f5ac83 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2256,6 +2256,7 @@ void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { struct cyc2ns_data *data; + u64 offset; userpg->cap_user_time = 0; userpg->cap_user_time_zero = 0; @@ -2263,11 +2264,13 @@ void arch_perf_update_userpage(struct perf_event *event, !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); userpg->pmc_width = x86_pmu.cntval_bits; - if (!sched_clock_stable()) + if (!using_native_sched_clock() || !sched_clock_stable()) return; data = cyc2ns_read_begin(); + offset = data->cyc2ns_offset + __sched_clock_offset; + /* * Internal timekeeping for enabled/running/stopped times * is always in the local_clock domain. @@ -2275,7 +2278,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time = 1; userpg->time_mult = data->cyc2ns_mul; userpg->time_shift = data->cyc2ns_shift; - userpg->time_offset = data->cyc2ns_offset - now; + userpg->time_offset = offset - now; /* * cap_user_time_zero doesn't make sense when we're using a different @@ -2283,7 +2286,7 @@ void arch_perf_update_userpage(struct perf_event *event, */ if (!event->attr.use_clockid) { userpg->cap_user_time_zero = 1; - userpg->time_zero = data->cyc2ns_offset; + userpg->time_zero = offset; } cyc2ns_read_end(data); diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index a04eabd43d06..27e9f9d769b8 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void); extern int no_timer_check; +extern bool using_native_sched_clock(void); + /* * We use the full linear equation: f(x) = a + b*x, in order to allow * a continuous function in the face of dynamic freq changes. diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c73a7f9e881a..714dfba6a1e7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -328,7 +328,7 @@ unsigned long long sched_clock(void) return paravirt_sched_clock(); } -static inline bool using_native_sched_clock(void) +bool using_native_sched_clock(void) { return pv_time_ops.sched_clock == native_sched_clock; } @@ -336,7 +336,7 @@ static inline bool using_native_sched_clock(void) unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); -static inline bool using_native_sched_clock(void) { return true; } +bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 4a68c6791207..34fe92ce1ebd 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -54,15 +54,16 @@ static inline u64 local_clock(void) } #else extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ extern int sched_clock_stable(void); extern void clear_sched_clock_stable(void); +/* + * When sched_clock_stable(), __sched_clock_offset provides the offset + * between local_clock() and sched_clock(). + */ +extern u64 __sched_clock_offset; + + extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index fec0f58c8dee..24a3e01bf8cb 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); static int __sched_clock_stable_early = 1; /* - * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset + * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset */ -static __read_mostly u64 raw_offset; -static __read_mostly u64 gtod_offset; +__read_mostly u64 __sched_clock_offset; +static __read_mostly u64 __gtod_offset; struct sched_clock_data { u64 tick_raw; @@ -131,11 +131,11 @@ static void __set_sched_clock_stable(void) /* * Attempt to make the (initial) unstable->stable transition continuous. */ - raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw); + __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw); printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); static_branch_enable(&__sched_clock_stable); tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -161,11 +161,11 @@ static void __clear_sched_clock_stable(void) * * Still do what we can. */ - gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod); + __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod); printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -238,7 +238,7 @@ again: * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + gtod_offset + delta; + clock = scd->tick_gtod + __gtod_offset + delta; min_clock = wrap_max(scd->tick_gtod, old_clock); max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); @@ -324,7 +324,7 @@ u64 sched_clock_cpu(int cpu) u64 clock; if (sched_clock_stable()) - return sched_clock() + raw_offset; + return sched_clock() + __sched_clock_offset; if (unlikely(!sched_clock_running)) return 0ull; -- cgit v1.2.3 From e2ebfb2142acefecc2496e71360f50d25726040b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 Mar 2017 19:50:29 +0200 Subject: mmc: sdhci: Do not disable interrupts while waiting for clock Disabling interrupts for even a millisecond can cause problems for some devices. That can happen when sdhci changes clock frequency because it waits for the clock to become stable under a spin lock. The spin lock is not necessary here. Anything that is racing with changes to the I/O state is already broken. The mmc core already provides synchronization via "claiming" the host. Although the spin lock probably should be removed from the code paths that lead to this point, such a patch would touch too much code to be suitable for stable trees. Consequently, for this patch, just drop the spin lock while waiting. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson Tested-by: Ludovic Desroches --- drivers/mmc/host/sdhci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 6fdd7a70f229..9c1a099afbbe 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1362,7 +1362,9 @@ void sdhci_enable_clk(struct sdhci_host *host, u16 clk) return; } timeout--; - mdelay(1); + spin_unlock_irq(&host->lock); + usleep_range(900, 1100); + spin_lock_irq(&host->lock); } clk |= SDHCI_CLOCK_CARD_EN; -- cgit v1.2.3 From 027fb89e61054b4aedd962adb3e2003dec78a716 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 20 Mar 2017 19:50:30 +0200 Subject: mmc: sdhci-pci: Do not disable interrupts in sdhci_intel_set_power Disabling interrupts for even a millisecond can cause problems for some devices. That can happen when Intel host controllers wait for the present state to propagate. The spin lock is not necessary here. Anything that is racing with changes to the I/O state is already broken. The mmc core already provides synchronization via "claiming" the host. Although the spin lock probably should be removed from the code paths that lead to this point, such a patch would touch too much code to be suitable for stable trees. Consequently, for this patch, just drop the spin lock while waiting. Signed-off-by: Adrian Hunter Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Ulf Hansson Tested-by: Ludovic Desroches --- drivers/mmc/host/sdhci-pci-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 982b3e349426..86560d590786 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -451,6 +451,8 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode, if (mode == MMC_POWER_OFF) return; + spin_unlock_irq(&host->lock); + /* * Bus power might not enable after D3 -> D0 transition due to the * present state not yet having propagated. Retry for up to 2ms. @@ -463,6 +465,8 @@ static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode, reg |= SDHCI_POWER_ON; sdhci_writeb(host, reg, SDHCI_POWER_CONTROL); } + + spin_lock_irq(&host->lock); } static const struct sdhci_ops sdhci_intel_byt_ops = { -- cgit v1.2.3 From 3f307834e695f59dac4337a40316bdecfb9d0508 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 23 Mar 2017 10:00:25 +0800 Subject: ALSA: hda - Adding a group of pin definition to fix headset problem A new Dell laptop needs to apply ALC269_FIXUP_DELL1_MIC_NO_PRESENCE to fix the headset problem, and the pin definiton of this machine is not in the pin quirk table yet, now adding it to the table. Signed-off-by: Hui Wang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8d6b3703d0a2..7f989898cbd9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6102,6 +6102,8 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { ALC295_STANDARD_PINS, {0x17, 0x21014040}, {0x18, 0x21a19050}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC295_STANDARD_PINS), SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ALC298_STANDARD_PINS, {0x17, 0x90170110}), -- cgit v1.2.3 From 6e790555b6cae4da9c578376ff500861337fcfc1 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 2 Mar 2017 17:46:36 +0000 Subject: pinctrl: ti: The IODelay driver is a DRA7xxx feature so depend on that SoC As the IODelay driver is a hardware feature of the DRA7xxx SoC depend on that SoC and compile test. Signed-off-by: Peter Robinson Acked-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/pinctrl/ti/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/ti/Kconfig b/drivers/pinctrl/ti/Kconfig index 815a88673d38..542077069391 100644 --- a/drivers/pinctrl/ti/Kconfig +++ b/drivers/pinctrl/ti/Kconfig @@ -1,6 +1,6 @@ config PINCTRL_TI_IODELAY tristate "TI IODelay Module pinconf driver" - depends on OF + depends on OF && (SOC_DRA7XX || COMPILE_TEST) select GENERIC_PINCTRL_GROUPS select GENERIC_PINMUX_FUNCTIONS select GENERIC_PINCONF -- cgit v1.2.3 From 08a7f260c88f8d52be52ff2113119b3464b69874 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 7 Mar 2017 14:31:00 +0100 Subject: pinctrl: meson-gxbb: Fix typo in i2c ao groups Signed-off-by: Neil Armstrong Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-gxbb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c index 7671424d46cb..31a3a98d067c 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c @@ -667,11 +667,11 @@ static const char * const uart_ao_b_groups[] = { }; static const char * const i2c_ao_groups[] = { - "i2c_sdk_ao", "i2c_sda_ao", + "i2c_sck_ao", "i2c_sda_ao", }; static const char * const i2c_slave_ao_groups[] = { - "i2c_slave_sdk_ao", "i2c_slave_sda_ao", + "i2c_slave_sck_ao", "i2c_slave_sda_ao", }; static const char * const remote_input_ao_groups[] = { -- cgit v1.2.3 From 59f34e80c4910bc69ca643a2bb93fcce51366d83 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 23 Feb 2017 11:12:40 +0100 Subject: pinctrl: samsung: Fix memory mapping code Some pinctrls share memory regions, and devm_ioremap_resource does not allow to share resources, in opposition to devm_ioremap. This patch restores back usage of devm_ioremap function, but with proper error handling and logging. Fixes: baafaca ("pinctrl: samsung: Fix return value check in samsung_pinctrl_get_soc_data()") Signed-off-by: Andrzej Hajda Tested-by: Marek Szyprowski Reviewed-by: Krzysztof Kozlowski Signed-off-by: Linus Walleij --- drivers/pinctrl/samsung/pinctrl-samsung.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index f9ddba7decc1..d7aa22cff480 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -988,9 +988,16 @@ samsung_pinctrl_get_soc_data(struct samsung_pinctrl_drv_data *d, for (i = 0; i < ctrl->nr_ext_resources + 1; i++) { res = platform_get_resource(pdev, IORESOURCE_MEM, i); - virt_base[i] = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(virt_base[i])) - return ERR_CAST(virt_base[i]); + if (!res) { + dev_err(&pdev->dev, "failed to get mem%d resource\n", i); + return ERR_PTR(-EINVAL); + } + virt_base[i] = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); + if (!virt_base[i]) { + dev_err(&pdev->dev, "failed to ioremap %pR\n", res); + return ERR_PTR(-EIO); + } } bank = d->pin_banks; -- cgit v1.2.3 From a6566710adaa4a7dd5e0d99820ff9c9c30ee5951 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 14 Mar 2017 08:23:26 -0700 Subject: pinctrl: qcom: Don't clear status bit on irq_unmask Clearing the status bit on irq_unmask will discard any pending interrupt that did arrive after the irq_ack, i.e. while the IRQ handler function was executing. Fixes: f365be092572 ("pinctrl: Add Qualcomm TLMM driver") Cc: stable@vger.kernel.org Cc: Stephen Boyd Reported-by: Timur Tabi Signed-off-by: Bjorn Andersson Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index c978be5eb9eb..273badd92561 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -609,10 +609,6 @@ static void msm_gpio_irq_unmask(struct irq_data *d) raw_spin_lock_irqsave(&pctrl->lock, flags); - val = readl(pctrl->regs + g->intr_status_reg); - val &= ~BIT(g->intr_status_bit); - writel(val, pctrl->regs + g->intr_status_reg); - val = readl(pctrl->regs + g->intr_cfg_reg); val |= BIT(g->intr_enable_bit); writel(val, pctrl->regs + g->intr_cfg_reg); -- cgit v1.2.3 From e855fa9a65c40788b5069abb0d094537daa22e05 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 16 Mar 2017 18:26:02 +0100 Subject: pinctrl: st: add irq_request/release_resources callbacks When using GPIO as IRQ source, the GPIO must be configured in INPUT. Callbacks dedicated for this was missing in pinctrl-st driver. This fix the following kernel error when trying to lock a gpio as IRQ: [ 7.521095] gpio gpiochip7: (PIO11): gpiochip_lock_as_irq: tried to flag a GPIO set as output for IRQ [ 7.526018] gpio gpiochip7: (PIO11): unable to lock HW IRQ 6 for IRQ [ 7.529405] genirq: Failed to request resources for 0-0053 (irq 81) on irqchip GPIO Signed-off-by: Patrice Chotard Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-st.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 676efcc032d2..3ae8066bc127 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1285,6 +1285,22 @@ static void st_gpio_irq_unmask(struct irq_data *d) writel(BIT(d->hwirq), bank->base + REG_PIO_SET_PMASK); } +static int st_gpio_irq_request_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + st_gpio_direction_input(gc, d->hwirq); + + return gpiochip_lock_as_irq(gc, d->hwirq); +} + +static void st_gpio_irq_release_resources(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + + gpiochip_unlock_as_irq(gc, d->hwirq); +} + static int st_gpio_irq_set_type(struct irq_data *d, unsigned type) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); @@ -1438,12 +1454,14 @@ static struct gpio_chip st_gpio_template = { }; static struct irq_chip st_gpio_irqchip = { - .name = "GPIO", - .irq_disable = st_gpio_irq_mask, - .irq_mask = st_gpio_irq_mask, - .irq_unmask = st_gpio_irq_unmask, - .irq_set_type = st_gpio_irq_set_type, - .flags = IRQCHIP_SKIP_SET_WAKE, + .name = "GPIO", + .irq_request_resources = st_gpio_irq_request_resources, + .irq_release_resources = st_gpio_irq_release_resources, + .irq_disable = st_gpio_irq_mask, + .irq_mask = st_gpio_irq_mask, + .irq_unmask = st_gpio_irq_unmask, + .irq_set_type = st_gpio_irq_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static int st_gpiolib_register_bank(struct st_pinctrl *info, -- cgit v1.2.3 From d7402de48efae57bbb0072e53d3800c30de57ea5 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Thu, 16 Mar 2017 21:36:07 +0100 Subject: pinctrl: qcom: ipq4019: add missing pingroups for pins > 70 This patch adds the missing PINGROUP for GPIO70-99. This fixes a crash that happens in pinctrl-msm, if any of the GPIO70-99 are accessed. Fixes: 5303f7827fcd41d ("pinctrl: qcom: ipq4019: set ngpios to correct value") Signed-off-by: Christian Lamparter Acked-by: Bjorn Andersson Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-ipq4019.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c index b68ae424cee2..743d1f458205 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c @@ -405,6 +405,36 @@ static const struct msm_pingroup ipq4019_groups[] = { PINGROUP(67, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), PINGROUP(68, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), PINGROUP(69, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(70, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(71, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(72, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(73, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(74, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(75, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(76, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(77, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(78, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(79, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(80, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(81, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(82, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(83, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(84, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(85, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(86, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(87, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(88, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(89, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(90, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(91, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(92, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(93, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(94, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(95, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(96, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(97, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(98, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), + PINGROUP(99, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), }; static const struct msm_pinctrl_soc_data ipq4019_pinctrl = { -- cgit v1.2.3 From 633ee407b9d15a75ac9740ba9d3338815e1fcb95 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 21 Mar 2017 13:44:28 +0100 Subject: libceph: force GFP_NOIO for socket allocations sock_alloc_inode() allocates socket+inode and socket_wq with GFP_KERNEL, which is not allowed on the writeback path: Workqueue: ceph-msgr con_work [libceph] ffff8810871cb018 0000000000000046 0000000000000000 ffff881085d40000 0000000000012b00 ffff881025cad428 ffff8810871cbfd8 0000000000012b00 ffff880102fc1000 ffff881085d40000 ffff8810871cb038 ffff8810871cb148 Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x1bd/0x200 [] ? ttwu_do_wakeup+0x2c/0x120 [] ? ttwu_do_activate.constprop.135+0x66/0x70 [] wait_for_completion+0xbf/0x180 [] ? try_to_wake_up+0x390/0x390 [] flush_work+0x165/0x250 [] ? worker_detach_from_pool+0xd0/0xd0 [] xlog_cil_force_lsn+0x81/0x200 [xfs] [] ? __slab_free+0xee/0x234 [] _xfs_log_force_lsn+0x4d/0x2c0 [xfs] [] ? lookup_page_cgroup_used+0xe/0x30 [] ? xfs_reclaim_inode+0xa3/0x330 [xfs] [] xfs_log_force_lsn+0x3f/0xf0 [xfs] [] ? xfs_reclaim_inode+0xa3/0x330 [xfs] [] xfs_iunpin_wait+0xc6/0x1a0 [xfs] [] ? wake_atomic_t_function+0x40/0x40 [] xfs_reclaim_inode+0xa3/0x330 [xfs] [] xfs_reclaim_inodes_ag+0x257/0x3d0 [xfs] [] xfs_reclaim_inodes_nr+0x33/0x40 [xfs] [] xfs_fs_free_cached_objects+0x15/0x20 [xfs] [] super_cache_scan+0x178/0x180 [] shrink_slab_node+0x14e/0x340 [] ? mem_cgroup_iter+0x16b/0x450 [] shrink_slab+0x100/0x140 [] do_try_to_free_pages+0x335/0x490 [] try_to_free_pages+0xb9/0x1f0 [] ? __alloc_pages_direct_compact+0x69/0x1be [] __alloc_pages_nodemask+0x69a/0xb40 [] alloc_pages_current+0x9e/0x110 [] new_slab+0x2c5/0x390 [] __slab_alloc+0x33b/0x459 [] ? sock_alloc_inode+0x2d/0xd0 [] ? inet_sendmsg+0x71/0xc0 [] ? sock_alloc_inode+0x2d/0xd0 [] kmem_cache_alloc+0x1a2/0x1b0 [] sock_alloc_inode+0x2d/0xd0 [] alloc_inode+0x26/0xa0 [] new_inode_pseudo+0x1a/0x70 [] sock_alloc+0x1e/0x80 [] __sock_create+0x95/0x220 [] sock_create_kern+0x24/0x30 [] con_work+0xef9/0x2050 [libceph] [] ? rbd_img_request_submit+0x4c/0x60 [rbd] [] process_one_work+0x159/0x4f0 [] worker_thread+0x11b/0x530 [] ? create_worker+0x1d0/0x1d0 [] kthread+0xc9/0xe0 [] ? flush_kthread_worker+0x90/0x90 [] ret_from_fork+0x58/0x90 [] ? flush_kthread_worker+0x90/0x90 Use memalloc_noio_{save,restore}() to temporarily force GFP_NOIO here. Cc: stable@vger.kernel.org # 3.10+, needs backporting Link: http://tracker.ceph.com/issues/19309 Reported-by: Sergey Jerusalimov Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- net/ceph/messenger.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 38dcf1eb427d..f76bb3332613 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con) { struct sockaddr_storage *paddr = &con->peer_addr.in_addr; struct socket *sock; + unsigned int noio_flag; int ret; BUG_ON(con->sock); + + /* sock_create_kern() allocates with GFP_KERNEL */ + noio_flag = memalloc_noio_save(); ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); + memalloc_noio_restore(noio_flag); if (ret) return ret; sock->sk->sk_allocation = GFP_NOFS; -- cgit v1.2.3 From 1c5bf78114d38c2761721108a2d925bccb496027 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Mar 2017 18:25:05 +0100 Subject: EDAC: Select DEBUG_FS The debugfs.c functionality relies on DEBUG_FS so select it. Signed-off-by: Borislav Petkov --- drivers/edac/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index be3eac6ad54d..4773f2867234 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -43,6 +43,7 @@ config EDAC_LEGACY_SYSFS config EDAC_DEBUG bool "Debugging" + select DEBUG_FS help This turns on debugging information for the entire EDAC subsystem. You do so by inserting edac_module with "edac_debug_level=x." Valid -- cgit v1.2.3 From cd1be315ac9766fe83e4fb788d5d9fe032a9b877 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Mar 2017 18:26:34 +0100 Subject: EDAC, pnd2_edac: Fix !EDAC_DEBUG build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide debugfs function stubs when EDAC_DEBUG is not enabled so that we don't fail the build: drivers/edac/pnd2_edac.c: In function ‘pnd2_init’: drivers/edac/pnd2_edac.c:1521:2: error: implicit declaration of function ‘setup_pnd2_debug’ [-Werror=implicit-function-declaration] setup_pnd2_debug(); ^ drivers/edac/pnd2_edac.c: In function ‘pnd2_exit’: drivers/edac/pnd2_edac.c:1529:2: error: implicit declaration of function ‘teardown_pnd2_debug’ [-Werror=implicit-function-declaration] teardown_pnd2_debug(); ^ Signed-off-by: Borislav Petkov --- drivers/edac/pnd2_edac.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 14d39f05226e..ec2e349d728d 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1435,7 +1435,11 @@ static void teardown_pnd2_debug(void) { debugfs_remove_recursive(pnd2_test); } -#endif +#else +static void setup_pnd2_debug(void) {} +static void teardown_pnd2_debug(void) {} +#endif /* CONFIG_EDAC_DEBUG */ + static int pnd2_probe(void) { -- cgit v1.2.3 From 1c2593cc8fd5960f8861de1be67135851f884836 Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Tue, 21 Mar 2017 15:43:37 -0700 Subject: xen/acpi: Replace hard coded "ACPI0007" Replace hard coded "ACPI0007" with ACPI_PROCESSOR_DEVICE_HID Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Ankur Arora Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-acpi-processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 4ce10bcca18b..fac0d7b0edf7 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -408,7 +408,7 @@ static int check_acpi_ids(struct acpi_processor *pr_backup) acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, read_acpi_id, NULL, NULL, NULL); - acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL); + acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, read_acpi_id, NULL, NULL); upload: if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) { -- cgit v1.2.3 From 12ffed96d4369f086261ba2ee734fa8c932d7f55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 23 Mar 2017 17:53:26 +0900 Subject: drm/fb-helper: Allow var->x/yres(_virtual) < fb->width/height again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise this can also prevent modesets e.g. for switching VTs, when multiple monitors with different native resolutions are connected. The depths must match though, so keep the != test for that. Also update the DRM_DEBUG output to be slightly more accurate, this doesn't only affect requests from userspace. Bugzilla: https://bugs.freedesktop.org/99841 Fixes: 865afb11949e ("drm/fb-helper: reject any changes to the fbdev") Signed-off-by: Michel Dänzer Reviewed-by: Daniel Stone Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170323085326.20185-1-michel@daenzer.net --- drivers/gpu/drm/drm_fb_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index f6d4d9700734..324a688b3f30 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1260,9 +1260,9 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, * to KMS, hence fail if different settings are requested. */ if (var->bits_per_pixel != fb->format->cpp[0] * 8 || - var->xres != fb->width || var->yres != fb->height || - var->xres_virtual != fb->width || var->yres_virtual != fb->height) { - DRM_DEBUG("fb userspace requested width/height/bpp different than current fb " + var->xres > fb->width || var->yres > fb->height || + var->xres_virtual > fb->width || var->yres_virtual > fb->height) { + DRM_DEBUG("fb requested width/height/bpp can't fit in current fb " "request %dx%d-%d (virtual %dx%d) > %dx%d-%d\n", var->xres, var->yres, var->bits_per_pixel, var->xres_virtual, var->yres_virtual, -- cgit v1.2.3 From eb94588dabec82e012281608949a860f64752914 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Mon, 20 Mar 2017 16:42:48 +0100 Subject: scsi: hpsa: fix volume offline state In a previous patch a hpsa_scsi_dev_t.volume_offline update line has been removed, so let us put it back.. Fixes: 85b29008d8 (hpsa: update check for logical volume status) Signed-off-by: Tomas Henzl Acked-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 0d0be7754a65..9d659aaace15 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3885,6 +3885,7 @@ static int hpsa_update_device_info(struct ctlr_info *h, if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); volume_offline = hpsa_volume_offline(h, scsi3addr); + this_device->volume_offline = volume_offline; if (volume_offline == HPSA_LV_FAILED) { rc = HPSA_LV_FAILED; dev_err(&h->pdev->dev, -- cgit v1.2.3 From a71e3cdcfce4880a4578915e110e3eaed1659765 Mon Sep 17 00:00:00 2001 From: Dick Kennedy Date: Thu, 23 Mar 2017 08:47:18 -0400 Subject: scsi: lpfc: Fix PT2PT PRLI reject lpfc cannot establish connection with targets that send PRLI in P2P configurations. If lpfc rejects a PRLI that is sent from a target the target will not resend and will reject the PRLI send from the initiator. [mkp: applied by hand] Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d9c61d030034..a5ca37e45fb6 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7968,7 +7968,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, did, vport->port_state, ndlp->nlp_flag); phba->fc_stat.elsRcvPRLI++; - if (vport->port_state < LPFC_DISC_AUTH) { + if ((vport->port_state < LPFC_DISC_AUTH) && + (vport->fc_flag & FC_FABRIC)) { rjt_err = LSRJT_UNABLE_TPC; rjt_exp = LSEXP_NOTHING_MORE; break; -- cgit v1.2.3 From 223b78ea212852726dfa86ef267a9c8a323d5cc2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Mar 2017 15:53:45 +0100 Subject: scsi: lpfc: fix building without debugfs support On a randconfig build without CONFIG_SCSI_LPFC_DEBUG_FS, I ran into multiple compile failures: drivers/scsi/lpfc/lpfc_debugfs.h: In function 'lpfc_debug_dump_wq': drivers/scsi/lpfc/lpfc_debugfs.h:405:15: error: 'DUMP_FCP' undeclared (first use in this function); did you mean 'DUMP_VAR'? drivers/scsi/lpfc/lpfc_debugfs.h:405:15: note: each undeclared identifier is reported only once for each function it appears in drivers/scsi/lpfc/lpfc_debugfs.h:408:22: error: 'DUMP_NVME' undeclared (first use in this function); did you mean 'DUMP_NONE'? drivers/scsi/lpfc/lpfc_nvmet.c: In function 'lpfc_nvmet_xmt_ls_rsp_cmp': drivers/scsi/lpfc/lpfc_nvmet.c:109:2: error: implicit declaration of function 'lpfc_nvmeio_data'; did you mean 'lpfc_mem_free'? [-Werror=implicit-function-declaration] drivers/scsi/lpfc/lpfc_nvmet.c: In function 'lpfc_nvmet_xmt_fcp_op': drivers/scsi/lpfc/lpfc_nvmet.c:523:10: error: unused variable 'id' [-Werror=unused-variable] They are all trivial to fix, so I'm doing it in a combined patch here. Fixes: 1d9d5a9879ad ("scsi: lpfc: refactor debugfs queue dump routines") Fixes: bd2cdd5e400f ("scsi: lpfc: NVME Initiator: Add debugfs support") Fixes: 2b65e18202fd ("scsi: lpfc: NVME Target: Add debugfs support") Signed-off-by: Arnd Bergmann Acked-by: Dick Kennedy Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.h | 22 ++++++++++++++-------- drivers/scsi/lpfc/lpfc_nvmet.c | 4 ++-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.h b/drivers/scsi/lpfc/lpfc_debugfs.h index c05f56c3023f..7b7d314af0e0 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.h +++ b/drivers/scsi/lpfc/lpfc_debugfs.h @@ -44,14 +44,6 @@ /* hbqinfo output buffer size */ #define LPFC_HBQINFO_SIZE 8192 -enum { - DUMP_FCP, - DUMP_NVME, - DUMP_MBX, - DUMP_ELS, - DUMP_NVMELS, -}; - /* nvmestat output buffer size */ #define LPFC_NVMESTAT_SIZE 8192 #define LPFC_NVMEKTIME_SIZE 8192 @@ -283,8 +275,22 @@ struct lpfc_idiag { struct lpfc_idiag_offset offset; void *ptr_private; }; + +#else + +#define lpfc_nvmeio_data(phba, fmt, arg...) \ + no_printk(fmt, ##arg) + #endif +enum { + DUMP_FCP, + DUMP_NVME, + DUMP_MBX, + DUMP_ELS, + DUMP_NVMELS, +}; + /* Mask for discovery_trace */ #define LPFC_DISC_TRC_ELS_CMD 0x1 /* Trace ELS commands */ #define LPFC_DISC_TRC_ELS_RSP 0x2 /* Trace ELS response */ diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 7ca868f394da..acba1b67e505 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -520,7 +520,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, struct lpfc_hba *phba = ctxp->phba; struct lpfc_iocbq *nvmewqeq; unsigned long iflags; - int rc, id; + int rc; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (phba->ktime_on) { @@ -530,7 +530,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, ctxp->ts_nvme_data = ktime_get_ns(); } if (phba->cpucheck_on & LPFC_CHECK_NVMET_IO) { - id = smp_processor_id(); + int id = smp_processor_id(); ctxp->cpu = id; if (id < LPFC_CHECK_CPU_CNT) phba->cpucheck_xmt_io[id]++; -- cgit v1.2.3 From 1914f0cd203c941bba72f9452c8290324f1ef3dc Mon Sep 17 00:00:00 2001 From: Ankur Arora Date: Tue, 21 Mar 2017 15:43:38 -0700 Subject: xen/acpi: upload PM state from init-domain to Xen This was broken in commit cd979883b9ed ("xen/acpi-processor: fix enabling interrupts on syscore_resume"). do_suspend (from xen/manage.c) and thus xen_resume_notifier never get called on the initial-domain at resume (it is if running as guest.) The rationale for the breaking change was that upload_pm_data() potentially does blocking work in syscore_resume(). This patch addresses the original issue by scheduling upload_pm_data() to execute in workqueue context. Cc: Stanislaw Gruszka Cc: stable@vger.kernel.org Based-on-patch-by: Konrad Wilk Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Stanislaw Gruszka Signed-off-by: Ankur Arora Signed-off-by: Boris Ostrovsky --- drivers/xen/xen-acpi-processor.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index fac0d7b0edf7..23e391d3ec01 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -27,10 +27,10 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -466,15 +466,33 @@ static int xen_upload_processor_pm_data(void) return rc; } -static int xen_acpi_processor_resume(struct notifier_block *nb, - unsigned long action, void *data) +static void xen_acpi_processor_resume_worker(struct work_struct *dummy) { + int rc; + bitmap_zero(acpi_ids_done, nr_acpi_bits); - return xen_upload_processor_pm_data(); + + rc = xen_upload_processor_pm_data(); + if (rc != 0) + pr_info("ACPI data upload failed, error = %d\n", rc); +} + +static void xen_acpi_processor_resume(void) +{ + static DECLARE_WORK(wq, xen_acpi_processor_resume_worker); + + /* + * xen_upload_processor_pm_data() calls non-atomic code. + * However, the context for xen_acpi_processor_resume is syscore + * with only the boot CPU online and in an atomic context. + * + * So defer the upload for some point safer. + */ + schedule_work(&wq); } -struct notifier_block xen_acpi_processor_resume_nb = { - .notifier_call = xen_acpi_processor_resume, +static struct syscore_ops xap_syscore_ops = { + .resume = xen_acpi_processor_resume, }; static int __init xen_acpi_processor_init(void) @@ -527,7 +545,7 @@ static int __init xen_acpi_processor_init(void) if (rc) goto err_unregister; - xen_resume_notifier_register(&xen_acpi_processor_resume_nb); + register_syscore_ops(&xap_syscore_ops); return 0; err_unregister: @@ -544,7 +562,7 @@ static void __exit xen_acpi_processor_exit(void) { int i; - xen_resume_notifier_unregister(&xen_acpi_processor_resume_nb); + unregister_syscore_ops(&xap_syscore_ops); kfree(acpi_ids_done); kfree(acpi_id_present); kfree(acpi_id_cst_present); -- cgit v1.2.3 From 950712eb8ef03e4a62ac5b3067efde7ec2f8a91e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:18 +0800 Subject: KVM: x86: check existance before destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mostly used for split irqchip mode. In that case, these two things are not inited at all, so no need to release. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- arch/x86/kvm/i8259.c | 3 +++ arch/x86/kvm/ioapic.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 73ea24d4f119..047b17a26269 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -657,6 +657,9 @@ void kvm_pic_destroy(struct kvm *kvm) { struct kvm_pic *vpic = kvm->arch.vpic; + if (!vpic) + return; + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6e219e5c07d2..289270a6aecb 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -635,6 +635,9 @@ void kvm_ioapic_destroy(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; + if (!ioapic) + return; + cancel_delayed_work_sync(&ioapic->eoi_inject); kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); kvm->arch.vioapic = NULL; -- cgit v1.2.3 From c761159cf81b8641290f7559a8d8e30f6ab92669 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:19 +0800 Subject: KVM: x86: use pic/ioapic destructor when destroy vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have specific destructors for pic/ioapic, we'd better use them when destroying the VM as well. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1faf620a6fdc..d30ff491ecc7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8153,8 +8153,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) if (kvm_x86_ops->vm_destroy) kvm_x86_ops->vm_destroy(kvm); kvm_iommu_unmap_guest(kvm); - kfree(kvm->arch.vpic); - kfree(kvm->arch.vioapic); + kvm_pic_destroy(kvm); + kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); -- cgit v1.2.3 From fb6c8198431311027c3434d4e94ab8bc040f7aea Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 16 Mar 2017 13:53:59 -0700 Subject: kvm: vmx: Flush TLB when the APIC-access address changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting from the Intel SDM, volume 3, section 28.3.3.4: Guidelines for Use of the INVEPT Instruction: If EPT was in use on a logical processor at one time with EPTP X, it is recommended that software use the INVEPT instruction with the "single-context" INVEPT type and with EPTP X in the INVEPT descriptor before a VM entry on the same logical processor that enables EPT with EPTP X and either (a) the "virtualize APIC accesses" VM-execution control was changed from 0 to 1; or (b) the value of the APIC-access address was changed. In the nested case, the burden falls on L1, unless L0 enables EPT in vmcs02 when L1 doesn't enable EPT in vmcs12. Signed-off-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c66436530a93..e2f608283a5a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4024,6 +4024,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); } +static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) +{ + if (enable_ept) + vmx_flush_tlb(vcpu); +} + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -8548,6 +8554,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } else { sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmx_flush_tlb_ept_only(vcpu); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); @@ -8573,8 +8580,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) */ if (!is_guest_mode(vcpu) || !nested_cpu_has2(get_vmcs12(&vmx->vcpu), - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmcs_write64(APIC_ACCESS_ADDR, hpa); + vmx_flush_tlb_ept_only(vcpu); + } } static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) @@ -10256,6 +10265,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (nested_cpu_has_ept(vmcs12)) { kvm_mmu_unload(vcpu); nested_ept_init_mmu_context(vcpu); + } else if (nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* @@ -11055,6 +11067,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx->nested.change_vmcs01_virtual_x2apic_mode = false; vmx_set_virtual_x2apic_mode(vcpu, vcpu->arch.apic_base & X2APIC_ENABLE); + } else if (!nested_cpu_has_ept(vmcs12) && + nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* This is needed for same reason as it was needed in prepare_vmcs02 */ -- cgit v1.2.3 From 24dccf83a121b8a4ad5c2ad383a8184ef6c266ee Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Mar 2017 21:18:55 -0700 Subject: KVM: x86: correct async page present tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After async pf setup successfully, there is a broadcast wakeup w/ special token 0xffffffff which tells vCPU that it should wake up all processes waiting for APFs though there is no real process waiting at the moment. The async page present tracepoint print prematurely and fails to catch the special token setup. This patch fixes it by moving the async page present tracepoint after the special token setup. Before patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0x0 gva 0x0 After patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0xffffffff gva 0x0 Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d30ff491ecc7..64697fe475c3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8566,11 +8566,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_ready(work->arch.token, work->gva); if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); + trace_kvm_async_pf_ready(work->arch.token, work->gva); if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { -- cgit v1.2.3 From 63cb6d5f004ca44f9b8e562b6dd191f717a4960e Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Mar 2017 21:18:53 -0700 Subject: KVM: nVMX: Fix nested VPID vmx exec control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be reproduced by running kvm-unit-tests/vmx.flat on L0 w/ vpid disabled. Test suite: VPID Unhandled exception 6 #UD at ip 00000000004051a6 error_code=0000 rflags=00010047 cs=00000008 rax=0000000000000000 rcx=0000000000000001 rdx=0000000000000047 rbx=0000000000402f79 rbp=0000000000456240 rsi=0000000000000001 rdi=0000000000000000 r8=000000000000000a r9=00000000000003f8 r10=0000000080010011 r11=0000000000000000 r12=0000000000000003 r13=0000000000000708 r14=0000000000000000 r15=0000000000000000 cr0=0000000080010031 cr2=0000000000000000 cr3=0000000007fff000 cr4=0000000000002020 cr8=0000000000000000 STACK: @4051a6 40523e 400f7f 402059 40028f We should hide and forbid VPID in L1 if it is disabled on L0. However, nested VPID enable bit is set unconditionally during setup nested vmx exec controls though VPID is not exposed through nested VMX capablity. This patch fixes it by don't set nested VPID enable bit if it is disabled on L0. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: stable@vger.kernel.org Fixes: 5c614b3583e (KVM: nVMX: nested VPID emulation) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e2f608283a5a..5a82766e4b07 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2753,7 +2753,6 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING | @@ -2781,10 +2780,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * though it is treated as global context. The alternative is * not failing the single-context invvpid, and it is worse. */ - if (enable_vpid) + if (enable_vpid) { + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_VPID; vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | VMX_VPID_EXTENT_SUPPORTED_MASK; - else + } else vmx->nested.nested_vmx_vpid_caps = 0; if (enable_unrestricted_guest) -- cgit v1.2.3 From 08d839c4b134b8328ec42f2157a9ca4b93227c03 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 23 Mar 2017 05:30:08 -0700 Subject: KVM: VMX: Fix enable VPID conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be reproduced by running L2 on L1, and disable VPID on L0 if w/o commit "KVM: nVMX: Fix nested VPID vmx exec control", the L2 crash as below: KVM: entry failed, hardware error 0x7 EAX=00000000 EBX=00000000 ECX=00000000 EDX=000306c3 ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000 EIP=0000fff0 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 0000ffff 00009300 CS =f000 ffff0000 0000ffff 00009b00 SS =0000 00000000 0000ffff 00009300 DS =0000 00000000 0000ffff 00009300 FS =0000 00000000 0000ffff 00009300 GS =0000 00000000 0000ffff 00009300 LDT=0000 00000000 0000ffff 00008200 TR =0000 00000000 0000ffff 00008b00 GDT= 00000000 0000ffff IDT= 00000000 0000ffff CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000000 Reference SDM 30.3 INVVPID: Protected Mode Exceptions - #UD - If not in VMX operation. - If the logical processor does not support VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=0). - If the logical processor supports VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=1) but does not support the INVVPID instruction (IA32_VMX_EPT_VPID_CAP[32]=0). So we should check both VPID enable bit in vmx exec control and INVVPID support bit in vmx capability MSRs to enable VPID. This patch adds the guarantee to not enable VPID if either INVVPID or single-context/all-context invalidation is not exposed in vmx capability MSRs. Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5a82766e4b07..cd1ba62878f3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1239,6 +1239,11 @@ static inline bool cpu_has_vmx_invvpid_global(void) return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; } +static inline bool cpu_has_vmx_invvpid(void) +{ + return vmx_capability.vpid & VMX_VPID_INVVPID_BIT; +} + static inline bool cpu_has_vmx_ept(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -6524,8 +6529,10 @@ static __init int hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); - if (!cpu_has_vmx_vpid()) + if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || + !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) -- cgit v1.2.3 From 90db10434b163e46da413d34db8d0e77404cc645 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 23 Mar 2017 18:24:19 +0100 Subject: KVM: kvm_io_bus_unregister_dev() should never fail No caller currently checks the return value of kvm_io_bus_unregister_dev(). This is evil, as all callers silently go on freeing their device. A stale reference will remain in the io_bus, getting at least used again, when the iobus gets teared down on kvm_destroy_vm() - leading to use after free errors. There is nothing the callers could do, except retrying over and over again. So let's simply remove the bus altogether, print an error and make sure no one can access this broken bus again (returning -ENOMEM on any attempt to access it). Fixes: e93f8a0f821e ("KVM: convert io_bus to SRCU") Cc: stable@vger.kernel.org # 3.4+ Reported-by: Dmitry Vyukov Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++-- virt/kvm/eventfd.c | 3 ++- virt/kvm/kvm_main.c | 42 +++++++++++++++++++++++++----------------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c14ad9809da..d0250744507a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev); +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev); struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a29786dd9522..4d28a9ddbee0 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - kvm->buses[bus_idx]->ioeventfd_count--; + if (kvm->buses[bus_idx]) + kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7445566fadc1..ef1aa7f1ed7a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -728,7 +728,8 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - kvm_io_bus_destroy(kvm->buses[i]); + if (kvm->buses[i]) + kvm_io_bus_destroy(kvm->buses[i]); kvm->buses[i] = NULL; } kvm_coalesced_mmio_free(kvm); @@ -3476,6 +3477,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3493,6 +3496,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && @@ -3543,6 +3548,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, }; bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + if (!bus) + return -ENOMEM; r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3555,6 +3562,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; + if (!bus) + return -ENOMEM; + /* exclude ioeventfd which is limited by maximum fd */ if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; @@ -3574,45 +3584,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, } /* Caller must hold slots_lock. */ -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev) +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_io_device *dev) { - int i, r; + int i; struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - - /* - * It's possible the bus being released before hand. If so, - * we're done here. - */ if (!bus) - return 0; + return; - r = -ENOENT; for (i = 0; i < bus->dev_count; i++) if (bus->range[i].dev == dev) { - r = 0; break; } - if (r) - return r; + if (i == bus->dev_count) + return; new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); - if (!new_bus) - return -ENOMEM; + if (!new_bus) { + pr_err("kvm: failed to shrink bus, removing it completely\n"); + goto broken; + } memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); new_bus->dev_count--; memcpy(new_bus->range + i, bus->range + i + 1, (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); +broken: rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); - return r; + return; } struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, @@ -3625,6 +3631,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, srcu_idx = srcu_read_lock(&kvm->srcu); bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + if (!bus) + goto out_unlock; dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); if (dev_idx < 0) -- cgit v1.2.3 From a2125d02443e9a4e68bcfd9f8004fa23239e8329 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Mar 2017 16:03:11 +0100 Subject: hwmon: (asus_atk0110) fix uninitialized data access The latest gcc-7 snapshot adds a warning to point out that when atk_read_value_old or atk_read_value_new fails, we copy uninitialized data into sensor->cached_value: drivers/hwmon/asus_atk0110.c: In function 'atk_input_show': drivers/hwmon/asus_atk0110.c:651:26: error: 'value' may be used uninitialized in this function [-Werror=maybe-uninitialized] Adding an error check avoids this. All versions of the driver are affected. Fixes: 2c03d07ad54d ("hwmon: Add Asus ATK0110 support") Signed-off-by: Arnd Bergmann Reviewed-by: Luca Tettamanti Signed-off-by: Guenter Roeck --- drivers/hwmon/asus_atk0110.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index cccef87963e0..975c43d446f8 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -646,6 +646,9 @@ static int atk_read_value(struct atk_sensor_data *sensor, u64 *value) else err = atk_read_value_new(sensor, value); + if (err) + return err; + sensor->is_valid = true; sensor->last_updated = jiffies; sensor->cached_value = *value; -- cgit v1.2.3 From e88162f9dad426f3c83e23150b7f28b7d9486df8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 21 Mar 2017 21:13:09 +0200 Subject: Revert "i2c: mux: pca954x: Add ACPI support for pca954x" In ACPI world any ID should be carefully chosen and registered officially. The commit bbf9d262a147 seems did a wrong assumption because PCA is the registered PNP ID for "PHILIPS BU ADD ON CARD". I'm pretty sure this prefix has nothing to do with the driver in question. Moreover, newer ACPI specification has a support of _DSD method and special device IDs to allow drivers be enumerated via compatible string. The slight change to support this kind of enumeration will be added in sequential patch against pca954x.c. Revert the commit bbf9d262a147 for good. Cc: Tin Huynh Signed-off-by: Andy Shevchenko Signed-off-by: Peter Rosin --- drivers/i2c/muxes/i2c-mux-pca954x.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index dfc1c0e37c40..333a3918b656 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -35,7 +35,6 @@ * warranty of any kind, whether express or implied. */ -#include #include #include #include @@ -141,21 +140,6 @@ static const struct i2c_device_id pca954x_id[] = { }; MODULE_DEVICE_TABLE(i2c, pca954x_id); -#ifdef CONFIG_ACPI -static const struct acpi_device_id pca954x_acpi_ids[] = { - { .id = "PCA9540", .driver_data = pca_9540 }, - { .id = "PCA9542", .driver_data = pca_9542 }, - { .id = "PCA9543", .driver_data = pca_9543 }, - { .id = "PCA9544", .driver_data = pca_9544 }, - { .id = "PCA9545", .driver_data = pca_9545 }, - { .id = "PCA9546", .driver_data = pca_9545 }, - { .id = "PCA9547", .driver_data = pca_9547 }, - { .id = "PCA9548", .driver_data = pca_9548 }, - { } -}; -MODULE_DEVICE_TABLE(acpi, pca954x_acpi_ids); -#endif - #ifdef CONFIG_OF static const struct of_device_id pca954x_of_match[] = { { .compatible = "nxp,pca9540", .data = &chips[pca_9540] }, @@ -393,17 +377,8 @@ static int pca954x_probe(struct i2c_client *client, match = of_match_device(of_match_ptr(pca954x_of_match), &client->dev); if (match) data->chip = of_device_get_match_data(&client->dev); - else if (id) + else data->chip = &chips[id->driver_data]; - else { - const struct acpi_device_id *acpi_id; - - acpi_id = acpi_match_device(ACPI_PTR(pca954x_acpi_ids), - &client->dev); - if (!acpi_id) - return -ENODEV; - data->chip = &chips[acpi_id->driver_data]; - } data->last_chan = 0; /* force the first selection */ @@ -492,7 +467,6 @@ static struct i2c_driver pca954x_driver = { .name = "pca954x", .pm = &pca954x_pm, .of_match_table = of_match_ptr(pca954x_of_match), - .acpi_match_table = ACPI_PTR(pca954x_acpi_ids), }, .probe = pca954x_probe, .remove = pca954x_remove, -- cgit v1.2.3 From 81caa91b72fd6a0b8dfc5eb10942c34f7efd2bc5 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Thu, 23 Mar 2017 17:10:10 -0500 Subject: PCI: thunder-pem: Use Cavium assigned hardware ID for ThunderX host controller "CAV" is the only PNP/ACPI hardware ID vendor prefix assigned to Cavium so fix this as it should be from day one. Fixes: 44f22bd91e88 ("PCI: Add MCFG quirks for Cavium ThunderX pass2.x host controller") Tested-by: Robert Richter Signed-off-by: Tomasz Nowicki Signed-off-by: Bjorn Helgaas Acked-by: Robert Richter CC: stable@vger.kernel.org # v4.10+ --- drivers/pci/host/pci-thunder-pem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 52b5bdccf5f0..5b88faeebaaa 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -346,7 +346,7 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg) if (!res_pem) return -ENOMEM; - ret = acpi_get_rc_resources(dev, "THRX0002", root->segment, res_pem); + ret = acpi_get_rc_resources(dev, "CAVA02B", root->segment, res_pem); if (ret) { dev_err(dev, "can't get rc base address\n"); return ret; -- cgit v1.2.3 From 9abb27c7594a62bbf6385e20b7f5a90b4eceae2f Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Thu, 23 Mar 2017 17:10:16 -0500 Subject: PCI: thunder-pem: Add legacy firmware support for Cavium ThunderX host controller During early days of PCI quirks support, ThunderX firmware did not provide PNP0c02 node with PCI configuration space and PEM-specific register ranges. This means that for legacy FW we are not reserving these resources and cannot gather PEM-specific resources for further PEM initialization. To support already deployed legacy FW, calculate PEM-specific ranges and provide resources reservation as fallback scenario into PEM driver when we could not gather PEM reg base from ACPI tables. Tested-by: Robert Richter Signed-off-by: Tomasz Nowicki Signed-off-by: Vadim Lomovtsev Signed-off-by: Bjorn Helgaas Acked-by: Robert Richter CC: stable@vger.kernel.org # v4.10+ --- drivers/pci/host/pci-thunder-pem.c | 56 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index 5b88faeebaaa..b89c373555c5 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -14,6 +14,7 @@ * Copyright (C) 2015 - 2016 Cavium, Inc. */ +#include #include #include #include @@ -334,6 +335,50 @@ static int thunder_pem_init(struct device *dev, struct pci_config_window *cfg, #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) +#define PEM_RES_BASE 0x87e0c0000000UL +#define PEM_NODE_MASK GENMASK(45, 44) +#define PEM_INDX_MASK GENMASK(26, 24) +#define PEM_MIN_DOM_IN_NODE 4 +#define PEM_MAX_DOM_IN_NODE 10 + +static void thunder_pem_reserve_range(struct device *dev, int seg, + struct resource *r) +{ + resource_size_t start = r->start, end = r->end; + struct resource *res; + const char *regionid; + + regionid = kasprintf(GFP_KERNEL, "PEM RC:%d", seg); + if (!regionid) + return; + + res = request_mem_region(start, end - start + 1, regionid); + if (res) + res->flags &= ~IORESOURCE_BUSY; + else + kfree(regionid); + + dev_info(dev, "%pR %s reserved\n", r, + res ? "has been" : "could not be"); +} + +static void thunder_pem_legacy_fw(struct acpi_pci_root *root, + struct resource *res_pem) +{ + int node = acpi_get_node(root->device->handle); + int index; + + if (node == NUMA_NO_NODE) + node = 0; + + index = root->segment - PEM_MIN_DOM_IN_NODE; + index -= node * PEM_MAX_DOM_IN_NODE; + res_pem->start = PEM_RES_BASE | FIELD_PREP(PEM_NODE_MASK, node) | + FIELD_PREP(PEM_INDX_MASK, index); + res_pem->end = res_pem->start + SZ_16M - 1; + res_pem->flags = IORESOURCE_MEM; +} + static int thunder_pem_acpi_init(struct pci_config_window *cfg) { struct device *dev = cfg->parent; @@ -347,9 +392,16 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg) return -ENOMEM; ret = acpi_get_rc_resources(dev, "CAVA02B", root->segment, res_pem); + + /* + * If we fail to gather resources it means that we run with old + * FW where we need to calculate PEM-specific resources manually. + */ if (ret) { - dev_err(dev, "can't get rc base address\n"); - return ret; + thunder_pem_legacy_fw(root, res_pem); + /* Reserve PEM-specific resources and PCI configuration space */ + thunder_pem_reserve_range(dev, root->segment, res_pem); + thunder_pem_reserve_range(dev, root->segment, &cfg->res); } return thunder_pem_init(dev, cfg, res_pem); -- cgit v1.2.3 From f35ec35e5e726e7137deaee0dec24f8cbf07a48f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 22 Mar 2017 22:40:30 -0700 Subject: net: phy: Export mdiobus_register_board_info() We can build modular code that uses mdiobus_register_board_info() which would lead to linking failure since this symbol is not expoerted. Fixes: 648ea0134069 ("net: phy: Allow pre-declaration of MDIO devices") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mdio-boardinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c index 6b988f77da08..61941e29daae 100644 --- a/drivers/net/phy/mdio-boardinfo.c +++ b/drivers/net/phy/mdio-boardinfo.c @@ -84,3 +84,4 @@ int mdiobus_register_board_info(const struct mdio_board_info *info, return 0; } +EXPORT_SYMBOL(mdiobus_register_board_info); -- cgit v1.2.3 From 48481c8fa16410ffa45939b13b6c53c2ca609e5f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Mar 2017 12:39:21 -0700 Subject: net: neigh: guard against NULL solicit() method Dmitry posted a nice reproducer of a bug triggering in neigh_probe() when dereferencing a NULL neigh->ops->solicit method. This can happen for arp_direct_ops/ndisc_direct_ops and similar, which can be used for NUD_NOARP neighbours (created when dev->header_ops is NULL). Admin can then force changing nud_state to some other state that would fire neigh timer. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/core/neighbour.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e7c12caa20c8..4526cbd7e28a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -860,7 +860,8 @@ static void neigh_probe(struct neighbour *neigh) if (skb) skb = skb_clone(skb, GFP_ATOMIC); write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); + if (neigh->ops->solicit) + neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); kfree_skb(skb); } -- cgit v1.2.3 From 854fbd6e5f60fe99e8e3a569865409fca378f143 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Mar 2017 15:46:16 -0700 Subject: lib/syscall: Clear return values when no stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit: aa1f1a639621 ("lib/syscall: Pin the task stack in collect_syscall()") ... added logic to handle a process stack not existing, but left sp and pc uninitialized, which can be later reported via /proc/$pid/syscall for zombie processes, potentially exposing kernel memory to userspace. Zombie /proc/$pid/syscall before: -1 0xffffffff9a060100 0xffff92f42d6ad900 Zombie /proc/$pid/syscall after: -1 0x0 0x0 Reported-by: Robert Święcki Signed-off-by: Kees Cook Reviewed-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # v4.9+ Fixes: aa1f1a639621 ("lib/syscall: Pin the task stack in collect_syscall()") Link: http://lkml.kernel.org/r/20170323224616.GA92694@beast Signed-off-by: Ingo Molnar --- lib/syscall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/syscall.c b/lib/syscall.c index 17d5ff5fa6a3..2c6cd1b5c3ea 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -12,6 +12,7 @@ static int collect_syscall(struct task_struct *target, long *callno, if (!try_get_task_stack(target)) { /* Task has no stack, so the task isn't in a syscall. */ + *sp = *pc = 0; *callno = -1; return 0; } -- cgit v1.2.3 From a46f60d76004965e5669dbf3fc21ef3bc3632eb4 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 24 Mar 2017 12:59:52 +0800 Subject: x86/mm/KASLR: Exclude EFI region from KASLR VA space randomization Currently KASLR is enabled on three regions: the direct mapping of physical memory, vamlloc and vmemmap. However the EFI region is also mistakenly included for VA space randomization because of misusing EFI_VA_START macro and assuming EFI_VA_START < EFI_VA_END. (This breaks kexec and possibly other things that rely on stable addresses.) The EFI region is reserved for EFI runtime services virtual mapping which should not be included in KASLR ranges. In Documentation/x86/x86_64/mm.txt, we can see: ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space EFI uses the space from -4G to -64G thus EFI_VA_START > EFI_VA_END, Here EFI_VA_START = -4G, and EFI_VA_END = -64G. Changing EFI_VA_START to EFI_VA_END in mm/kaslr.c fixes this problem. Signed-off-by: Baoquan He Reviewed-by: Bhupesh Sharma Acked-by: Dave Young Acked-by: Thomas Garnier Cc: #4.8+ Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1490331592-31860-1-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kaslr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 887e57182716..aed206475aa7 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; #if defined(CONFIG_X86_ESPFIX64) static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; #elif defined(CONFIG_EFI) -static const unsigned long vaddr_end = EFI_VA_START; +static const unsigned long vaddr_end = EFI_VA_END; #else static const unsigned long vaddr_end = __START_KERNEL_map; #endif @@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void) */ BUILD_BUG_ON(vaddr_start >= vaddr_end); BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && - vaddr_end >= EFI_VA_START); + vaddr_end >= EFI_VA_END); BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || IS_ENABLED(CONFIG_EFI)) && vaddr_end >= __START_KERNEL_map); -- cgit v1.2.3 From dbe4d69d252e9e65c6c46826980b77b11a142065 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Thu, 23 Mar 2017 10:00:36 +0100 Subject: i2c: mux: pca954x: Add missing pca9546 definition to chip_desc The spec for the pca9546 was missing. This chip is the same as the pca9545 except that it lacks interrupt lines. While the i2c_device_id table mapped the pca9546 to the pca9545 definition the compatible table did not. Signed-off-by: Mike Looijmans Signed-off-by: Peter Rosin --- drivers/i2c/muxes/i2c-mux-pca954x.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 333a3918b656..ad31d21da316 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -116,6 +116,10 @@ static const struct chip_desc chips[] = { .has_irq = 1, .muxtype = pca954x_isswi, }, + [pca_9546] = { + .nchans = 4, + .muxtype = pca954x_isswi, + }, [pca_9547] = { .nchans = 8, .enable = 0x8, @@ -133,7 +137,7 @@ static const struct i2c_device_id pca954x_id[] = { { "pca9543", pca_9543 }, { "pca9544", pca_9544 }, { "pca9545", pca_9545 }, - { "pca9546", pca_9545 }, + { "pca9546", pca_9546 }, { "pca9547", pca_9547 }, { "pca9548", pca_9548 }, { } -- cgit v1.2.3 From 74d1cf4897f919837efc4e34d800b996936eb38e Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 17 Mar 2017 13:58:39 +0100 Subject: dt-bindings: rng: clocks property on omap_rng not always mandatory Commit 52060836f79 ("dt-bindings: omap-rng: Document SafeXcel IP-76 device variant") update the omap_rng Device Tree binding to add support for the IP-76 variation of the IP. As part of this change, a "clocks" property was added, but is indicated as "Required", without indicated it's actually only required for some compatible strings. This commit fixes that, by explicitly stating that the clocks property is only required with the inside-secure,safexcel-eip76 compatible string. Fixes: 52060836f79 ("dt-bindings: omap-rng: Document SafeXcel IP-76 device variant") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Herbert Xu --- Documentation/devicetree/bindings/rng/omap_rng.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/rng/omap_rng.txt b/Documentation/devicetree/bindings/rng/omap_rng.txt index 471477299ece..9cf7876ab434 100644 --- a/Documentation/devicetree/bindings/rng/omap_rng.txt +++ b/Documentation/devicetree/bindings/rng/omap_rng.txt @@ -12,7 +12,8 @@ Required properties: - reg : Offset and length of the register set for the module - interrupts : the interrupt number for the RNG module. Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76" -- clocks: the trng clock source +- clocks: the trng clock source. Only mandatory for the + "inside-secure,safexcel-eip76" compatible. Example: /* AM335x */ -- cgit v1.2.3 From de5540d088fe97ad583cc7d396586437b32149a5 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 23 Mar 2017 12:24:43 +0100 Subject: padata: avoid race in reordering Under extremely heavy uses of padata, crashes occur, and with list debugging turned on, this happens instead: [87487.298728] WARNING: CPU: 1 PID: 882 at lib/list_debug.c:33 __list_add+0xae/0x130 [87487.301868] list_add corruption. prev->next should be next (ffffb17abfc043d0), but was ffff8dba70872c80. (prev=ffff8dba70872b00). [87487.339011] [] dump_stack+0x68/0xa3 [87487.342198] [] ? console_unlock+0x281/0x6d0 [87487.345364] [] __warn+0xff/0x140 [87487.348513] [] warn_slowpath_fmt+0x4a/0x50 [87487.351659] [] __list_add+0xae/0x130 [87487.354772] [] ? _raw_spin_lock+0x64/0x70 [87487.357915] [] padata_reorder+0x1e6/0x420 [87487.361084] [] padata_do_serial+0xa5/0x120 padata_reorder calls list_add_tail with the list to which its adding locked, which seems correct: spin_lock(&squeue->serial.lock); list_add_tail(&padata->list, &squeue->serial.list); spin_unlock(&squeue->serial.lock); This therefore leaves only place where such inconsistency could occur: if padata->list is added at the same time on two different threads. This pdata pointer comes from the function call to padata_get_next(pd), which has in it the following block: next_queue = per_cpu_ptr(pd->pqueue, cpu); padata = NULL; reorder = &next_queue->reorder; if (!list_empty(&reorder->list)) { padata = list_entry(reorder->list.next, struct padata_priv, list); spin_lock(&reorder->lock); list_del_init(&padata->list); atomic_dec(&pd->reorder_objects); spin_unlock(&reorder->lock); pd->processed++; goto out; } out: return padata; I strongly suspect that the problem here is that two threads can race on reorder list. Even though the deletion is locked, call to list_entry is not locked, which means it's feasible that two threads pick up the same padata object and subsequently call list_add_tail on them at the same time. The fix is thus be hoist that lock outside of that block. Signed-off-by: Jason A. Donenfeld Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 05316c9f32da..3202aa17492c 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -186,19 +186,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd) reorder = &next_queue->reorder; + spin_lock(&reorder->lock); if (!list_empty(&reorder->list)) { padata = list_entry(reorder->list.next, struct padata_priv, list); - spin_lock(&reorder->lock); list_del_init(&padata->list); atomic_dec(&pd->reorder_objects); - spin_unlock(&reorder->lock); pd->processed++; + spin_unlock(&reorder->lock); goto out; } + spin_unlock(&reorder->lock); if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) { padata = ERR_PTR(-ENODATA); -- cgit v1.2.3 From efc989fce8703914bac091dcc4b8ff7a72ccf987 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Thu, 23 Mar 2017 12:53:30 -0500 Subject: crypto: ccp - Make some CCP DMA channels private The CCP registers its queues as channels capable of handling general DMA operations. The NTB driver will use DMA if directed, but as public channels can be reserved for use in asynchronous operations some channels should be held back as private. Since the public/private determination is handled at a device level, reserve the "other" (secondary) CCP channels as private. Add a module parameter that allows for override, to be applied to all channels on all devices. CC: # 4.10.x- Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev-v5.c | 1 + drivers/crypto/ccp/ccp-dev.h | 5 +++++ drivers/crypto/ccp/ccp-dmaengine.c | 41 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index 41cc853f8569..fc08b4ed69d9 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -1015,6 +1015,7 @@ const struct ccp_vdata ccpv5a = { const struct ccp_vdata ccpv5b = { .version = CCP_VERSION(5, 0), + .dma_chan_attr = DMA_PRIVATE, .setup = ccp5other_config, .perform = &ccp5_actions, .bar = 2, diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index 2b5c01fade05..aa36f3f81860 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -179,6 +179,10 @@ /* ------------------------ General CCP Defines ------------------------ */ +#define CCP_DMA_DFLT 0x0 +#define CCP_DMA_PRIV 0x1 +#define CCP_DMA_PUB 0x2 + #define CCP_DMAPOOL_MAX_SIZE 64 #define CCP_DMAPOOL_ALIGN BIT(5) @@ -636,6 +640,7 @@ struct ccp_actions { /* Structure to hold CCP version-specific values */ struct ccp_vdata { const unsigned int version; + const unsigned int dma_chan_attr; void (*setup)(struct ccp_device *); const struct ccp_actions *perform; const unsigned int bar; diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index 8d0eeb46d4a2..e00be01fbf5a 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -10,6 +10,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -25,6 +26,37 @@ (mask == 0) ? 64 : fls64(mask); \ }) +/* The CCP as a DMA provider can be configured for public or private + * channels. Default is specified in the vdata for the device (PCI ID). + * This module parameter will override for all channels on all devices: + * dma_chan_attr = 0x2 to force all channels public + * = 0x1 to force all channels private + * = 0x0 to defer to the vdata setting + * = any other value: warning, revert to 0x0 + */ +static unsigned int dma_chan_attr = CCP_DMA_DFLT; +module_param(dma_chan_attr, uint, 0444); +MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public"); + +unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp) +{ + switch (dma_chan_attr) { + case CCP_DMA_DFLT: + return ccp->vdata->dma_chan_attr; + + case CCP_DMA_PRIV: + return DMA_PRIVATE; + + case CCP_DMA_PUB: + return 0; + + default: + dev_info_once(ccp->dev, "Invalid value for dma_chan_attr: %d\n", + dma_chan_attr); + return ccp->vdata->dma_chan_attr; + } +} + static void ccp_free_cmd_resources(struct ccp_device *ccp, struct list_head *list) { @@ -675,6 +707,15 @@ int ccp_dmaengine_register(struct ccp_device *ccp) dma_cap_set(DMA_SG, dma_dev->cap_mask); dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + /* The DMA channels for this device can be set to public or private, + * and overridden by the module parameter dma_chan_attr. + * Default: according to the value in vdata (dma_chan_attr=0) + * dma_chan_attr=0x1: all channels private (override vdata) + * dma_chan_attr=0x2: all channels public (override vdata) + */ + if (ccp_get_dma_chan_attr(ccp) == DMA_PRIVATE) + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + INIT_LIST_HEAD(&dma_dev->channels); for (i = 0; i < ccp->cmd_q_count; i++) { chan = ccp->ccp_dma_chan + i; -- cgit v1.2.3 From 9df0eb180c2074451f25556eb566d89c7057c2ac Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 23 Mar 2017 13:39:46 -0700 Subject: crypto: xts,lrw - fix out-of-bounds write after kmalloc failure In the generic XTS and LRW algorithms, for input data > 128 bytes, a temporary buffer is allocated to hold the values to be XOR'ed with the data before and after encryption or decryption. If the allocation fails, the fixed-size buffer embedded in the request buffer is meant to be used as a fallback --- resulting in more calls to the ECB algorithm, but still producing the correct result. However, we weren't correctly limiting subreq->cryptlen in this case, resulting in pre_crypt() overrunning the embedded buffer. Fix this by setting subreq->cryptlen correctly. Fixes: f1c131b45410 ("crypto: xts - Convert to skcipher") Fixes: 700cb3f5fe75 ("crypto: lrw - Convert to skcipher") Cc: stable@vger.kernel.org # v4.10+ Reported-by: Dmitry Vyukov Signed-off-by: Eric Biggers Acked-by: David S. Miller Signed-off-by: Herbert Xu --- crypto/lrw.c | 7 +++++-- crypto/xts.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/crypto/lrw.c b/crypto/lrw.c index ecd8474018e3..3ea095adafd9 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -286,8 +286,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done) subreq->cryptlen = LRW_BUFFER_SIZE; if (req->cryptlen > LRW_BUFFER_SIZE) { - subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); - rctx->ext = kmalloc(subreq->cryptlen, gfp); + unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE); + + rctx->ext = kmalloc(n, gfp); + if (rctx->ext) + subreq->cryptlen = n; } rctx->src = req->src; diff --git a/crypto/xts.c b/crypto/xts.c index baeb34dd8582..c976bfac29da 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -230,8 +230,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done) subreq->cryptlen = XTS_BUFFER_SIZE; if (req->cryptlen > XTS_BUFFER_SIZE) { - subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); - rctx->ext = kmalloc(subreq->cryptlen, gfp); + unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE); + + rctx->ext = kmalloc(n, gfp); + if (rctx->ext) + subreq->cryptlen = n; } rctx->src = req->src; -- cgit v1.2.3 From 9257821c5a1dc57ef3a37f7cbcebaf548395c964 Mon Sep 17 00:00:00 2001 From: Peter Stein Date: Fri, 17 Feb 2017 00:00:50 -0800 Subject: HID: xinmo: fix for out of range for THT 2P arcade controller. There is a new clone of the XIN MO arcade controller which has same issue with out of range like the original. This fix will solve the issue where 2 directions on the joystick are not recognized by the new THT 2P arcade controller with device ID 0x75e1. In details the new device ID is added the hid-id list and the hid-xinmo source code. Signed-off-by: Peter Stein Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + drivers/hid/hid-xinmo.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 3ceb4a2af381..63ec1993eaaa 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2112,6 +2112,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET) }, { HID_USB_DEVICE(USB_VENDOR_ID_X_TENSIONS, USB_DEVICE_ID_SPEEDLINK_VAD_CEZANNE) }, { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) }, { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0005) }, { HID_USB_DEVICE(USB_VENDOR_ID_ZEROPLUS, 0x0030) }, { HID_USB_DEVICE(USB_VENDOR_ID_ZYDACRON, USB_DEVICE_ID_ZYDACRON_REMOTE_CONTROL) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0e2e7c571d22..4e2648c86c8c 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1082,6 +1082,7 @@ #define USB_VENDOR_ID_XIN_MO 0x16c0 #define USB_DEVICE_ID_XIN_MO_DUAL_ARCADE 0x05e1 +#define USB_DEVICE_ID_THT_2P_ARCADE 0x75e1 #define USB_VENDOR_ID_XIROKU 0x1477 #define USB_DEVICE_ID_XIROKU_SPX 0x1006 diff --git a/drivers/hid/hid-xinmo.c b/drivers/hid/hid-xinmo.c index 7df5227a7e61..9ad7731d2e10 100644 --- a/drivers/hid/hid-xinmo.c +++ b/drivers/hid/hid-xinmo.c @@ -46,6 +46,7 @@ static int xinmo_event(struct hid_device *hdev, struct hid_field *field, static const struct hid_device_id xinmo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_THT_2P_ARCADE) }, { } }; -- cgit v1.2.3 From a95600294157ca7527ee7c70249fb53e09d8c566 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Thu, 26 Jan 2017 14:43:32 +0200 Subject: iwlwifi: mvm: fix accessing fw_id_to_mac_id Access should be by rcu_dereference. Issue was found by sparse. Fixes: 65e254821cee ("iwlwifi: mvm: use firmware station PM notification for AP_LINK_PS") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 6927caecd48e..486dcceed17a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2401,7 +2401,7 @@ void iwl_mvm_sta_pm_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) return; rcu_read_lock(); - sta = mvm->fw_id_to_mac_id[notif->sta_id]; + sta = rcu_dereference(mvm->fw_id_to_mac_id[notif->sta_id]); if (WARN_ON(IS_ERR_OR_NULL(sta))) { rcu_read_unlock(); return; -- cgit v1.2.3 From 251fe09f13bfb54c1ede66ee8bf8ddd0061c4f7c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Mar 2017 13:40:00 +0300 Subject: iwlwifi: mvm: writing zero bytes to debugfs causes a crash This is a static analysis fix. The warning is: drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c:912 iwl_mvm_fw_dbg_collect() warn: integer overflows 'sizeof(*desc) + len' I guess this code is supposed to take a NUL character, but if we write zero bytes then it tries to write -1 characters and crashes. Fixes: c91b865cb14d ("iwlwifi: mvm: support description for user triggered fw dbg collection") Signed-off-by: Dan Carpenter Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index a260cd503200..077bfd8f4c0c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -1056,6 +1056,8 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm, if (ret) return ret; + if (count == 0) + return 0; iwl_mvm_fw_dbg_collect(mvm, FW_DBG_TRIGGER_USER, buf, (count - 1), NULL); -- cgit v1.2.3 From 4d339989acd730f17bc814b5ddb9c54e405766b6 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Tue, 21 Mar 2017 17:13:16 +0200 Subject: iwlwifi: mvm: support ibss in dqa mode Allow working IBSS also when working in DQA mode. This is done by setting it to treat the queues the same as a BSS AP treats the queues. Fixes: 7948b87308a4 ("iwlwifi: mvm: enable dynamic queue allocation mode") Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 3 ++- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 9 ++++++--- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 7 +++++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 99132ea16ede..c5734e1a02d2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -216,7 +216,8 @@ u32 iwl_mvm_mac_get_queues_mask(struct ieee80211_vif *vif) qmask |= BIT(vif->hw_queue[ac]); } - if (vif->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) qmask |= BIT(vif->cab_queue); return qmask; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index b51a2853cc80..9d28db7f56aa 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1806,7 +1806,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_get_wd_timeout(mvm, vif, false, false); int queue; - if (vif->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; else if (vif->type == NL80211_IFTYPE_P2P_DEVICE) queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE; @@ -1837,7 +1838,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) * enabled-cab_queue to the mask) */ if (iwl_mvm_is_dqa_supported(mvm) && - vif->type == NL80211_IFTYPE_AP) { + (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC)) { struct iwl_trans_txq_scd_cfg cfg = { .fifo = IWL_MVM_TX_FIFO_MCAST, .sta_id = mvmvif->bcast_sta.sta_id, @@ -1862,7 +1864,8 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); - if (vif->type == NL80211_IFTYPE_AP) + if (vif->type == NL80211_IFTYPE_AP || + vif->type == NL80211_IFTYPE_ADHOC) iwl_mvm_disable_txq(mvm, vif->cab_queue, vif->cab_queue, IWL_MAX_TID_COUNT, 0); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 3f37075f4cde..1ba0a6f55503 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -506,6 +506,7 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, switch (info->control.vif->type) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_ADHOC: /* * Handle legacy hostapd as well, where station may be added * only after assoc. Take care of the case where we send a @@ -517,7 +518,8 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, if (info->hw_queue == info->control.vif->cab_queue) return info->hw_queue; - WARN_ONCE(1, "fc=0x%02x", le16_to_cpu(fc)); + WARN_ONCE(info->control.vif->type != NL80211_IFTYPE_ADHOC, + "fc=0x%02x", le16_to_cpu(fc)); return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; case NL80211_IFTYPE_P2P_DEVICE: if (ieee80211_is_mgmt(fc)) @@ -584,7 +586,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) iwl_mvm_vif_from_mac80211(info.control.vif); if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE || - info.control.vif->type == NL80211_IFTYPE_AP) { + info.control.vif->type == NL80211_IFTYPE_AP || + info.control.vif->type == NL80211_IFTYPE_ADHOC) { sta_id = mvmvif->bcast_sta.sta_id; queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, hdr->frame_control); -- cgit v1.2.3 From 2d7d54002e396c180db0c800c1046f0a3c471597 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Mar 2017 17:07:57 +0100 Subject: ALSA: seq: Fix race during FIFO resize When a new event is queued while processing to resize the FIFO in snd_seq_fifo_clear(), it may lead to a use-after-free, as the old pool that is being queued gets removed. For avoiding this race, we need to close the pool to be deleted and sync its usage before actually deleting it. The issue was spotted by syzkaller. Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai --- sound/core/seq/seq_fifo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 33980d1c8037..01c4cfe30c9f 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -267,6 +267,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize) /* NOTE: overflow flag is not cleared */ spin_unlock_irqrestore(&f->lock, flags); + /* close the old pool and wait until all users are gone */ + snd_seq_pool_mark_closing(oldpool); + snd_use_lock_sync(&f->use_lock); + /* release cells in old pool */ for (cell = oldhead; cell; cell = next) { next = cell->next; -- cgit v1.2.3 From 3c9d3f1bc2defd418b5933bbc928096c9c686d3b Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Thu, 23 Mar 2017 19:39:54 +0100 Subject: ASoC: STI: Fix reader substream pointer set reader->substream is used in IRQ handler for error case but is never set. Set value to pcm substream on DAI startup and clean it on dai shutdown. Signed-off-by: Arnaud Pouliquen Signed-off-by: Mark Brown --- sound/soc/sti/uniperif_reader.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 5992c6ab3833..93a8df6ed880 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -349,6 +349,8 @@ static int uni_reader_startup(struct snd_pcm_substream *substream, struct uniperif *reader = priv->dai_data.uni; int ret; + reader->substream = substream; + if (!UNIPERIF_TYPE_IS_TDM(reader)) return 0; @@ -378,6 +380,7 @@ static void uni_reader_shutdown(struct snd_pcm_substream *substream, /* Stop the reader */ uni_reader_stop(reader); } + reader->substream = NULL; } static const struct snd_soc_dai_ops uni_reader_dai_ops = { -- cgit v1.2.3 From 971edb0a0087b65bd3726d23b2dffff405d48f72 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 23 Mar 2017 15:05:26 +0100 Subject: ASoC: simple-card: fix simple_dai clk lookup The clock needs to be stored in the simple_dai structure, so it can be enabled later on. This has been broken during the conversion to use devm_* functions for the clk lookup. Fixes: e984fd61e860 (ASoC: simple-card: use devm_get_clk_from_child()) Signed-off-by: Lucas Stach Acked-by: Kuninori Morimoto Signed-off-by: Mark Brown --- sound/soc/generic/simple-card-utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index 4924575d2e95..343b291fc372 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -115,6 +115,7 @@ int asoc_simple_card_parse_clk(struct device *dev, clk = devm_get_clk_from_child(dev, node, NULL); if (!IS_ERR(clk)) { simple_dai->sysclk = clk_get_rate(clk); + simple_dai->clk = clk; } else if (!of_property_read_u32(node, "system-clock-frequency", &val)) { simple_dai->sysclk = val; } else { -- cgit v1.2.3 From 49d52e8108a21749dc2114b924c907db43358984 Mon Sep 17 00:00:00 2001 From: Nathan Sullivan Date: Wed, 22 Mar 2017 15:27:01 -0500 Subject: net: phy: handle state correctly in phy_stop_machine If the PHY is halted on stop, then do not set the state to PHY_UP. This ensures the phy will be restarted later in phy_start when the machine is started again. Fixes: 00db8189d984 ("This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details.") Signed-off-by: Nathan Sullivan Signed-off-by: Brad Mouring Acked-by: Xander Huff Acked-by: Kyle Roeschley Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1be69d8bc909..a2bfc82e95d7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -681,7 +681,7 @@ void phy_stop_machine(struct phy_device *phydev) cancel_delayed_work_sync(&phydev->state_queue); mutex_lock(&phydev->lock); - if (phydev->state > PHY_UP) + if (phydev->state > PHY_UP && phydev->state != PHY_HALTED) phydev->state = PHY_UP; mutex_unlock(&phydev->lock); } -- cgit v1.2.3 From 2f25abe6bac573928a990ccbdac75873add8127e Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 23 Mar 2017 19:14:19 +0800 Subject: r8152: prevent the driver from transmitting packets with carrier off The linking status may be changed when autosuspend. And, after autoresume, the driver may try to transmit packets when the device is carrier off, because the interrupt transfer doesn't update the linking status, yet. And, if the device is in ALDPS mode, the device would stop working. The another similar case is 1. unplug the cable. 2. interrupt transfer queue a work_queue for linking change. 3. device enters the ALDPS mode. 4. a tx occurs before the work_queue is called. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0b1b9188625d..c34df33c6d72 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1294,6 +1294,7 @@ static void intr_callback(struct urb *urb) } } else { if (netif_carrier_ok(tp->netdev)) { + netif_stop_queue(tp->netdev); set_bit(RTL8152_LINK_CHG, &tp->flags); schedule_delayed_work(&tp->schedule, 0); } @@ -3169,6 +3170,9 @@ static void set_carrier(struct r8152 *tp) napi_enable(&tp->napi); netif_wake_queue(netdev); netif_info(tp, link, netdev, "carrier on\n"); + } else if (netif_queue_stopped(netdev) && + skb_queue_len(&tp->tx_queue) < tp->tx_qlen) { + netif_wake_queue(netdev); } } else { if (netif_carrier_ok(netdev)) { @@ -3702,8 +3706,18 @@ static int rtl8152_resume(struct usb_interface *intf) tp->rtl_ops.autosuspend_en(tp, false); napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); - if (netif_carrier_ok(tp->netdev)) - rtl_start_rx(tp); + + if (netif_carrier_ok(tp->netdev)) { + if (rtl8152_get_speed(tp) & LINK_STATUS) { + rtl_start_rx(tp); + } else { + netif_carrier_off(tp->netdev); + tp->rtl_ops.disable(tp); + netif_info(tp, link, tp->netdev, + "linking down\n"); + } + } + napi_enable(&tp->napi); clear_bit(SELECTIVE_SUSPEND, &tp->flags); smp_mb__after_atomic(); -- cgit v1.2.3 From 1adbddef118ae8bf6409c13208645d28c29731c1 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:41 +0300 Subject: net:ethernet:aquantia: Remove adapter re-opening when MTU changed. Closing/opening the adapter is not needed at all. The new MTU settings take effect immediately. Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index d05fbfdce5e5..5d6c40d86775 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -100,11 +100,6 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu) goto err_exit; ndev->mtu = new_mtu; - if (netif_running(ndev)) { - aq_ndev_close(ndev); - aq_ndev_open(ndev); - } - err_exit: return err; } -- cgit v1.2.3 From ea0504f554c8f989eab58549300d15582d36f039 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:42 +0300 Subject: net:ethernet:aquantia: Fix packet type detection (TCP/UDP) for IPv6. In order for the checksum offloads to work correctly we need to set the packet type bit (TCP/UDP) in the TX context buffer. Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code") Signed-off-by: Pavel Belous Tested-by: David Arcari Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index ee78444bfb88..db2b51da2988 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -510,10 +510,22 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, if (skb->ip_summed == CHECKSUM_PARTIAL) { dx_buff->is_ip_cso = (htons(ETH_P_IP) == skb->protocol) ? 1U : 0U; - dx_buff->is_tcp_cso = - (ip_hdr(skb)->protocol == IPPROTO_TCP) ? 1U : 0U; - dx_buff->is_udp_cso = - (ip_hdr(skb)->protocol == IPPROTO_UDP) ? 1U : 0U; + + if (ip_hdr(skb)->version == 4) { + dx_buff->is_tcp_cso = + (ip_hdr(skb)->protocol == IPPROTO_TCP) ? + 1U : 0U; + dx_buff->is_udp_cso = + (ip_hdr(skb)->protocol == IPPROTO_UDP) ? + 1U : 0U; + } else if (ip_hdr(skb)->version == 6) { + dx_buff->is_tcp_cso = + (ipv6_hdr(skb)->nexthdr == NEXTHDR_TCP) ? + 1U : 0U; + dx_buff->is_udp_cso = + (ipv6_hdr(skb)->nexthdr == NEXTHDR_UDP) ? + 1U : 0U; + } } for (; nr_frags--; ++frag_count) { -- cgit v1.2.3 From 9f0dd8c322e89f137c44b437d6fbecc9dea12204 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:43 +0300 Subject: net:ethernet:aquantia: Missing spinlock initialization. Fix for missing initialization aq_ring header.lock spinlock. Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 0358e6072d45..3a8a4aa13687 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -101,6 +101,7 @@ int aq_ring_init(struct aq_ring_s *self) self->hw_head = 0; self->sw_head = 0; self->sw_tail = 0; + spin_lock_init(&self->header.lock); return 0; } -- cgit v1.2.3 From 386aff88e32ec3f82e3f032217bad0c8c8846349 Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:44 +0300 Subject: net:ethernet:aquantia: Fix for LSO with IPv6. Fix Context Command bit: L3 type = "0" for IPv4, "1" for IPv6. Fixes: bab6de8fd180 ("net: ethernet: aquantia: Atlantic A0 and B0 specific functions.") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 3 +++ drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 3 ++- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 3 +++ drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index db2b51da2988..cdb02991f249 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -487,6 +487,9 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, dx_buff->mss = skb_shinfo(skb)->gso_size; dx_buff->is_txc = 1U; + dx_buff->is_ipv6 = + (ip_hdr(skb)->version == 6) ? 1U : 0U; + dx = aq_ring_next_dx(ring, dx); dx_buff = &ring->buff_ring[dx]; ++ret; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 257254645068..eecd6d1c4d73 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -58,7 +58,8 @@ struct __packed aq_ring_buff_s { u8 len_l2; u8 len_l3; u8 len_l4; - u8 rsvd2; + u8 is_ipv6:1; + u8 rsvd2:7; u32 len_pkt; }; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index a2b746a2dd50..a536875a7d0d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -433,6 +433,9 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self, buff->len_l3 + buff->len_l2); is_gso = true; + + if (buff->is_ipv6) + txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_IPV6; } else { buff_pa_len = buff->len; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index cab2931dab9a..69488c9b03e2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -471,6 +471,9 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, buff->len_l3 + buff->len_l2); is_gso = true; + + if (buff->is_ipv6) + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_IPV6; } else { buff_pa_len = buff->len; -- cgit v1.2.3 From 5d73bb863c2ef3aa1a28b5885c85ede7307df8ea Mon Sep 17 00:00:00 2001 From: Pavel Belous Date: Thu, 23 Mar 2017 14:19:45 +0300 Subject: net:ethernet:aquantia: Reset is_gso flag when EOP reached. We need to reset is_gso flag when EOP reached (entire LSO packet processed). Fixes: bab6de8fd180 ("net: ethernet: aquantia: Atlantic A0 and B0 specific functions.") Signed-off-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 1 + drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index a536875a7d0d..4ee15ff06a44 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -461,6 +461,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self, if (unlikely(buff->is_eop)) { txd->ctl |= HW_ATL_A0_TXD_CTL_EOP; txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_WB; + is_gso = false; } } diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 69488c9b03e2..42150708191d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -499,6 +499,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, if (unlikely(buff->is_eop)) { txd->ctl |= HW_ATL_B0_TXD_CTL_EOP; txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_WB; + is_gso = false; } } -- cgit v1.2.3 From 7d969d2e8890f546c8cec634b3aa5f57d4eef883 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 23 Mar 2017 14:55:08 +0100 Subject: s390/qeth: size calculation outbound buffers Depending on the device type, hard_start_xmit() builds different output buffer formats. For instance with HiperSockets, on both L2 and L3 we strip the ETH header from the skb - L3 doesn't need it, and L2 carries it in the buffer's header element. For this, we pass data_offset = ETH_HLEN all the way down to __qeth_fill_buffer(), where skb->data is then adjusted accordingly. But the initial size calculation still considers the *full* skb length (including the ETH header). So qeth_get_elements_no() can erroneously reject a skb as too big, even though it would actually fit into an output buffer once the ETH header has been trimmed off later. Fix this by passing an additional offset to qeth_get_elements_no(), that indicates where in the skb the on-wire data actually begins. Since the current code uses data_offset=-1 for some special handling on OSA, we need to clamp data_offset to 0... On HiperSockets this helps when sending ~MTU-size skbs with weird page alignment. No change for OSA or AF_IUCV. Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 ++- drivers/s390/net/qeth_core_main.c | 5 +++-- drivers/s390/net/qeth_l2_main.c | 5 +++-- drivers/s390/net/qeth_l3_main.c | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index e7addea8741b..d9561e39c3b2 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -961,7 +961,8 @@ int qeth_bridgeport_query_ports(struct qeth_card *card, int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role); int qeth_bridgeport_an_set(struct qeth_card *card, int enable); int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); -int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int); +int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb, + int extra_elems, int data_offset); int qeth_get_elements_for_frags(struct sk_buff *); int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *, struct sk_buff *, struct qeth_hdr *, int, int, int); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 315d8a2db7c0..9a5f99ccb122 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3837,6 +3837,7 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * @card: qeth card structure, to check max. elems. * @skb: SKB address * @extra_elems: extra elems needed, to check against max. + * @data_offset: range starts at skb->data + data_offset * * Returns the number of pages, and thus QDIO buffer elements, needed to cover * skb data, including linear part and fragments. Checks if the result plus @@ -3844,10 +3845,10 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags); * Note: extra_elems is not included in the returned result. */ int qeth_get_elements_no(struct qeth_card *card, - struct sk_buff *skb, int extra_elems) + struct sk_buff *skb, int extra_elems, int data_offset) { int elements = qeth_get_elements_for_range( - (addr_t)skb->data, + (addr_t)skb->data + data_offset, (addr_t)skb->data + skb_headlen(skb)) + qeth_get_elements_for_frags(skb); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index bea483307618..af4e6a639fec 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -849,7 +849,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) * chaining we can not send long frag lists */ if ((card->info.type != QETH_CARD_TYPE_IQD) && - !qeth_get_elements_no(card, new_skb, 0)) { + !qeth_get_elements_no(card, new_skb, 0, 0)) { int lin_rc = skb_linearize(new_skb); if (card->options.performance_stats) { @@ -894,7 +894,8 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - elements = qeth_get_elements_no(card, new_skb, elements_needed); + elements = qeth_get_elements_no(card, new_skb, elements_needed, + (data_offset > 0) ? data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 06d0addcc058..72aa9537a725 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2867,7 +2867,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) */ if ((card->info.type != QETH_CARD_TYPE_IQD) && ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) || - (!use_tso && !qeth_get_elements_no(card, new_skb, 0)))) { + (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) { int lin_rc = skb_linearize(new_skb); if (card->options.performance_stats) { @@ -2909,7 +2909,8 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) elements = use_tso ? qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) : - qeth_get_elements_no(card, new_skb, hdr_elements); + qeth_get_elements_no(card, new_skb, hdr_elements, + (data_offset > 0) ? data_offset : 0); if (!elements) { if (data_offset >= 0) kmem_cache_free(qeth_core_header_cache, hdr); -- cgit v1.2.3 From acd9776b5c45ef02d1a210969a6fcc058afb76e3 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 23 Mar 2017 14:55:09 +0100 Subject: s390/qeth: no ETH header for outbound AF_IUCV With AF_IUCV traffic, the skb passed to hard_start_xmit() has a 14 byte slot at skb->data, intended for an ETH header. qeth_l3_fill_af_iucv_hdr() fills this ETH header... and then immediately moves it to the skb's headroom, where it disappears and is never seen again. But it's still possible for us to return NETDEV_TX_BUSY after the skb has been modified. Since we didn't get a private copy of the skb, the next time the skb is delivered to hard_start_xmit() it no longer has the expected layout (we moved the ETH header to the headroom, so skb->data now starts at the IUCV_TRANS header). So when qeth_l3_fill_af_iucv_hdr() does another round of rebuilding, the resulting qeth header ends up all wrong. On transmission, the buffer is then rejected by the HiperSockets device with SBALF15 = x'04'. When this error is passed back to af_iucv as TX_NOTIFY_UNREACHABLE, it tears down the offending socket. As the ETH header for AF_IUCV serves no purpose, just align the code to what we do for IP traffic on L3 HiperSockets: keep the ETH header at skb->data, and pass down data_offset = ETH_HLEN to qeth_fill_buffer(). When mapping the payload into the SBAL elements, the ETH header is then stripped off. This avoids the skb manipulations in qeth_l3_fill_af_iucv_hdr(), and any buffer re-entering hard_start_xmit() after NETDEV_TX_BUSY is now processed properly. Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 72aa9537a725..653f0fb76573 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2609,17 +2609,13 @@ static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card, char daddr[16]; struct af_iucv_trans_hdr *iucv_hdr; - skb_pull(skb, 14); - card->dev->header_ops->create(skb, card->dev, 0, - card->dev->dev_addr, card->dev->dev_addr, - card->dev->addr_len); - skb_pull(skb, 14); - iucv_hdr = (struct af_iucv_trans_hdr *)skb->data; memset(hdr, 0, sizeof(struct qeth_hdr)); hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3; hdr->hdr.l3.ext_flags = 0; - hdr->hdr.l3.length = skb->len; + hdr->hdr.l3.length = skb->len - ETH_HLEN; hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST; + + iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN); memset(daddr, 0, sizeof(daddr)); daddr[0] = 0xfe; daddr[1] = 0x80; @@ -2823,10 +2819,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((card->info.type == QETH_CARD_TYPE_IQD) && !skb_is_nonlinear(skb)) { new_skb = skb; - if (new_skb->protocol == ETH_P_AF_IUCV) - data_offset = 0; - else - data_offset = ETH_HLEN; + data_offset = ETH_HLEN; hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC); if (!hdr) goto tx_drop; -- cgit v1.2.3 From 90b14dc731b1b4aac8ba01850b530bf7ba26462f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 23 Mar 2017 14:55:10 +0100 Subject: MAINTAINERS: add Julian Wiedmann Add Julian Wiedmann as additional maintainer for drivers/s390/net and net/iucv. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c45c02bc6082..e48678d15401 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10808,6 +10808,7 @@ F: drivers/s390/block/dasd* F: block/partitions/ibm.c S390 NETWORK DRIVERS +M: Julian Wiedmann M: Ursula Braun L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ @@ -10838,6 +10839,7 @@ S: Supported F: drivers/s390/scsi/zfcp_* S390 IUCV NETWORK LAYER +M: Julian Wiedmann M: Ursula Braun L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ -- cgit v1.2.3 From a5af83925363eb85d467933e3d6ec5a87001eb7c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 23 Mar 2017 17:07:26 +0100 Subject: bna: avoid writing uninitialized data into hw registers The latest gcc-7 snapshot warns about bfa_ioc_send_enable/bfa_ioc_send_disable writing undefined values into the hardware registers: drivers/net/ethernet/brocade/bna/bfa_ioc.c: In function 'bfa_iocpf_sm_disabling_entry': arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+4)' is used uninitialized in this function [-Werror=uninitialized] arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+8)' is used uninitialized in this function [-Werror=uninitialized] The two functions look like they should do the same thing, but only one of them initializes the time stamp and clscode field. The fact that we only get a warning for one of the two functions seems to be arbitrary, based on the inlining decisions in the compiler. To address this, I'm making both functions do the same thing: - set the clscode from the ioc structure in both - set the time stamp from ktime_get_real_seconds (which also avoids the signed-integer overflow in 2038 and extends the well-defined behavior until 2106). - zero-fill the reserved field Fixes: 8b230ed8ec96 ("bna: Brocade 10Gb Ethernet device driver") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bfa_ioc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 9e59663a6ead..0f6811860ad5 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1930,13 +1930,13 @@ static void bfa_ioc_send_enable(struct bfa_ioc *ioc) { struct bfi_ioc_ctrl_req enable_req; - struct timeval tv; bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ, bfa_ioc_portid(ioc)); enable_req.clscode = htons(ioc->clscode); - do_gettimeofday(&tv); - enable_req.tv_sec = ntohl(tv.tv_sec); + enable_req.rsvd = htons(0); + /* overflow in 2106 */ + enable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req)); } @@ -1947,6 +1947,10 @@ bfa_ioc_send_disable(struct bfa_ioc *ioc) bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ, bfa_ioc_portid(ioc)); + disable_req.clscode = htons(ioc->clscode); + disable_req.rsvd = htons(0); + /* overflow in 2106 */ + disable_req.tv_sec = ntohl(ktime_get_real_seconds()); bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req)); } -- cgit v1.2.3 From a80db69e47d764bbcaf2fec54b1f308925e7c490 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 23 Mar 2017 11:03:31 -0700 Subject: kcm: return immediately after copy_from_user() failure There is no reason to continue after a copy_from_user() failure. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 309062f3debe..31762f76cdb5 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1687,7 +1687,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_attach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_attach_ioctl(sock, &info); @@ -1697,7 +1697,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct kcm_unattach info; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_unattach_ioctl(sock, &info); @@ -1708,7 +1708,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct socket *newsock = NULL; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) - err = -EFAULT; + return -EFAULT; err = kcm_clone(sock, &info, &newsock); -- cgit v1.2.3 From 871a8623d3b40221ad1103aff715dfee0aa4dacf Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 17 Mar 2017 18:30:07 -0500 Subject: i40iw: Receive netdev events post INET_NOTIFIER state Netdev notification events are de-registered only when all client iwdev instances are removed. If a single client is closed and re-opened, netdev events could arrive even before the Control Queue-Pair (CQP) is created, causing a NULL pointer dereference crash in i40iw_get_cqp_request. Fix this by allowing netdev event notification only after we have reached the INET_NOTIFIER state with respect to device initialization. Reported-by: Stefan Assmann Signed-off-by: Shiraz Saleem Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 0f5d43d1f5fc..70c3e9e79508 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -160,6 +160,9 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; upper_dev = netdev_master_upper_dev_get(netdev); if (netdev != event_netdev) @@ -214,6 +217,9 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; + netdev = iwdev->ldev->netdev; if (netdev != event_netdev) return NOTIFY_DONE; @@ -260,6 +266,8 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * if (!iwhdl) return NOTIFY_DONE; iwdev = &iwhdl->device; + if (iwdev->init_state < INET_NOTIFIER) + return NOTIFY_DONE; p = (__be32 *)neigh->primary_key; i40iw_copy_ip_ntohl(local_ipaddr, p); if (neigh->nud_state & NUD_VALID) { -- cgit v1.2.3 From ae9955aeb8e47c4f60a02add47acf9850ca0ead7 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 23 Mar 2017 20:34:45 +0300 Subject: ARC: vdk: Fix support of UIO MotherBoard section has its "ranges" set to 0xE000_0000-0xF000_0000. But UIO node maps 4 different areas in different memory locations and all outside MB's ranges. That obviously breaks UIO mappings in runtime. Cc: Ruud Derwig Cc: stable@vger.kernel.org Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/vdk_axs10x_mb.dtsi | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi index f0df59b23e21..459fc656b759 100644 --- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi +++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi @@ -112,13 +112,19 @@ interrupts = <7>; bus-width = <4>; }; + }; - /* Embedded Vision subsystem UIO mappings; only relevant for EV VDK */ - uio_ev: uio@0xD0000000 { - compatible = "generic-uio"; - reg = <0xD0000000 0x2000 0xD1000000 0x2000 0x90000000 0x10000000 0xC0000000 0x10000000>; - reg-names = "ev_gsa", "ev_ctrl", "ev_shared_mem", "ev_code_mem"; - interrupts = <23>; - }; + /* + * Embedded Vision subsystem UIO mappings; only relevant for EV VDK + * + * This node is intentionally put outside of MB above becase + * it maps areas outside of MB's 0xEz-0xFz. + */ + uio_ev: uio@0xD0000000 { + compatible = "generic-uio"; + reg = <0xD0000000 0x2000 0xD1000000 0x2000 0x90000000 0x10000000 0xC0000000 0x10000000>; + reg-names = "ev_gsa", "ev_ctrl", "ev_shared_mem", "ev_code_mem"; + interrupt-parent = <&mb_intc>; + interrupts = <23>; }; }; -- cgit v1.2.3 From 86f46aba8d1ac3ed0904542158a9b9cb9c7a143c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Mar 2017 22:00:52 +0200 Subject: IB/core: Protect against self-requeue of a cq work item We need to make sure that the cq work item does not run when we are destroying the cq. Unlike flush_work, cancel_work_sync protects against self-requeue of the work item (which we can do in ib_cq_poll_work). Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche -- Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index e95510117a6d..2746d2eb3d52 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -196,7 +196,7 @@ void ib_free_cq(struct ib_cq *cq) irq_poll_disable(&cq->iop); break; case IB_POLL_WORKQUEUE: - flush_work(&cq->work); + cancel_work_sync(&cq->work); break; default: WARN_ON_ONCE(1); -- cgit v1.2.3 From 9f47a48e6eb97d793db85373e3ef4c55d876d334 Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Thu, 23 Mar 2017 20:47:15 -0700 Subject: Revert "e1000e: driver trying to free already-free irq" This reverts commit 7e54d9d063fa239c95c21548c5267f0ef419ff56. After additional regression testing, several users are experiencing kernel panics during shutdown on e1000e devices. Reverting this change resolves the issue. Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 2175cced402f..e9af89ad039c 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6274,8 +6274,8 @@ static int e1000e_pm_freeze(struct device *dev) /* Quiesce the device without resetting the hardware */ e1000e_down(adapter, false); e1000_free_irq(adapter); - e1000e_reset_interrupt_capability(adapter); } + e1000e_reset_interrupt_capability(adapter); /* Allow time for pending master requests to run */ e1000e_disable_pcie_master(&adapter->hw); -- cgit v1.2.3 From cb8864559631754ac93d5734b165ccd0cad4728c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 10 Mar 2017 11:34:20 -0700 Subject: infiniband: Fix alignment of mmap cookies to support VIPT caching When vmalloc_user is used to create memory that is supposed to be mmap'd to user space, it is necessary for the mmap cookie (eg the offset) to be aligned to SHMLBA. This creates a situation where all virtual mappings of the same physical page share the same virtual cache index and guarantees VIPT coherence. Otherwise the cache is non-coherent and the kernel will not see writes by userspace when reading the shared page (or vice-versa). Reported-by: Josh Beavers Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mmap.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_mmap.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index e202b8142759..6b712eecbd37 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -170,9 +170,9 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, spin_lock_irq(&rdi->mmap_offset_lock); if (rdi->mmap_offset == 0) - rdi->mmap_offset = PAGE_SIZE; + rdi->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->offset = rdi->mmap_offset; - rdi->mmap_offset += size; + rdi->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_irq(&rdi->mmap_offset_lock); INIT_LIST_HEAD(&ip->pending_mmaps); diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index c572a4c09359..bd812e00988e 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -156,10 +156,10 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *rxe, spin_lock_bh(&rxe->mmap_offset_lock); if (rxe->mmap_offset == 0) - rxe->mmap_offset = PAGE_SIZE; + rxe->mmap_offset = ALIGN(PAGE_SIZE, SHMLBA); ip->info.offset = rxe->mmap_offset; - rxe->mmap_offset += size; + rxe->mmap_offset += ALIGN(size, SHMLBA); spin_unlock_bh(&rxe->mmap_offset_lock); -- cgit v1.2.3 From 93efe9817e651607d83e5f100076ae62d0ce0b93 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Mar 2017 12:04:19 -0600 Subject: blk-mq: include errors in did_work calculation Currently we return true in blk_mq_dispatch_rq_list() if we queued IO successfully, but we really want to return whether or not the we made progress. Progress includes if we got an error return. If we don't, this can lead to a hang in blk_mq_sched_dispatch_requests() when a driver is draining IO by returning BLK_MQ_QUEUE_ERROR instead of manually ending the IO in error and return BLK_MQ_QUEUE_OK. Tested-by: Josef Bacik Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 08a49c69738b..6b6e7bc041db 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -969,7 +969,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) struct request *rq; LIST_HEAD(driver_list); struct list_head *dptr; - int queued, ret = BLK_MQ_RQ_QUEUE_OK; + int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; /* * Start off with dptr being NULL, so we start the first request @@ -980,7 +980,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) /* * Now process all the entries, sending them to the driver. */ - queued = 0; + errors = queued = 0; while (!list_empty(list)) { struct blk_mq_queue_data bd; @@ -1037,6 +1037,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) default: pr_err("blk-mq: bad return on queue: %d\n", ret); case BLK_MQ_RQ_QUEUE_ERROR: + errors++; rq->errors = -EIO; blk_mq_end_request(rq, rq->errors); break; @@ -1088,7 +1089,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) blk_mq_run_hw_queue(hctx, true); } - return queued != 0; + return (queued + errors) != 0; } static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) -- cgit v1.2.3 From 9dd5d3ab49f74e1a3fab92c432a7600bd9081ccc Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Mar 2017 14:08:26 -0400 Subject: nbd: handle ERESTARTSYS properly We can submit IO in a processes context, which means there can be pending signals. This isn't a fatal error for NBD, but it does require some finesse. If the signal happens before we transmit anything then we are ok, just requeue the request and carry on. However if we've done a partial transmit we can't allow anything else to be transmitted on this socket until we transmit the remaining part of the request. Deal with this by keeping track of how much we've sent for the current request, and if we get an ERESTARTSYS during any part of our transmission save the state of that request and requeue the IO. If anybody tries to submit a request that isn't our pending request then requeue that request until we are able to service the one that is pending. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 115 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 89 insertions(+), 26 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7e4287bc19e5..3d1fc37a83b1 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -47,6 +47,8 @@ static DEFINE_MUTEX(nbd_index_mutex); struct nbd_sock { struct socket *sock; struct mutex tx_lock; + struct request *pending; + int sent; }; #define NBD_TIMEDOUT 0 @@ -202,7 +204,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, * Send or receive packet. */ static int sock_xmit(struct nbd_device *nbd, int index, int send, - struct iov_iter *iter, int msg_flags) + struct iov_iter *iter, int msg_flags, int *sent) { struct socket *sock = nbd->socks[index]->sock; int result; @@ -237,6 +239,8 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, result = -EPIPE; /* short read */ break; } + if (sent) + *sent += result; } while (msg_data_left(&msg)); tsk_restore_flags(current, pflags, PF_MEMALLOC); @@ -248,6 +252,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send, static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); + struct nbd_sock *nsock = nbd->socks[index]; int result; struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)}; struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)}; @@ -256,6 +261,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) struct bio *bio; u32 type; u32 tag = blk_mq_unique_tag(req); + int sent = nsock->sent, skip = 0; iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); @@ -283,6 +289,17 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) return -EIO; } + /* We did a partial send previously, and we at least sent the whole + * request struct, so just go and send the rest of the pages in the + * request. + */ + if (sent) { + if (sent >= sizeof(request)) { + skip = sent - sizeof(request); + goto send_pages; + } + iov_iter_advance(&from, sent); + } request.type = htonl(type); if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); @@ -294,15 +311,27 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) cmd, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); result = sock_xmit(nbd, index, 1, &from, - (type == NBD_CMD_WRITE) ? MSG_MORE : 0); + (type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent); if (result <= 0) { + if (result == -ERESTARTSYS) { + /* If we havne't sent anything we can just return BUSY, + * however if we have sent something we need to make + * sure we only allow this req to be sent until we are + * completely done. + */ + if (sent) { + nsock->pending = req; + nsock->sent = sent; + } + return BLK_MQ_RQ_QUEUE_BUSY; + } dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); return -EIO; } - +send_pages: if (type != NBD_CMD_WRITE) - return 0; + goto out; bio = req->bio; while (bio) { @@ -318,8 +347,25 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) cmd, bvec.bv_len); iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, bvec.bv_len); - result = sock_xmit(nbd, index, 1, &from, flags); + if (skip) { + if (skip >= iov_iter_count(&from)) { + skip -= iov_iter_count(&from); + continue; + } + iov_iter_advance(&from, skip); + skip = 0; + } + result = sock_xmit(nbd, index, 1, &from, flags, &sent); if (result <= 0) { + if (result == -ERESTARTSYS) { + /* We've already sent the header, we + * have no choice but to set pending and + * return BUSY. + */ + nsock->pending = req; + nsock->sent = sent; + return BLK_MQ_RQ_QUEUE_BUSY; + } dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); @@ -336,6 +382,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) } bio = next; } +out: + nsock->pending = NULL; + nsock->sent = 0; return 0; } @@ -353,7 +402,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) reply.magic = 0; iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); - result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) @@ -395,7 +444,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) rq_for_each_segment(bvec, req, iter) { iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, bvec.bv_len); - result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL); + result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); @@ -482,22 +531,23 @@ static void nbd_clear_que(struct nbd_device *nbd) } -static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) +static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct nbd_device *nbd = cmd->nbd; struct nbd_sock *nsock; + int ret; if (index >= nbd->num_connections) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on invalid socket\n"); - goto error_out; + return -EINVAL; } if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) { dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on closed socket\n"); - goto error_out; + return -EINVAL; } req->errors = 0; @@ -508,29 +558,30 @@ static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) mutex_unlock(&nsock->tx_lock); dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted send on closed socket\n"); - goto error_out; + return -EINVAL; } - if (nbd_send_cmd(nbd, cmd, index) != 0) { - dev_err_ratelimited(disk_to_dev(nbd->disk), - "Request send failed\n"); - req->errors++; - nbd_end_request(cmd); + /* Handle the case that we have a pending request that was partially + * transmitted that _has_ to be serviced first. We need to call requeue + * here so that it gets put _after_ the request that is already on the + * dispatch list. + */ + if (unlikely(nsock->pending && nsock->pending != req)) { + blk_mq_requeue_request(req, true); + ret = 0; + goto out; } - + ret = nbd_send_cmd(nbd, cmd, index); +out: mutex_unlock(&nsock->tx_lock); - - return; - -error_out: - req->errors++; - nbd_end_request(cmd); + return ret; } static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + int ret; /* * Since we look at the bio's to send the request over the network we @@ -543,10 +594,20 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, */ init_completion(&cmd->send_complete); blk_mq_start_request(bd->rq); - nbd_handle_cmd(cmd, hctx->queue_num); + + /* We can be called directly from the user space process, which means we + * could possibly have signals pending so our sendmsg will fail. In + * this case we need to return that we are busy, otherwise error out as + * appropriate. + */ + ret = nbd_handle_cmd(cmd, hctx->queue_num); + if (ret < 0) + ret = BLK_MQ_RQ_QUEUE_ERROR; + if (!ret) + ret = BLK_MQ_RQ_QUEUE_OK; complete(&cmd->send_complete); - return BLK_MQ_RQ_QUEUE_OK; + return ret; } static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, @@ -581,6 +642,8 @@ static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev, mutex_init(&nsock->tx_lock); nsock->sock = sock; + nsock->pending = NULL; + nsock->sent = 0; socks[nbd->num_connections++] = nsock; if (max_part) @@ -634,7 +697,7 @@ static void send_disconnects(struct nbd_device *nbd) for (i = 0; i < nbd->num_connections; i++) { iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); - ret = sock_xmit(nbd, i, 1, &from, 0); + ret = sock_xmit(nbd, i, 1, &from, 0, NULL); if (ret <= 0) dev_err(disk_to_dev(nbd->disk), "Send disconnect failed %d\n", ret); -- cgit v1.2.3 From c103b4dac8f69ca55196afcd57c4cdd6d3ab88eb Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Mar 2017 14:08:27 -0400 Subject: nbd: set rq->errors to actual error code We've been relying on the block layer to assume rq->errors being set translates into -EIO. I noticed in testing that sometimes this isn't true, and really there's not much of a reason to have a counter instead of just using -EIO. So set it properly so we don't leak random numbers to unsuspecting victims. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3d1fc37a83b1..dbc22f4bed3d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -192,7 +192,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); - req->errors++; + req->errors = -EIO; mutex_lock(&nbd->config_lock); sock_shutdown(nbd); @@ -432,7 +432,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); - req->errors++; + req->errors = -EIO; return cmd; } @@ -448,7 +448,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); - req->errors++; + req->errors = -EIO; return cmd; } dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", @@ -518,7 +518,7 @@ static void nbd_clear_req(struct request *req, void *data, bool reserved) if (!blk_mq_request_started(req)) return; cmd = blk_mq_rq_to_pdu(req); - req->errors++; + req->errors = -EIO; nbd_end_request(cmd); } -- cgit v1.2.3 From f8586855031a1d6b243f013c3082631346fddfad Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 24 Mar 2017 14:08:28 -0400 Subject: nbd: set queue timeout properly We can't just set the timeout on the tagset, we have to set it on the queue as it would have been setup already at this point. Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index dbc22f4bed3d..b0003dab90b9 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -844,7 +844,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, nbd_size_set(nbd, bdev, nbd->blksize, arg); return 0; case NBD_SET_TIMEOUT: - nbd->tag_set.timeout = arg * HZ; + if (arg) { + nbd->tag_set.timeout = arg * HZ; + blk_queue_rq_timeout(nbd->disk->queue, arg * HZ); + } return 0; case NBD_SET_FLAGS: -- cgit v1.2.3 From abbbdf12497d36b001e0865bc5bc6cc363f3a5e1 Mon Sep 17 00:00:00 2001 From: Ratna Manoj Bolla Date: Fri, 24 Mar 2017 14:08:29 -0400 Subject: nbd: replace kill_bdev() with __invalidate_device() When a filesystem is mounted on a nbd device and on a disconnect, because of kill_bdev(), and resetting bdev size to zero, buffer_head mappings are getting destroyed under mounted filesystem. After a bdev size reset(i.e bdev->bd_inode->i_size = 0) on a disconnect, followed by a sys_umount(), generic_shutdown_super()->... ->__sync_blockdev()->... -blkdev_writepages()->... ->do_invalidatepage()->... -discard_buffer() is discarding superblock buffer_head assumed to be in mapped state by ext4_commit_super(). [mlin: ported to 4.11-rc2] Signed-off-by: Ratna Manoj Bolla Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b0003dab90b9..d8a23561b4cb 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -126,7 +126,8 @@ static const char *nbdcmd_to_ascii(int cmd) static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev) { - bd_set_size(bdev, 0); + if (bdev->bd_openers <= 1) + bd_set_size(bdev, 0); set_capacity(nbd->disk, 0); kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); @@ -665,6 +666,8 @@ static void nbd_reset(struct nbd_device *nbd) static void nbd_bdev_reset(struct block_device *bdev) { + if (bdev->bd_openers > 1) + return; set_device_ro(bdev, false); bdev->bd_inode->i_size = 0; if (max_part > 0) { @@ -728,7 +731,8 @@ static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev) { sock_shutdown(nbd); nbd_clear_que(nbd); - kill_bdev(bdev); + + __invalidate_device(bdev, true); nbd_bdev_reset(bdev); /* * We want to give the run thread a chance to wait for everybody -- cgit v1.2.3 From 95f255211396958c718aef8c45e3923b5211ea7b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 09:38:03 -0700 Subject: net: Do not allow negative values for busy_read and busy_poll sysctl interfaces This change basically codifies what I think was already the limitations on the busy_poll and busy_read sysctl interfaces. We weren't checking the lower bounds and as such could input negative values. The behavior when that was used was dependent on the architecture. In order to prevent any issues with that I am just disabling support for values less than 0 since this way we don't have to worry about any odd behaviors. By limiting the sysctl values this way it also makes it consistent with how we handle the SO_BUSY_POLL socket option since the value appears to be reported as a signed integer value and negative values are rejected. Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 4ead336e14ea..7f9cc400eca0 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = { .data = &sysctl_net_busy_poll, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "busy_read", .data = &sysctl_net_busy_read, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, #endif #ifdef CONFIG_NET_SCHED -- cgit v1.2.3 From 6332dee83d8eab80d6d502cc51135b998fe6df79 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Wed, 22 Feb 2017 17:22:56 -0800 Subject: RDMA/vmw_pvrdma: Cleanup unused variables Removed the unused nreq and redundant index variables. Moved hardcoded async and cq ring pages number to macro. Reported-by: Yuval Shaia Signed-off-by: Adit Ranadive Reviewed-by: Aditya Sarwade Tested-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 2 ++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 4 ++-- drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 33 ++++++++++---------------- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 3cd96c1b9502..dbf61c37835c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -69,6 +69,8 @@ */ #define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 +#define PVRDMA_NUM_RING_PAGES 4 + struct pvrdma_dev; struct pvrdma_page_dir { diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 100bea5c42ff..e77476ca70ac 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -858,7 +858,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, dev->dsr->resp_slot_dma = (u64)slot_dma; /* Async event ring */ - dev->dsr->async_ring_pages.num_pages = 4; + dev->dsr->async_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; ret = pvrdma_page_dir_init(dev, &dev->async_pdir, dev->dsr->async_ring_pages.num_pages, true); if (ret) @@ -867,7 +867,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma; /* CQ notification ring */ - dev->dsr->cq_ring_pages.num_pages = 4; + dev->dsr->cq_ring_pages.num_pages = PVRDMA_NUM_RING_PAGES; ret = pvrdma_page_dir_init(dev, &dev->cq_pdir, dev->dsr->cq_ring_pages.num_pages, true); if (ret) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index dbbfd35e7da7..3ffbb2d42170 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -554,13 +554,13 @@ out: return ret; } -static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n) +static inline void *get_sq_wqe(struct pvrdma_qp *qp, unsigned int n) { return pvrdma_page_dir_get_ptr(&qp->pdir, qp->sq.offset + n * qp->sq.wqe_size); } -static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n) +static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n) { return pvrdma_page_dir_get_ptr(&qp->pdir, qp->rq.offset + n * qp->rq.wqe_size); @@ -598,9 +598,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned long flags; struct pvrdma_sq_wqe_hdr *wqe_hdr; struct pvrdma_sge *sge; - int i, index; - int nreq; - int ret; + int i, ret; /* * In states lower than RTS, we can fail immediately. In other states, @@ -613,9 +611,8 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); - index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt); - for (nreq = 0; wr; nreq++, wr = wr->next) { - unsigned int tail; + while (wr) { + unsigned int tail = 0; if (unlikely(!pvrdma_idx_ring_has_space( qp->sq.ring, qp->sq.wqe_cnt, &tail))) { @@ -680,7 +677,7 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index); + wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, tail); memset(wqe_hdr, 0, sizeof(*wqe_hdr)); wqe_hdr->wr_id = wr->wr_id; wqe_hdr->num_sge = wr->num_sge; @@ -771,12 +768,11 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, /* Make sure wqe is written before index update */ smp_wmb(); - index++; - if (unlikely(index >= qp->sq.wqe_cnt)) - index = 0; /* Update shared sq ring */ pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt); + + wr = wr->next; } ret = 0; @@ -806,7 +802,6 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct pvrdma_qp *qp = to_vqp(ibqp); struct pvrdma_rq_wqe_hdr *wqe_hdr; struct pvrdma_sge *sge; - int index, nreq; int ret = 0; int i; @@ -821,9 +816,8 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qp->rq.lock, flags); - index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt); - for (nreq = 0; wr; nreq++, wr = wr->next) { - unsigned int tail; + while (wr) { + unsigned int tail = 0; if (unlikely(wr->num_sge > qp->rq.max_sg || wr->num_sge < 0)) { @@ -843,7 +837,7 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, goto out; } - wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index); + wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, tail); wqe_hdr->wr_id = wr->wr_id; wqe_hdr->num_sge = wr->num_sge; wqe_hdr->total_len = 0; @@ -859,12 +853,11 @@ int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, /* Make sure wqe is written before index update */ smp_wmb(); - index++; - if (unlikely(index >= qp->rq.wqe_cnt)) - index = 0; /* Update shared rq ring */ pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt); + + wr = wr->next; } spin_unlock_irqrestore(&qp->rq.lock, flags); -- cgit v1.2.3 From e51c2fb0331cb3440d7dc83ee78019ee8c7bb366 Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Wed, 22 Feb 2017 17:22:57 -0800 Subject: RDMA/vmw_pvrdma: Dont hardcode QP header page Moved the header page count to a macro. Reported-by: Yuval Shaia Signed-off-by: Adit Ranadive Reviewed-by: Aditya Sarwade Tested-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 1 + drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index dbf61c37835c..9fbe22d3467b 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -70,6 +70,7 @@ #define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820 #define PVRDMA_NUM_RING_PAGES 4 +#define PVRDMA_QP_NUM_HEADER_PAGES 1 struct pvrdma_dev; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 3ffbb2d42170..30062aad3af1 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -170,8 +170,9 @@ static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap, sizeof(struct pvrdma_sge) * qp->sq.max_sg); /* Note: one extra page for the header. */ - qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size + - PAGE_SIZE - 1) / PAGE_SIZE; + qp->npages_send = PVRDMA_QP_NUM_HEADER_PAGES + + (qp->sq.wqe_cnt * qp->sq.wqe_size + PAGE_SIZE - 1) / + PAGE_SIZE; return 0; } @@ -288,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, qp->npages = qp->npages_send + qp->npages_recv; /* Skip header page. */ - qp->sq.offset = PAGE_SIZE; + qp->sq.offset = PVRDMA_QP_NUM_HEADER_PAGES * PAGE_SIZE; /* Recv queue pages are after send pages. */ qp->rq.offset = qp->npages_send * PAGE_SIZE; @@ -341,7 +342,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type); cmd->access_flags = IB_ACCESS_LOCAL_WRITE; cmd->total_chunks = qp->npages; - cmd->send_chunks = qp->npages_send - 1; + cmd->send_chunks = qp->npages_send - PVRDMA_QP_NUM_HEADER_PAGES; cmd->pdir_dma = qp->pdir.dir_dma; dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n", -- cgit v1.2.3 From b172679b0d3b93a6197b2ff892794e7178e3c448 Mon Sep 17 00:00:00 2001 From: Aditya Sarwade Date: Wed, 22 Feb 2017 17:22:58 -0800 Subject: RDMA/vmw_pvrdma: Activate device on ethernet link up Restore device state when ethernet link changes to active. Acked-by: George Zhang Acked-by: Jorgen Hansen Acked-by: Bryan Tan Signed-off-by: Aditya Sarwade Signed-off-by: Adit Ranadive Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h | 2 +- drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index e69d6f3cae32..09078ccfaec7 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -132,7 +132,7 @@ enum pvrdma_pci_resource { enum pvrdma_device_ctl { PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */ - PVRDMA_DEVICE_CTL_QUIESCE, /* Quiesce device. */ + PVRDMA_DEVICE_CTL_UNQUIESCE, /* Unquiesce device. */ PVRDMA_DEVICE_CTL_RESET, /* Reset device. */ }; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index e77476ca70ac..34ebc7615411 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -56,7 +56,7 @@ #include "pvrdma.h" #define DRV_NAME "vmw_pvrdma" -#define DRV_VERSION "1.0.0.0-k" +#define DRV_VERSION "1.0.1.0-k" static DEFINE_MUTEX(pvrdma_device_list_lock); static LIST_HEAD(pvrdma_device_list); @@ -660,7 +660,16 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); break; case NETDEV_UP: - pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + pvrdma_write_reg(dev, PVRDMA_REG_CTL, + PVRDMA_DEVICE_CTL_UNQUIESCE); + + mb(); + + if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) + dev_err(&dev->pdev->dev, + "failed to activate device during link up\n"); + else + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); break; default: dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n", -- cgit v1.2.3 From ded260235308f340b979258a4c736e06ba12c747 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 8 Mar 2017 08:21:52 +0300 Subject: IB/rxe: double free on error "goto err;" has it's own kfree_skb() call so it's a double free. We only need to free on the "goto exit;" path. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index dbfde0dc6ff7..9f95f50b2909 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -729,11 +729,11 @@ next_wqe: ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; - kfree_skb(skb); rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { + kfree_skb(skb); rxe_run_task(&qp->req.task, 1); goto exit; } -- cgit v1.2.3 From 004d18ea99b0f3b7b3d5790e4dc5ca7d5238706d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Feb 2017 13:40:16 +0300 Subject: RDMA/ocrdma: fix a type issue in ocrdma_put_pd_num() We want to return zero on success or negative error codes. The type should be int and not u8. Signed-off-by: Dan Carpenter Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index bc9fb144e57b..c52edeafd616 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -372,7 +372,7 @@ static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id, return 0; } -static u8 ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id, +static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id, bool dpp_pool) { int status; -- cgit v1.2.3 From a1c5dd13228a1f9e5087375f9702422dfc2adbf1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 28 Feb 2017 21:42:53 +0200 Subject: IB/rxe: Update documentation link All Soft-RoCE (rxe) is handled now in rdma-core user space library, so the documentation. The patch below updates the documentation link to that new location. Reported-by: Josh Beavers Signed-off-by: Leon Romanovsky Reviewed-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 7d1ac27ed251..6332dedc11e8 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -22,4 +22,4 @@ config RDMA_RXE To configure and work with soft-RoCE driver please use the following wiki page under "configure Soft-RoCE (RXE)" section: - https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home + https://github.com/linux-rdma/rdma-core/blob/master/Documentation/rxe.md -- cgit v1.2.3 From 0957c29f78af7d890c4ac506eda8f76bfc5a137a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 Mar 2017 22:56:53 +0000 Subject: IB/core: Restore I/O MMU, s390 and powerpc support Avoid that the following error message is reported on the console while loading an RDMA driver with I/O MMU support enabled: DMAR: Allocating domain for mlx5_0 failed Ensure that DMA mapping operations that use to_pci_dev() to access to struct pci_dev see the correct PCI device. E.g. the s390 and powerpc DMA mapping operations use to_pci_dev() even with I/O MMU support disabled. This patch preserves the following changes of the DMA mapping updates patch series: - Introduction of dma_virt_ops. - Removal of ib_device.dma_ops. - Removal of struct ib_dma_mapping_ops. - Removal of an if-statement from each ib_dma_*() operation. - IB HW drivers no longer set dma_device directly. Reported-by: Sebastian Ott Reported-by: Parav Pandit Fixes: commit 99db9494035f ("IB/core: Remove ib_device.dma_device") Signed-off-by: Bart Van Assche Reviewed-by: parav@mellanox.com Tested-by: parav@mellanox.com Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 26 ++++++++++++++++++++------ include/rdma/ib_verbs.h | 30 +++++++++++++++++------------- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 593d2ce6ec7c..addf869045cc 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -336,12 +336,26 @@ int ib_register_device(struct ib_device *device, struct device *parent = device->dev.parent; WARN_ON_ONCE(!parent); - if (!device->dev.dma_ops) - device->dev.dma_ops = parent->dma_ops; - if (!device->dev.dma_mask) - device->dev.dma_mask = parent->dma_mask; - if (!device->dev.coherent_dma_mask) - device->dev.coherent_dma_mask = parent->coherent_dma_mask; + WARN_ON_ONCE(device->dma_device); + if (device->dev.dma_ops) { + /* + * The caller provided custom DMA operations. Copy the + * DMA-related fields that are used by e.g. dma_alloc_coherent() + * into device->dev. + */ + device->dma_device = &device->dev; + if (!device->dev.dma_mask) + device->dev.dma_mask = parent->dma_mask; + if (!device->dev.coherent_dma_mask) + device->dev.coherent_dma_mask = + parent->coherent_dma_mask; + } else { + /* + * The caller did not provide custom DMA operations. Use the + * DMA mapping operations of the parent device. + */ + device->dma_device = parent; + } mutex_lock(&device_mutex); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0f1813c13687..99e4423eb2b8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1863,6 +1863,9 @@ struct ib_port_immutable { }; struct ib_device { + /* Do not access @dma_device directly from ULP nor from HW drivers. */ + struct device *dma_device; + char name[IB_DEVICE_NAME_MAX]; struct list_head event_handler_list; @@ -3007,7 +3010,7 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { - return dma_mapping_error(&dev->dev, dma_addr); + return dma_mapping_error(dev->dma_device, dma_addr); } /** @@ -3021,7 +3024,7 @@ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { - return dma_map_single(&dev->dev, cpu_addr, size, direction); + return dma_map_single(dev->dma_device, cpu_addr, size, direction); } /** @@ -3035,7 +3038,7 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_single(&dev->dev, addr, size, direction); + dma_unmap_single(dev->dma_device, addr, size, direction); } /** @@ -3052,7 +3055,7 @@ static inline u64 ib_dma_map_page(struct ib_device *dev, size_t size, enum dma_data_direction direction) { - return dma_map_page(&dev->dev, page, offset, size, direction); + return dma_map_page(dev->dma_device, page, offset, size, direction); } /** @@ -3066,7 +3069,7 @@ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { - dma_unmap_page(&dev->dev, addr, size, direction); + dma_unmap_page(dev->dma_device, addr, size, direction); } /** @@ -3080,7 +3083,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - return dma_map_sg(&dev->dev, sg, nents, direction); + return dma_map_sg(dev->dma_device, sg, nents, direction); } /** @@ -3094,7 +3097,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - dma_unmap_sg(&dev->dev, sg, nents, direction); + dma_unmap_sg(dev->dma_device, sg, nents, direction); } static inline int ib_dma_map_sg_attrs(struct ib_device *dev, @@ -3102,7 +3105,8 @@ static inline int ib_dma_map_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - return dma_map_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs); + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, + dma_attrs); } static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, @@ -3110,7 +3114,7 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, enum dma_data_direction direction, unsigned long dma_attrs) { - dma_unmap_sg_attrs(&dev->dev, sg, nents, direction, dma_attrs); + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry @@ -3152,7 +3156,7 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_cpu(&dev->dev, addr, size, dir); + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** @@ -3167,7 +3171,7 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, size_t size, enum dma_data_direction dir) { - dma_sync_single_for_device(&dev->dev, addr, size, dir); + dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /** @@ -3182,7 +3186,7 @@ static inline void *ib_dma_alloc_coherent(struct ib_device *dev, dma_addr_t *dma_handle, gfp_t flag) { - return dma_alloc_coherent(&dev->dev, size, dma_handle, flag); + return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag); } /** @@ -3196,7 +3200,7 @@ static inline void ib_dma_free_coherent(struct ib_device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { - dma_free_coherent(&dev->dev, size, cpu_addr, dma_handle); + dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); } /** -- cgit v1.2.3 From 812755d69efcf7c12fded57983d456647a0bbadd Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 24 Feb 2017 03:28:13 +0300 Subject: uapi: fix rdma/mlx5-abi.h userspace compilation errors Consistently use types from linux/types.h to fix the following rdma/mlx5-abi.h userspace compilation errors: /usr/include/rdma/mlx5-abi.h:69:25: error: 'u64' undeclared here (not in a function) MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, /usr/include/rdma/mlx5-abi.h:69:29: error: expected ',' or '}' before numeric constant MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, Include to fix the following rdma/mlx5-abi.h userspace compilation error: /usr/include/rdma/mlx5-abi.h:286:12: error: 'ETH_ALEN' undeclared here (not in a function) __u8 dmac[ETH_ALEN]; Signed-off-by: Dmitry V. Levin Signed-off-by: Doug Ledford --- include/uapi/rdma/mlx5-abi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index da7cd62bace7..0b3d30837a9f 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -34,6 +34,7 @@ #define MLX5_ABI_USER_H #include +#include /* For ETH_ALEN. */ enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, @@ -66,7 +67,7 @@ struct mlx5_ib_alloc_ucontext_req { }; enum mlx5_lib_caps { - MLX5_LIB_CAP_4K_UAR = (u64)1 << 0, + MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0, }; struct mlx5_ib_alloc_ucontext_req_v2 { -- cgit v1.2.3 From 9fcd67d1772c43d2f23e8fca56acc7219e991676 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Fri, 24 Feb 2017 15:38:26 +0100 Subject: IB/rxe: increment msn only when completing a request According to C9-147, MSN should only be incremented when the last packet of a multi packet request has been received. "Logically, the requester associates a sequential Send Sequence Number (SSN) with each WQE posted to the send queue. The SSN bears a one- to-one relationship to the MSN returned by the responder in each re- sponse packet. Therefore, when the requester receives a response, it in- terprets the MSN as representing the SSN of the most recent request completed by the responder to determine which send WQE(s) can be completed." Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: David Marchand Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index d404a8aba7af..c9dd385ce62e 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -813,18 +813,17 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) WARN_ON_ONCE(1); } - /* We successfully processed this new request. */ - qp->resp.msn++; - /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) + if (pkt->mask & RXE_COMP_MASK) { + /* We successfully processed this new request. */ + qp->resp.msn++; return RESPST_COMPLETE; - else if (qp_type(qp) == IB_QPT_RC) + } else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; -- cgit v1.2.3 From fedd9e1f75828992ace5833379116f38c66ad7bb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 16 Mar 2017 18:57:00 +0200 Subject: IB/cq: Don't process more than the given budget The caller might not want this overhead. Reviewed-by: Bart Van Assche Reviewed-by: Leon Romanovsky Signed-off-by: Sagi Grimberg Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/core/cq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 2746d2eb3d52..f2ae75fa3128 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -29,7 +29,13 @@ static int __ib_process_cq(struct ib_cq *cq, int budget) { int i, n, completed = 0; - while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) { + /* + * budget might be (-1) if the caller does not + * want to bound this call, thus we need unsigned + * minimum here. + */ + while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH, + budget - completed), cq->wc)) > 0) { for (i = 0; i < n; i++) { struct ib_wc *wc = &cq->wc[i]; -- cgit v1.2.3 From b7363e67b23e04c23c2a99437feefac7292a88bc Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 8 Mar 2017 22:03:17 +0200 Subject: IB/device: Convert ib-comp-wq to be CPU-bound This workqueue is used by our storage target mode ULPs via the new CQ API. Recent observations when working with very high-end flash storage devices reveal that UNBOUND workqueue threads can migrate between cpu cores and even numa nodes (although some numa locality is accounted for). While this attribute can be useful in some workloads, it does not fit in very nicely with the normal run-to-completion model we usually use in our target-mode ULPs and the block-mq irq<->cpu affinity facilities. The whole block-mq concept is that the completion will land on the same cpu where the submission was performed. The fact that our submitter thread is migrating cpus can break this locality. We assume that as a target mode ULP, we will serve multiple initiators/clients and we can spread the load enough without having to use unbound kworkers. Also, while we're at it, expose this workqueue via sysfs which is harmless and can be useful for debug. Signed-off-by: Sagi Grimberg Reviewed-by: Bart Van Assche -- Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index addf869045cc..7c9e34d679d3 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1029,8 +1029,7 @@ static int __init ib_core_init(void) return -ENOMEM; ib_comp_wq = alloc_workqueue("ib-comp-wq", - WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - WQ_UNBOUND_MAX_ACTIVE); + WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0); if (!ib_comp_wq) { ret = -ENOMEM; goto err; -- cgit v1.2.3 From 28ee1b746f493b7c62347d714f58fbf4f70df4f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 24 Mar 2017 19:42:37 +0100 Subject: secure_seq: downgrade to per-host timestamp offsets Unfortunately too many devices (not under our control) use tcp_tw_recycle=1, which depends on timestamps being identical of the same saddr. Although tcp_tw_recycle got removed in net-next we can't make such end hosts disappear so downgrade to per-host timestamp offsets. Cc: Soheil Hassas Yeganeh Cc: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Reported-by: Yvan Vanrossomme Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/secure_seq.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 758f140b6bed..d28da7d363f1 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -20,9 +20,11 @@ #include static siphash_key_t net_secret __read_mostly; +static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { + net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } #endif @@ -45,6 +47,23 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) +static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +{ + const struct { + struct in6_addr saddr; + struct in6_addr daddr; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .daddr = *(struct in6_addr *)daddr, + }; + + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash(&combined, offsetofend(typeof(combined), daddr), + &ts_secret); +} + u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport, u32 *tsoff) { @@ -63,7 +82,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } EXPORT_SYMBOL(secure_tcpv6_sequence_number); @@ -88,6 +107,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET +static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +{ + if (sysctl_tcp_timestamps != 1) + return 0; + + return siphash_2u32((__force u32)saddr, (__force u32)daddr, + &ts_secret); +} /* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, @@ -103,7 +130,7 @@ u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0; + *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } -- cgit v1.2.3 From 13a8cd191a2b470cfd435b3b57dbd21aa65ff78c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 15:01:42 -0700 Subject: i40e: Do not enable NAPI on q_vectors that have no rings When testing the epoll w/ busy poll code I found that I could get into a state where the i40e driver had q_vectors w/ active NAPI that had no rings. This was resulting in a divide by zero error. To correct it I am updating the driver code so that we only support NAPI on q_vectors that have 1 or more rings allocated to them. Signed-off-by: Alexander Duyck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e8a8351c8ea9..82a95cc2c8ee 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4438,8 +4438,12 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_enable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_enable(&q_vector->napi); + } } /** @@ -4453,8 +4457,12 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi) if (!vsi->netdev) return; - for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_disable(&vsi->q_vectors[q_idx]->napi); + for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) { + struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx]; + + if (q_vector->rx.ring || q_vector->tx.ring) + napi_disable(&q_vector->napi); + } } /** -- cgit v1.2.3 From ea174c9573b0e0c8bc1a7a90fe9360ccb7aa9cbb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 27 Feb 2017 20:16:33 +0200 Subject: RDMA/iser: Fix possible mr leak on device removal event When the rdma device is removed, we must cleanup all the rdma resources within the DEVICE_REMOVAL event handler to let the device teardown gracefully. When this happens with live I/O, some memory regions are occupied. Thus, track them too and dereg all the mr's. We are safe with mr access by iscsi_iser_cleanup_task. Reported-by: Raju Rangoju Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Reviewed-by: Max Gurtovoy Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 ++ drivers/infiniband/ulp/iser/iser_verbs.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 9d0b22ad58c1..c1ae4aeae2f9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -430,6 +430,7 @@ struct iser_fr_desc { struct list_head list; struct iser_reg_resources rsc; struct iser_pi_context *pi_ctx; + struct list_head all_list; }; /** @@ -443,6 +444,7 @@ struct iser_fr_pool { struct list_head list; spinlock_t lock; int size; + struct list_head all_list; }; /** diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 30b622f2ab73..c538a38c91ce 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -362,6 +362,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, int i, ret; INIT_LIST_HEAD(&fr_pool->list); + INIT_LIST_HEAD(&fr_pool->all_list); spin_lock_init(&fr_pool->lock); fr_pool->size = 0; for (i = 0; i < cmds_max; i++) { @@ -373,6 +374,7 @@ int iser_alloc_fastreg_pool(struct ib_conn *ib_conn, } list_add_tail(&desc->list, &fr_pool->list); + list_add_tail(&desc->all_list, &fr_pool->all_list); fr_pool->size++; } @@ -392,13 +394,13 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) struct iser_fr_desc *desc, *tmp; int i = 0; - if (list_empty(&fr_pool->list)) + if (list_empty(&fr_pool->all_list)) return; iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &fr_pool->list, list) { - list_del(&desc->list); + list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) { + list_del(&desc->all_list); iser_free_reg_res(&desc->rsc); if (desc->pi_ctx) iser_free_pi_ctx(desc->pi_ctx); -- cgit v1.2.3 From f6aafac184a3e46e919769dd4faa8bf0dc436534 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:18:45 +0100 Subject: IB/qib: fix false-postive maybe-uninitialized warning aarch64-linux-gcc-7 complains about code it doesn't fully understand: drivers/infiniband/hw/qib/qib_iba7322.c: In function 'qib_7322_txchk_change': include/asm-generic/bitops/non-atomic.h:105:35: error: 'shadow' may be used uninitialized in this function [-Werror=maybe-uninitialized] The code is right, and despite trying hard, I could not come up with a version that I liked better than just adding a fake initialization here to shut up the warning. Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Signed-off-by: Arnd Bergmann Acked-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_iba7322.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 12c4208fd701..af9f596bb68b 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -7068,7 +7068,7 @@ static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start, unsigned long flags; while (wait) { - unsigned long shadow; + unsigned long shadow = 0; int cstart, previ = -1; /* -- cgit v1.2.3 From 43a6684519ab0a6c52024b5e25322476cabad893 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 24 Mar 2017 19:36:13 -0700 Subject: ping: implement proper locking We got a report of yet another bug in ping http://www.openwall.com/lists/oss-security/2017/03/24/6 ->disconnect() is not called with socket lock held. Fix this by acquiring ping rwlock earlier. Thanks to Daniel, Alexander and Andrey for letting us know this problem. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Reported-by: Daniel Jiang Reported-by: Solar Designer Reported-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/ipv4/ping.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2af6244b83e2..ccfbce13a633 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -156,17 +156,18 @@ int ping_hash(struct sock *sk) void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + write_lock_bh(&ping_table.lock); if (sk_hashed(sk)) { - write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); sk_nulls_node_init(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(&ping_table.lock); } + write_unlock_bh(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_unhash); -- cgit v1.2.3 From b1977682a3858b5584ffea7cfb7bd863f68db18d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 24 Mar 2017 15:57:33 -0700 Subject: bpf: improve verifier packet range checks llvm can optimize the 'if (ptr > data_end)' checks to be in the order slightly different than the original C code which will confuse verifier. Like: if (ptr + 16 > data_end) return TC_ACT_SHOT; // may be followed by if (ptr + 14 > data_end) return TC_ACT_SHOT; while llvm can see that 'ptr' is valid for all 16 bytes, the verifier could not. Fix verifier logic to account for such case and add a test. Reported-by: Huapeng Zhou Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 +++-- tools/testing/selftests/bpf/test_verifier.c | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 796b68d00119..5e6202e62265 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1973,14 +1973,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) - regs[i].range = dst_reg->off; + /* keep the maximum range already checked */ + regs[i].range = max(regs[i].range, dst_reg->off); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) - reg->range = dst_reg->off; + reg->range = max(reg->range, dst_reg->off); } } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index d1555e4240c0..7d761d4cc759 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3417,6 +3417,26 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_LWT_XMIT, }, + { + "overlapping checks for direct packet access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_LWT_XMIT, + }, { "invalid access of tc_classid for LWT_IN", .insns = { -- cgit v1.2.3 From a096926ed4532eac38d4ec92aaba8c7f2149d89a Mon Sep 17 00:00:00 2001 From: Enric Balletbo i Serra Date: Fri, 24 Mar 2017 18:44:02 +0100 Subject: iio: cros_ec_sensors: Fix return value to get raw and calibbias data. The cros_ec_sensors_read function must return the type of value on all cases. This was always true except for RAW and CALIBBIAS data which returned an error or 0. This patch just fixes the mistake I introduced when submitting the series. Fixes: commit c14dca07a31d (iio: cros_ec_sensors: add ChromeOS EC Contiguous Sensors driver) Signed-off-by: Enric Balletbo i Serra Signed-off-by: Jonathan Cameron --- drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c index d6c372bb433b..c17596f7ed2c 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c @@ -61,7 +61,7 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev, ret = st->core.read_ec_sensors_data(indio_dev, 1 << idx, &data); if (ret < 0) break; - + ret = IIO_VAL_INT; *val = data; break; case IIO_CHAN_INFO_CALIBBIAS: @@ -76,7 +76,7 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev, for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++) st->core.calib[i] = st->core.resp->sensor_offset.offset[i]; - + ret = IIO_VAL_INT; *val = st->core.calib[idx]; break; case IIO_CHAN_INFO_SCALE: -- cgit v1.2.3 From 4bdc9029685ac03be50b320b29691766d2326c2b Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Tue, 21 Mar 2017 16:52:14 +0100 Subject: iio: bmg160: reset chip when probing The gyroscope chip might need to be reset to be used. Without the chip being reset, the driver stopped at the first regmap_read (to get the CHIP_ID) and failed to probe. The datasheet of the gyroscope says that a minimum wait of 30ms after the reset has to be done. This patch has been checked on a BMX055 and the datasheet of the BMG160 and the BMI055 give the same reset register and bits. Signed-off-by: Quentin Schulz Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/bmg160_core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index f7fcfa886f72..821919dd245b 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "bmg160.h" #define BMG160_IRQ_NAME "bmg160_event" @@ -52,6 +53,9 @@ #define BMG160_DEF_BW 100 #define BMG160_REG_PMU_BW_RES BIT(7) +#define BMG160_GYRO_REG_RESET 0x14 +#define BMG160_GYRO_RESET_VAL 0xb6 + #define BMG160_REG_INT_MAP_0 0x17 #define BMG160_INT_MAP_0_BIT_ANY BIT(1) @@ -236,6 +240,14 @@ static int bmg160_chip_init(struct bmg160_data *data) int ret; unsigned int val; + /* + * Reset chip to get it in a known good state. A delay of 30ms after + * reset is required according to the datasheet. + */ + regmap_write(data->regmap, BMG160_GYRO_REG_RESET, + BMG160_GYRO_RESET_VAL); + usleep_range(30000, 30700); + ret = regmap_read(data->regmap, BMG160_REG_CHIP_ID, &val); if (ret < 0) { dev_err(dev, "Error reading reg_chip_id\n"); -- cgit v1.2.3 From dac7a4b4b1f664934e8b713f529b629f67db313c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 25 Mar 2017 17:22:47 -0400 Subject: ext4: lock the xattr block before checksuming it We must lock the xattr block before calculating or verifying the checksum in order to avoid spurious checksum failures. https://bugzilla.kernel.org/show_bug.cgi?id=193661 Reported-by: Colin Ian King Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/xattr.c | 65 +++++++++++++++++++++++++++------------------------------ 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 67636acf7624..996e7900d4c8 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -131,31 +131,26 @@ static __le32 ext4_xattr_block_csum(struct inode *inode, } static int ext4_xattr_block_csum_verify(struct inode *inode, - sector_t block_nr, - struct ext4_xattr_header *hdr) + struct buffer_head *bh) { - if (ext4_has_metadata_csum(inode->i_sb) && - (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) - return 0; - return 1; -} - -static void ext4_xattr_block_csum_set(struct inode *inode, - sector_t block_nr, - struct ext4_xattr_header *hdr) -{ - if (!ext4_has_metadata_csum(inode->i_sb)) - return; + struct ext4_xattr_header *hdr = BHDR(bh); + int ret = 1; - hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); + if (ext4_has_metadata_csum(inode->i_sb)) { + lock_buffer(bh); + ret = (hdr->h_checksum == ext4_xattr_block_csum(inode, + bh->b_blocknr, hdr)); + unlock_buffer(bh); + } + return ret; } -static inline int ext4_handle_dirty_xattr_block(handle_t *handle, - struct inode *inode, - struct buffer_head *bh) +static void ext4_xattr_block_csum_set(struct inode *inode, + struct buffer_head *bh) { - ext4_xattr_block_csum_set(inode, bh->b_blocknr, BHDR(bh)); - return ext4_handle_dirty_metadata(handle, inode, bh); + if (ext4_has_metadata_csum(inode->i_sb)) + BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode, + bh->b_blocknr, BHDR(bh)); } static inline const struct xattr_handler * @@ -233,7 +228,7 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) return -EFSCORRUPTED; - if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) + if (!ext4_xattr_block_csum_verify(inode, bh)) return -EFSBADCRC; error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size, bh->b_data); @@ -618,23 +613,22 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, } } + ext4_xattr_block_csum_set(inode, bh); /* * Beware of this ugliness: Releasing of xattr block references * from different inodes can race and so we have to protect * from a race where someone else frees the block (and releases * its journal_head) before we are done dirtying the buffer. In * nojournal mode this race is harmless and we actually cannot - * call ext4_handle_dirty_xattr_block() with locked buffer as + * call ext4_handle_dirty_metadata() with locked buffer as * that function can call sync_dirty_buffer() so for that case * we handle the dirtying after unlocking the buffer. */ if (ext4_handle_valid(handle)) - error = ext4_handle_dirty_xattr_block(handle, inode, - bh); + error = ext4_handle_dirty_metadata(handle, inode, bh); unlock_buffer(bh); if (!ext4_handle_valid(handle)) - error = ext4_handle_dirty_xattr_block(handle, inode, - bh); + error = ext4_handle_dirty_metadata(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); @@ -863,13 +857,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ext4_xattr_cache_insert(ext4_mb_cache, bs->bh); } + ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); if (error == -EFSCORRUPTED) goto bad_block; if (!error) - error = ext4_handle_dirty_xattr_block(handle, - inode, - bs->bh); + error = ext4_handle_dirty_metadata(handle, + inode, + bs->bh); if (error) goto cleanup; goto inserted; @@ -967,10 +962,11 @@ inserted: ce->e_reusable = 0; ea_bdebug(new_bh, "reusing; refcount now=%d", ref); + ext4_xattr_block_csum_set(inode, new_bh); unlock_buffer(new_bh); - error = ext4_handle_dirty_xattr_block(handle, - inode, - new_bh); + error = ext4_handle_dirty_metadata(handle, + inode, + new_bh); if (error) goto cleanup_dquot; } @@ -1020,11 +1016,12 @@ getblk_failed: goto getblk_failed; } memcpy(new_bh->b_data, s->base, new_bh->b_size); + ext4_xattr_block_csum_set(inode, new_bh); set_buffer_uptodate(new_bh); unlock_buffer(new_bh); ext4_xattr_cache_insert(ext4_mb_cache, new_bh); - error = ext4_handle_dirty_xattr_block(handle, - inode, new_bh); + error = ext4_handle_dirty_metadata(handle, inode, + new_bh); if (error) goto cleanup; } -- cgit v1.2.3 From d67d64f423147cf4fe8212658255e1160a4ef02c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 25 Mar 2017 17:33:31 -0400 Subject: ext4: fix two spelling nits Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 +- fs/ext4/move_extent.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f622d4a577e3..f303d3a7f44a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5400,7 +5400,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, * If there is inline data in the inode, the inode will normally not * have data blocks allocated (it may have an external xattr block). * Report at least one sector for such files, so tools like tar, rsync, - * others doen't incorrectly think the file is completely sparse. + * others don't incorrectly think the file is completely sparse. */ if (unlikely(ext4_has_inline_data(inode))) stat->blocks += (stat->size + 511) >> 9; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 6fc14def0c70..615bc03d0fbd 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -511,7 +511,7 @@ mext_check_arguments(struct inode *orig_inode, if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) != (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) { ext4_debug("ext4 move extent: orig and donor's start " - "offset are not alligned [ino:orig %lu, donor %lu]\n", + "offsets are not aligned [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } -- cgit v1.2.3 From a17f1861b5ea4327f9f35e9edb3c5fadceaa7c64 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 24 Mar 2017 23:02:49 +0100 Subject: net: hns: fix uninitialized data use When dev_dbg() is enabled, we print uninitialized data, as gcc-7.0.1 now points out: ethernet/hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_promisc_tcam': ethernet/hisilicon/hns/hns_dsaf_main.c:2947:75: error: 'tbl_tcam_data.low.val' may be used uninitialized in this function [-Werror=maybe-uninitialized] ethernet/hisilicon/hns/hns_dsaf_main.c:2947:75: error: 'tbl_tcam_data.high.val' may be used uninitialized in this function [-Werror=maybe-uninitialized] We also pass the data into hns_dsaf_tcam_mc_cfg(), which might later use it (not sure about that), so it seems safer to just always initialize the tbl_tcam_data structure. Fixes: 1f5fa2dd1cfa ("net: hns: fix for promisc mode in HNS driver") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 90dbda792614..cd93657abe87 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2924,10 +2924,11 @@ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev, /* find the tcam entry index for promisc */ entry_index = dsaf_promisc_tcam_entry(port); + memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); + memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); + /* config key mask */ if (enable) { - memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data)); - memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask)); dsaf_set_field(tbl_tcam_data.low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, DSAF_TBL_TCAM_KEY_PORT_S, port); -- cgit v1.2.3 From 834a61d455ff8069552f44140b2c1de85e6bc84d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 24 Mar 2017 23:02:50 +0100 Subject: net: hns: avoid gcc-7.0.1 warning for uninitialized data hns_dsaf_set_mac_key() calls dsaf_set_field() on an uninitialized field, which will then change only a few of its bits, causing a warning with the latest gcc: hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_mac_uc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] (origin) &= (~(mask)); \ ^~ hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_mac_mc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_add_mac_mc_port': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_del_mac_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_rm_mac_addr': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_del_mac_mc_port': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_mac_uc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_mac_mc_entry': hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] The code is actually correct since we always set all 16 bits of the port_vlan field, but gcc correctly points out that the first access does contain uninitialized data. This initializes the field to zero first before setting the individual bits. Fixes: 5483bfcb169c ("net: hns: modify tcam table and set mac key") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index cd93657abe87..403ea9db6dbd 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -1519,6 +1519,7 @@ static void hns_dsaf_set_mac_key( mac_key->high.bits.mac_3 = addr[3]; mac_key->low.bits.mac_4 = addr[4]; mac_key->low.bits.mac_5 = addr[5]; + mac_key->low.bits.port_vlan = 0; dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M, DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id); dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M, -- cgit v1.2.3 From 6ac3b77a6ffff7513ff86b684aa256ea01c0e5b5 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 25 Mar 2017 01:48:08 +0300 Subject: irda: vlsi_ir: fix check for DMA mapping errors vlsi_alloc_ring() checks for DMA mapping errors by comparing returned address with zero, while pci_dma_mapping_error() should be used. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/irda/vlsi_ir.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index ffedad2a360a..15b920086251 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -418,8 +418,9 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr memset(rd, 0, sizeof(*rd)); rd->hw = hwmap + i; rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA); - if (rd->buf == NULL || - !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) { + if (rd->buf) + busaddr = pci_map_single(pdev, rd->buf, len, dir); + if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) { if (rd->buf) { net_err_ratelimited("%s: failed to create PCI-MAP for %p\n", __func__, rd->buf); @@ -430,8 +431,7 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr rd = r->rd + j; busaddr = rd_get_addr(rd); rd_set_addr_status(rd, 0, 0); - if (busaddr) - pci_unmap_single(pdev, busaddr, len, dir); + pci_unmap_single(pdev, busaddr, len, dir); kfree(rd->buf); rd->buf = NULL; } -- cgit v1.2.3 From 819f60fb7db169d851186d04e571e9bca27321e8 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Sat, 25 Mar 2017 19:29:01 +0800 Subject: EDAC, pnd2_edac: Fix reported DIMM number DIMM number passed to edac_mc_handle_error() was accidentally hardcoded to zero. Pass in the correct daddr->dimm value. Signed-off-by: Qiuxu Zhuo Signed-off-by: Borislav Petkov --- drivers/edac/pnd2_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index ec2e349d728d..928e0dba41fc 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1164,7 +1164,7 @@ static void pnd2_mce_output_error(struct mem_ctl_info *mci, const struct mce *m, /* Call the helper to output message */ edac_mc_handle_error(tp_event, mci, core_err_cnt, m->addr >> PAGE_SHIFT, - m->addr & ~PAGE_MASK, 0, daddr->chan, 0, -1, optype, msg); + m->addr & ~PAGE_MASK, 0, daddr->chan, daddr->dimm, -1, optype, msg); return; -- cgit v1.2.3 From c02ed2e75ef4c74e41e421acb4ef1494671585e8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Mar 2017 14:15:16 -0700 Subject: Linux 4.11-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b2faa9319372..231e6a7749bd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 2db2c250dd3d1e74a50d4ab5f44c44ca5cb4e42b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 27 Mar 2017 15:11:44 +1100 Subject: selftests/powerpc: Fix standalone powerpc build The changes to enable building with a separate output directory, in commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT") broke building the powerpc selftests on their own, eg: $ cd tools/testing/selftests/powerpc; make It was partially fixed in commit e53aff45c490 ("selftests: lib.mk Fix individual test builds"), which defined OUTPUT for standalone tests. But that only defines OUTPUT within the Makefile, the value is not exported so sub-shells can't see it. We could export OUTPUT, but it's actually cleaner to just expand the value of OUTPUT before we invoke the shell. Fixes: a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT") Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 1c5d0575802e..bf13fc2297aa 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -34,34 +34,34 @@ endif all: $(SUB_DIRS) $(SUB_DIRS): - BUILD_TARGET=$$OUTPUT/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all + BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all include ../lib.mk override define RUN_TESTS @for TARGET in $(SUB_DIRS); do \ - BUILD_TARGET=$$OUTPUT/$$TARGET; \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ done; endef override define INSTALL_RULE @for TARGET in $(SUB_DIRS); do \ - BUILD_TARGET=$$OUTPUT/$$TARGET; \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\ done; endef override define EMIT_TESTS @for TARGET in $(SUB_DIRS); do \ - BUILD_TARGET=$$OUTPUT/$$TARGET; \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\ done; endef clean: @for TARGET in $(SUB_DIRS); do \ - BUILD_TARGET=$$OUTPUT/$$TARGET; \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \ done; rm -f tags -- cgit v1.2.3 From 1633682053a7ee8058e10c76722b9b28e97fb73f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 24 Mar 2017 13:38:28 -0400 Subject: USB: fix linked-list corruption in rh_call_control() Using KASAN, Dmitry found a bug in the rh_call_control() routine: If buffer allocation fails, the routine returns immediately without unlinking its URB from the control endpoint, eventually leading to linked-list corruption. This patch fixes the problem by jumping to the end of the routine (where the URB is unlinked) when an allocation failure occurs. Signed-off-by: Alan Stern Reported-and-tested-by: Dmitry Vyukov CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 612fab6e54fb..79bdca5cb9c7 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb) */ tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength); tbuf = kzalloc(tbuf_size, GFP_KERNEL); - if (!tbuf) - return -ENOMEM; + if (!tbuf) { + status = -ENOMEM; + goto err_alloc; + } bufp = tbuf; @@ -734,6 +736,7 @@ error: } kfree(tbuf); + err_alloc: /* any errors get returned through the urb completion */ spin_lock_irq(&hcd_root_hub_lock); -- cgit v1.2.3 From 7b09cc5a9debc86c903c2eff8f8a1fdef773c649 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 22 Mar 2017 16:24:17 -0400 Subject: sched/clock: Fix broken stable to unstable transfer When it is determined that the clock is actually unstable, and we switch from stable to unstable, the __clear_sched_clock_stable() function is eventually called. In this function we set gtod_offset so the following holds true: sched_clock() + raw_offset == ktime_get_ns() + gtod_offset But instead of getting the latest timestamps, we use the last values from scd, so instead of sched_clock() we use scd->tick_raw, and instead of ktime_get_ns() we use scd->tick_gtod. However, later, when we use gtod_offset sched_clock_local() we do not add it to scd->tick_gtod to calculate the correct clock value when we determine the boundaries for min/max clocks. This can result in tick granularity sched_clock() values, so fix it. Signed-off-by: Pavel Tatashin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hpa@zytor.com Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity") Link: http://lkml.kernel.org/r/1490214265-899964-2-git-send-email-pasha.tatashin@oracle.com Signed-off-by: Ingo Molnar --- kernel/sched/clock.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 24a3e01bf8cb..00a45c45beca 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -221,7 +221,7 @@ static inline u64 wrap_max(u64 x, u64 y) */ static u64 sched_clock_local(struct sched_clock_data *scd) { - u64 now, clock, old_clock, min_clock, max_clock; + u64 now, clock, old_clock, min_clock, max_clock, gtod; s64 delta; again: @@ -238,9 +238,10 @@ again: * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + __gtod_offset + delta; - min_clock = wrap_max(scd->tick_gtod, old_clock); - max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); + gtod = scd->tick_gtod + __gtod_offset; + clock = gtod + delta; + min_clock = wrap_max(gtod, old_clock); + max_clock = wrap_max(old_clock, gtod + TICK_NSEC); clock = wrap_max(clock, min_clock); clock = wrap_min(clock, max_clock); -- cgit v1.2.3 From 0abfe7e2570d7c729a7662e82c09a23f00f29346 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Mar 2017 20:59:30 +0000 Subject: drm/i915: Restore marking context objects as dirty on pinning Commit e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc") converted the legacy intel_ringbuffer submission to the same context pinning mechanism as execlists - that is to pin the context until the subsequent request is retired. Previously it used the vma retirement of the context object to keep itself pinned until the next request (after i915_vma_move_to_active()). In the conversion, I missed that the vma retirement was also responsible for marking the object as dirty. Mark the context object as dirty when pinning (equivalent to execlists) which ensures that if the context is swapped out due to mempressure or suspend/hibernation, when it is loaded back in it does so with the previous state (and not all zero). Fixes: e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc") Reported-by: Dennis Gilmore Reported-by: Mathieu Marquer Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99993 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100181 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v4.11-rc1 Link: http://patchwork.freedesktop.org/patch/msgid/20170322205930.12762-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin (cherry picked from commit 5d4bac5503fcc67dd7999571e243cee49371aef7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 91bc4abf5d3e..6c5f9958197d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2024,6 +2024,8 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, ret = context_pin(ctx, flags); if (ret) goto error; + + ce->state->obj->mm.dirty = true; } /* The kernel context is only used as a placeholder for flushing the -- cgit v1.2.3 From 3b7dabf029478bb80507a6c4500ca94132a2bc0b Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 08:53:12 +0800 Subject: netfilter: invoke synchronize_rcu after set the _hook_ to NULL Otherwise, another CPU may access the invalid pointer. For example: CPU0 CPU1 - rcu_read_lock(); - pfunc = _hook_; _hook_ = NULL; - mod unload - - pfunc(); // invalid, panic - rcu_read_unlock(); So we must call synchronize_rcu() to wait the rcu reader to finish. Also note, in nf_nat_snmp_basic_fini, synchronize_rcu() will be invoked by later nf_conntrack_helper_unregister, but I'm inclined to add a explicit synchronize_rcu after set the nf_nat_snmp_hook to NULL. Depend on such obscure assumptions is not a good idea. Last, in nfnetlink_cttimeout, we use kfree_rcu to free the time object, so in cttimeout_exit, invoking rcu_barrier() is not necessary at all, remove it too. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 1 + net/netfilter/nf_conntrack_ecache.c | 2 ++ net/netfilter/nf_conntrack_netlink.c | 1 + net/netfilter/nf_nat_core.c | 2 ++ net/netfilter/nfnetlink_cttimeout.c | 2 +- 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index c9b52c361da2..5a8f7c360887 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1304,6 +1304,7 @@ static int __init nf_nat_snmp_basic_init(void) static void __exit nf_nat_snmp_basic_fini(void) { RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + synchronize_rcu(); nf_conntrack_helper_unregister(&snmp_trap_helper); } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index da9df2d56e66..22fc32143e9c 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); @@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net, BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); + /* synchronize_rcu() is called from ctnetlink_exit. */ } EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6806b5e73567..908d858034e4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3442,6 +3442,7 @@ static void __exit ctnetlink_exit(void) #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif + synchronize_rcu(); } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 94b14c5a8b17..82802e4a6640 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -903,6 +903,8 @@ static void __exit nf_nat_cleanup(void) #ifdef CONFIG_XFRM RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); #endif + synchronize_rcu(); + for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 139e0867e56e..47d6656c9119 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void) #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); + synchronize_rcu(); #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ - rcu_barrier(); } module_init(cttimeout_init); -- cgit v1.2.3 From 83d90219a5df8d950855ce73229a97b63605c317 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 12:09:15 +0800 Subject: netfilter: nfnl_cthelper: fix a race when walk the nf_ct_helper_hash table The nf_ct_helper_hash table is protected by nf_ct_helper_mutex, while nfct_helper operation is protected by nfnl_lock(NFNL_SUBSYS_CTHELPER). So it's possible that one CPU is walking the nf_ct_helper_hash for cthelper add/get/del, another cpu is doing nf_conntrack_helpers_unregister at the same time. This is dangrous, and may cause use after free error. Note, delete operation will flush all cthelpers added via nfnetlink, so using rcu to do protect is not easy. Now introduce a dummy list to record all the cthelpers added via nfnetlink, then we can walk the dummy list instead of walking the nf_ct_helper_hash. Also, keep nfnl_cthelper_dump_table unchanged, it may be invoked without nfnl_lock(NFNL_SUBSYS_CTHELPER) held. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cthelper.c | 177 +++++++++++++++++-------------------- 1 file changed, 81 insertions(+), 96 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 2b987d2a77bc..d45558178da5 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -32,6 +32,13 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); +struct nfnl_cthelper { + struct list_head list; + struct nf_conntrack_helper helper; +}; + +static LIST_HEAD(nfnl_cthelper_list); + static int nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -205,14 +212,16 @@ nfnl_cthelper_create(const struct nlattr * const tb[], struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; + struct nfnl_cthelper *nfcth; int ret; if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) return -EINVAL; - helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); - if (helper == NULL) + nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL); + if (nfcth == NULL) return -ENOMEM; + helper = &nfcth->helper; ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); if (ret < 0) @@ -249,11 +258,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[], if (ret < 0) goto err2; + list_add_tail(&nfcth->list, &nfnl_cthelper_list); return 0; err2: kfree(helper->expect_policy); err1: - kfree(helper); + kfree(nfcth); return ret; } @@ -379,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, const char *helper_name; struct nf_conntrack_helper *cur, *helper = NULL; struct nf_conntrack_tuple tuple; - int ret = 0, i; + struct nfnl_cthelper *nlcth; + int ret = 0; if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) return -EINVAL; @@ -390,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, if (ret < 0) return ret; - rcu_read_lock(); - for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) - continue; + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if ((tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_EXCL) { - ret = -EEXIST; - goto err; - } - helper = cur; - break; - } + helper = cur; + break; } - rcu_read_unlock(); if (helper == NULL) ret = nfnl_cthelper_create(tb, &tuple); @@ -422,9 +424,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl, ret = nfnl_cthelper_update(tb, helper); return ret; -err: - rcu_read_unlock(); - return ret; } static int @@ -588,11 +587,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { - int ret = -ENOENT, i; + int ret = -ENOENT; struct nf_conntrack_helper *cur; struct sk_buff *skb2; char *helper_name = NULL; struct nf_conntrack_tuple tuple; + struct nfnl_cthelper *nlcth; bool tuple_set = false; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -613,45 +613,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) { + list_for_each_entry(nlcth, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; - - skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) { - ret = -ENOMEM; - break; - } + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, - NFNL_MSG_TYPE(nlh->nlmsg_type), - NFNL_MSG_CTHELPER_NEW, cur); - if (ret <= 0) { - kfree_skb(skb2); - break; - } + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, - MSG_DONTWAIT); - if (ret > 0) - ret = 0; + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; - /* this avoids a loop in nfnetlink. */ - return ret == -EAGAIN ? -ENOBUFS : ret; - } + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; } return ret; } @@ -662,10 +656,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, { char *helper_name = NULL; struct nf_conntrack_helper *cur; - struct hlist_node *tmp; struct nf_conntrack_tuple tuple; bool tuple_set = false, found = false; - int i, j = 0, ret; + struct nfnl_cthelper *nlcth, *n; + int j = 0, ret; if (tb[NFCTH_NAME]) helper_name = nla_data(tb[NFCTH_NAME]); @@ -678,30 +672,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl, tuple_set = true; } - for (i = 0; i < nf_ct_helper_hsize; i++) { - hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i], - hnode) { - /* skip non-userspace conntrack helpers. */ - if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; + j++; - j++; + if (helper_name && + strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN)) + continue; - if (helper_name && strncmp(cur->name, helper_name, - NF_CT_HELPER_NAME_LEN) != 0) { - continue; - } - if (tuple_set && - (tuple.src.l3num != cur->tuple.src.l3num || - tuple.dst.protonum != cur->tuple.dst.protonum)) - continue; + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; - found = true; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); - kfree(cur); - } + found = true; + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + + list_del(&nlcth->list); + kfree(nlcth); } + /* Make sure we return success if we flush and there is no helpers */ return (found || j == 0) ? 0 : -ENOENT; } @@ -750,22 +741,16 @@ err_out: static void __exit nfnl_cthelper_exit(void) { struct nf_conntrack_helper *cur; - struct hlist_node *tmp; - int i; + struct nfnl_cthelper *nlcth, *n; nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); - for (i=0; iflags & NF_CT_HELPER_F_USERSPACE)) - continue; + list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) { + cur = &nlcth->helper; - nf_conntrack_helper_unregister(cur); - kfree(cur->expect_policy); - kfree(cur); - } + nf_conntrack_helper_unregister(cur); + kfree(cur->expect_policy); + kfree(nlcth); } } -- cgit v1.2.3 From 9c3f3794926a997b1cab6c42480ff300efa2d162 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 25 Mar 2017 16:35:29 +0800 Subject: netfilter: nf_ct_ext: fix possible panic after nf_ct_extend_unregister If one cpu is doing nf_ct_extend_unregister while another cpu is doing __nf_ct_ext_add_length, then we may hit BUG_ON(t == NULL). Moreover, there's no synchronize_rcu invocation after set nf_ct_ext_types[id] to NULL, so it's possible that we may access invalid pointer. But actually, most of the ct extends are built-in, so the problem listed above will not happen. However, there are two exceptions: NF_CT_EXT_NAT and NF_CT_EXT_SYNPROXY. For _EXT_NAT, the panic will not happen, since adding the nat extend and unregistering the nat extend are located in the same file(nf_nat_core.c), this means that after the nat module is removed, we cannot add the nat extend too. For _EXT_SYNPROXY, synproxy extend may be added by init_conntrack, while synproxy extend unregister will be done by synproxy_core_exit. So after nf_synproxy_core.ko is removed, we may still try to add the synproxy extend, then kernel panic may happen. I know it's very hard to reproduce this issue, but I can play a tricky game to make it happen very easily :) Step 1. Enable SYNPROXY for tcp dport 1234 at FORWARD hook: # iptables -I FORWARD -p tcp --dport 1234 -j SYNPROXY Step 2. Queue the syn packet to the userspace at raw table OUTPUT hook. Also note, in the userspace we only add a 20s' delay, then reinject the syn packet to the kernel: # iptables -t raw -I OUTPUT -p tcp --syn -j NFQUEUE --queue-num 1 Step 3. Using "nc 2.2.2.2 1234" to connect the server. Step 4. Now remove the nf_synproxy_core.ko quickly: # iptables -F FORWARD # rmmod ipt_SYNPROXY # rmmod nf_synproxy_core Step 5. After 20s' delay, the syn packet is reinjected to the kernel. Now you will see the panic like this: kernel BUG at net/netfilter/nf_conntrack_extend.c:91! Call Trace: ? __nf_ct_ext_add_length+0x53/0x3c0 [nf_conntrack] init_conntrack+0x12b/0x600 [nf_conntrack] nf_conntrack_in+0x4cc/0x580 [nf_conntrack] ipv4_conntrack_local+0x48/0x50 [nf_conntrack_ipv4] nf_reinject+0x104/0x270 nfqnl_recv_verdict+0x3e1/0x5f9 [nfnetlink_queue] ? nfqnl_recv_verdict+0x5/0x5f9 [nfnetlink_queue] ? nla_parse+0xa0/0x100 nfnetlink_rcv_msg+0x175/0x6a9 [nfnetlink] [...] One possible solution is to make NF_CT_EXT_SYNPROXY extend built-in, i.e. introduce nf_conntrack_synproxy.c and only do ct extend register and unregister in it, similar to nf_conntrack_timeout.c. But having such a obscure restriction of nf_ct_extend_unregister is not a good idea, so we should invoke synchronize_rcu after set nf_ct_ext_types to NULL, and check the NULL pointer when do __nf_ct_ext_add_length. Then it will be easier if we add new ct extend in the future. Last, we use kfree_rcu to free nf_ct_ext, so rcu_barrier() is unnecessary anymore, remove it too. Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_extend.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 02bcf00c2492..008299b7f78f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } + off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len + var_alloc_len; alloc_size = t->alloc_size + var_alloc_len; @@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); - BUG_ON(t == NULL); + if (!t) { + rcu_read_unlock(); + return NULL; + } newoff = ALIGN(old->len, t->align); newlen = newoff + t->len + var_alloc_len; @@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - rcu_barrier(); /* Wait for completion of call_rcu()'s */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); -- cgit v1.2.3 From 75c689dca98851d65ef5a27e5ce26b625b68751c Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Sat, 25 Mar 2017 18:24:36 +0800 Subject: netfilter: nf_nat_snmp: Fix panic when snmp_trap_helper fails to register In the commit 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper"), the snmp_helper is replaced by nf_nat_snmp_hook. So the snmp_helper is never registered. But it still tries to unregister the snmp_helper, it could cause the panic. Now remove the useless snmp_helper and the unregister call in the error handler. Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper") Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_snmp_basic.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5a8f7c360887..53e49f5011d3 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = { .timeout = 180, }; -static struct nf_conntrack_helper snmp_helper __read_mostly = { - .me = THIS_MODULE, - .help = help, - .expect_policy = &snmp_exp_policy, - .name = "snmp", - .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), - .tuple.dst.protonum = IPPROTO_UDP, -}; - static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .me = THIS_MODULE, .help = help, @@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { static int __init nf_nat_snmp_basic_init(void) { - int ret = 0; - BUG_ON(nf_nat_snmp_hook != NULL); RCU_INIT_POINTER(nf_nat_snmp_hook, help); - ret = nf_conntrack_helper_register(&snmp_trap_helper); - if (ret < 0) { - nf_conntrack_helper_unregister(&snmp_helper); - return ret; - } - return ret; + return nf_conntrack_helper_register(&snmp_trap_helper); } static void __exit nf_nat_snmp_basic_fini(void) -- cgit v1.2.3 From d4ea7e3c5c0e341c15b073016dbf3ab6c65f12f3 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 15 Mar 2017 21:50:09 -0400 Subject: NFS: Fix old dentry rehash after move Now that nfs_rename()'s d_move has moved within the RPC task's rpc_call_done callback, rehashing new_dentry will actually rehash the old dentry's name in nfs_rename(). d_move() is going to rehash the new dentry for us anyway, so doing it again here is unnecessary. Reported-by: Chuck Lever Fixes: 920b4530fb80 ("NFS: nfs_rename() handle -ERESTARTSYS dentry left behind") Signed-off-by: Benjamin Coddington Tested-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index fb499a3f21b5..f92ba8d6c556 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2055,7 +2055,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct dentry *dentry = NULL, *rehash = NULL; + struct dentry *dentry = NULL; struct rpc_task *task; int error = -EBUSY; @@ -2078,10 +2078,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * To prevent any new references to the target during the * rename, we unhash the dentry in advance. */ - if (!d_unhashed(new_dentry)) { + if (!d_unhashed(new_dentry)) d_drop(new_dentry); - rehash = new_dentry; - } if (d_count(new_dentry) > 2) { int err; @@ -2098,7 +2096,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; new_dentry = dentry; - rehash = NULL; new_inode = NULL; } } @@ -2119,8 +2116,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, error = task->tk_status; rpc_put_task(task); out: - if (rehash) - d_rehash(rehash); trace_nfs_rename_exit(old_dir, old_dentry, new_dir, new_dentry, error); /* new dentry created? */ -- cgit v1.2.3 From 2f0ba790df51721794c11abc7a076d407392f648 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 27 Mar 2017 19:33:09 +0200 Subject: cpufreq: Fix creation of symbolic links to policy directories The cpufreq core only tries to create symbolic links from CPU directories in sysfs to policy directories in cpufreq_add_dev(), either when a given CPU is registered or when the cpufreq driver is registered, whichever happens first. That is not sufficient, however, because cpufreq_add_dev() may be called for an offline CPU whose policy object has not been created yet and, quite obviously, the symbolic cannot be added in that case. Fix that by making cpufreq_online() attempt to add symbolic links to policy objects for the CPUs in the related_cpus mask of every new policy object created by it. The cpufreq_driver_lock locking around the for_each_cpu() loop in cpufreq_online() is dropped, because it is not necessary and the code is somewhat simpler without it. Moreover, failures to create a symbolic link will not be regarded as hard errors any more and the CPUs without those links will not be taken offline automatically, but that should not be problematic in practice. Reported-and-tested-by: Prashanth Prakash Signed-off-by: Rafael J. Wysocki Cc: 4.9+ # 4.9+ --- drivers/cpufreq/cpufreq.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5dbdd261aa73..bc96d423781a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -918,11 +918,19 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static int add_cpu_dev_symlink(struct cpufreq_policy *policy, - struct device *dev) +static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu) { + struct device *dev = get_cpu_device(cpu); + + if (!dev) + return; + + if (cpumask_test_and_set_cpu(cpu, policy->real_cpus)) + return; + dev_dbg(dev, "%s: Adding symlink\n", __func__); - return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq"); + if (sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq")) + dev_err(dev, "cpufreq symlink creation failed\n"); } static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, @@ -1180,10 +1188,10 @@ static int cpufreq_online(unsigned int cpu) policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; - write_lock_irqsave(&cpufreq_driver_lock, flags); - for_each_cpu(j, policy->related_cpus) + for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); + add_cpu_dev_symlink(policy, j); + } } else { policy->min = policy->user_policy.min; policy->max = policy->user_policy.max; @@ -1275,13 +1283,15 @@ out_exit_policy: if (cpufreq_driver->exit) cpufreq_driver->exit(policy); + + for_each_cpu(j, policy->real_cpus) + remove_cpu_dev_symlink(policy, get_cpu_device(j)); + out_free_policy: cpufreq_policy_free(policy); return ret; } -static int cpufreq_offline(unsigned int cpu); - /** * cpufreq_add_dev - the cpufreq interface for a CPU device. * @dev: CPU device. @@ -1303,16 +1313,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Create sysfs link on CPU registration */ policy = per_cpu(cpufreq_cpu_data, cpu); - if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus)) - return 0; + if (policy) + add_cpu_dev_symlink(policy, cpu); - ret = add_cpu_dev_symlink(policy, dev); - if (ret) { - cpumask_clear_cpu(cpu, policy->real_cpus); - cpufreq_offline(cpu); - } - - return ret; + return 0; } static int cpufreq_offline(unsigned int cpu) -- cgit v1.2.3 From a431ecd2d459da3c91a612061f09eb422ffe78e2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 27 Mar 2017 13:52:00 -0400 Subject: Revert "pata_atiixp: Don't use unconnected secondary port on SB600/SB700" This reverts commit 5946fdaee4ba449e8fbb5d403e1ed69437f916e8. The original commit's assumption that the secondary port is unconnected turns out to be false. Signed-off-by: Tejun Heo Reported-by: Markku Pesonen Fixes: 5946fdaee4ba ("pata_atiixp: Don't use unconnected secondary port on SB600/SB700") Cc: Darren Stevens --- drivers/ata/pata_atiixp.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c index 6c9aa95a9a05..49d705c9f0f7 100644 --- a/drivers/ata/pata_atiixp.c +++ b/drivers/ata/pata_atiixp.c @@ -278,11 +278,6 @@ static int atiixp_init_one(struct pci_dev *pdev, const struct pci_device_id *id) }; const struct ata_port_info *ppi[] = { &info, &info }; - /* SB600/700 don't have secondary port wired */ - if ((pdev->device == PCI_DEVICE_ID_ATI_IXP600_IDE) || - (pdev->device == PCI_DEVICE_ID_ATI_IXP700_IDE)) - ppi[1] = &ata_dummy_port_info; - return ata_pci_bmdma_init_one(pdev, ppi, &atiixp_sht, NULL, ATA_HOST_PARALLEL_SCAN); } -- cgit v1.2.3 From ab6434a1377a768a1e6d3e6cf819eb21724a99c2 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 27 Mar 2017 14:30:06 -0400 Subject: audit: move audit_signal_info() into kernel/auditsc.c Commit 5b52330bbfe6 ("audit: fix auditd/kernel connection state tracking") made inlining audit_signal_info() a bit pointless as it was always calling into auditd_test_task() so let's remove the inline function in kernel/audit.h and convert __audit_signal_info() in kernel/auditsc.c into audit_signal_info(). Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore --- kernel/audit.h | 8 +------- kernel/auditsc.c | 25 +++++++++++++------------ 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/kernel/audit.h b/kernel/audit.h index 0f1cf6d1878a..0d87f8ab8778 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -333,13 +333,7 @@ extern u32 audit_sig_sid; extern int audit_filter(int msgtype, unsigned int listtype); #ifdef CONFIG_AUDITSYSCALL -extern int __audit_signal_info(int sig, struct task_struct *t); -static inline int audit_signal_info(int sig, struct task_struct *t) -{ - if (auditd_test_task(t) || (audit_signals && !audit_dummy_context())) - return __audit_signal_info(sig, t); - return 0; -} +extern int audit_signal_info(int sig, struct task_struct *t); extern void audit_filter_inodes(struct task_struct *, struct audit_context *); extern struct list_head *audit_killed_trees(void); #else diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e59ffc7fc522..1c2333155893 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2249,26 +2249,27 @@ void __audit_ptrace(struct task_struct *t) * If the audit subsystem is being terminated, record the task (pid) * and uid that is doing that. */ -int __audit_signal_info(int sig, struct task_struct *t) +int audit_signal_info(int sig, struct task_struct *t) { struct audit_aux_data_pids *axp; struct task_struct *tsk = current; struct audit_context *ctx = tsk->audit_context; kuid_t uid = current_uid(), t_uid = task_uid(t); - if (auditd_test_task(t)) { - if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { - audit_sig_pid = task_tgid_nr(tsk); - if (uid_valid(tsk->loginuid)) - audit_sig_uid = tsk->loginuid; - else - audit_sig_uid = uid; - security_task_getsecid(tsk, &audit_sig_sid); - } - if (!audit_signals || audit_dummy_context()) - return 0; + if (auditd_test_task(t) && + (sig == SIGTERM || sig == SIGHUP || + sig == SIGUSR1 || sig == SIGUSR2)) { + audit_sig_pid = task_tgid_nr(tsk); + if (uid_valid(tsk->loginuid)) + audit_sig_uid = tsk->loginuid; + else + audit_sig_uid = uid; + security_task_getsecid(tsk, &audit_sig_sid); } + if (!audit_signals || audit_dummy_context()) + return 0; + /* optimize the common case by putting first signal recipient directly * in audit_context */ if (!ctx->target_pid) { -- cgit v1.2.3 From ce4b4f228e51219b0b79588caf73225b08b5b779 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 24 Mar 2017 19:01:09 +0900 Subject: drm/radeon: Override fpfn for all VRAM placements in radeon_evict_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were accidentally only overriding the first VRAM placement. For BOs with the RADEON_GEM_NO_CPU_ACCESS flag set, radeon_ttm_placement_from_domain creates a second VRAM placment with fpfn == 0. If VRAM is almost full, the first VRAM placement with fpfn > 0 may not work, but the second one with fpfn == 0 always will (the BO's current location trivially satisfies it). Because "moving" the BO to its current location puts it back on the LRU list, this results in an infinite loop. Fixes: 2a85aedd117c ("drm/radeon: Try evicting from CPU accessible to inaccessible VRAM first") Reported-by: Zachary Michaels Reported-and-Tested-by: Julien Isorce Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 684f1703aa5c..aaa3e80fecb4 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, rbo->placement.num_busy_placement = 0; for (i = 0; i < rbo->placement.num_placement; i++) { if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) { - if (rbo->placements[0].fpfn < fpfn) - rbo->placements[0].fpfn = fpfn; + if (rbo->placements[i].fpfn < fpfn) + rbo->placements[i].fpfn = fpfn; } else { rbo->placement.busy_placement = &rbo->placements[i]; -- cgit v1.2.3 From 551afbb85b3898e78068405d78708245999c19c0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2017 18:07:00 -0400 Subject: NFS cleanup struct nfs4_filelayout_segment Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index 2896cb833a11..4c4d436a6796 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -55,15 +55,15 @@ struct nfs4_file_layout_dsaddr { }; struct nfs4_filelayout_segment { - struct pnfs_layout_segment generic_hdr; - u32 stripe_type; - u32 commit_through_mds; - u32 stripe_unit; - u32 first_stripe_index; - u64 pattern_offset; - struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ - unsigned int num_fh; - struct nfs_fh **fh_array; + struct pnfs_layout_segment generic_hdr; + u32 stripe_type; + u32 commit_through_mds; + u32 stripe_unit; + u32 first_stripe_index; + u64 pattern_offset; + struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ + unsigned int num_fh; + struct nfs_fh **fh_array; }; struct nfs4_filelayout { -- cgit v1.2.3 From 248ccd5ee644fcf68984d945cffcdc364992ddbf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 27 Mar 2017 10:48:11 -0700 Subject: MAINTAINERS: Add Andrew Lunn as co-maintainer of PHYLIB Andrew has been contributing a lot to PHYLIB over the past months and his feedback on patches is more than welcome. Signed-off-by: Florian Fainelli Acked-by: Andrew Lunn Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e48678d15401..9975dcefc372 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4922,6 +4922,7 @@ F: include/linux/netfilter_bridge/ F: net/bridge/ ETHERNET PHY LIBRARY +M: Andrew Lunn M: Florian Fainelli L: netdev@vger.kernel.org S: Maintained -- cgit v1.2.3 From a3902ee98304324f681088203391b5c0353ce977 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Tue, 21 Mar 2017 21:19:57 +0900 Subject: scsi: ufs: remove the duplicated checking for supporting clkscaling There are same conditions for checking whether supporting clkscaling or not. When ufshcd is supporting clkscaling, active_reqs should be decreased by one. [mkp: addressed comment from Bartlomiej] Signed-off-by: Jaehoon Chung Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index e8c26e6e6237..096e95b911bd 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4662,8 +4662,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, } if (ufshcd_is_clkscaling_supported(hba)) hba->clk_scaling.active_reqs--; - if (ufshcd_is_clkscaling_supported(hba)) - hba->clk_scaling.active_reqs--; } /* clear corresponding bits of completed commands */ -- cgit v1.2.3 From ffefb6f4d6ad699a2b5484241bc46745a53235d0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Mar 2017 18:00:14 +0100 Subject: net: ipconfig: fix ic_close_devs() use-after-free Our chosen ic_dev may be anywhere in our list of ic_devs, and we may free it before attempting to close others. When we compare d->dev and ic_dev->dev, we're potentially dereferencing memory returned to the allocator. This causes KASAN to scream for each subsequent ic_dev we check. As there's a 1-1 mapping between ic_devs and netdevs, we can instead compare d and ic_dev directly, which implicitly handles the !ic_dev case, and avoids the use-after-free. The ic_dev pointer may be stale, but we will not dereference it. Original splat: [ 6.487446] ================================================================== [ 6.494693] BUG: KASAN: use-after-free in ic_close_devs+0xc4/0x154 at addr ffff800367efa708 [ 6.503013] Read of size 8 by task swapper/0/1 [ 6.507452] CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc3-00002-gda42158 #8 [ 6.514993] Hardware name: AppliedMicro Mustang/Mustang, BIOS 3.05.05-beta_rc Jan 27 2016 [ 6.523138] Call trace: [ 6.525590] [] dump_backtrace+0x0/0x570 [ 6.530976] [] show_stack+0x20/0x30 [ 6.536017] [] dump_stack+0x120/0x188 [ 6.541231] [] kasan_object_err+0x24/0xa0 [ 6.546790] [] kasan_report_error+0x244/0x738 [ 6.552695] [] __asan_report_load8_noabort+0x54/0x80 [ 6.559204] [] ic_close_devs+0xc4/0x154 [ 6.564590] [] ip_auto_config+0x2ed4/0x2f1c [ 6.570321] [] do_one_initcall+0xcc/0x370 [ 6.575882] [] kernel_init_freeable+0x5f8/0x6c4 [ 6.581959] [] kernel_init+0x18/0x190 [ 6.587171] [] ret_from_fork+0x10/0x40 [ 6.592468] Object at ffff800367efa700, in cache kmalloc-128 size: 128 [ 6.598969] Allocated: [ 6.601324] PID = 1 [ 6.603427] save_stack_trace_tsk+0x0/0x418 [ 6.607603] save_stack_trace+0x20/0x30 [ 6.611430] kasan_kmalloc+0xd8/0x188 [ 6.615087] ip_auto_config+0x8c4/0x2f1c [ 6.619002] do_one_initcall+0xcc/0x370 [ 6.622832] kernel_init_freeable+0x5f8/0x6c4 [ 6.627178] kernel_init+0x18/0x190 [ 6.630660] ret_from_fork+0x10/0x40 [ 6.634223] Freed: [ 6.636233] PID = 1 [ 6.638334] save_stack_trace_tsk+0x0/0x418 [ 6.642510] save_stack_trace+0x20/0x30 [ 6.646337] kasan_slab_free+0x88/0x178 [ 6.650167] kfree+0xb8/0x478 [ 6.653131] ic_close_devs+0x130/0x154 [ 6.656875] ip_auto_config+0x2ed4/0x2f1c [ 6.660875] do_one_initcall+0xcc/0x370 [ 6.664705] kernel_init_freeable+0x5f8/0x6c4 [ 6.669051] kernel_init+0x18/0x190 [ 6.672534] ret_from_fork+0x10/0x40 [ 6.676098] Memory state around the buggy address: [ 6.680880] ffff800367efa600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 6.688078] ffff800367efa680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 6.695276] >ffff800367efa700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 6.702469] ^ [ 6.705952] ffff800367efa780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 6.713149] ffff800367efa800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 6.720343] ================================================================== [ 6.727536] Disabling lock debugging due to kernel taint Signed-off-by: Mark Rutland Cc: Alexey Kuznetsov Cc: David S. Miller Cc: Hideaki YOSHIFUJI Cc: James Morris Cc: Patrick McHardy Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index fd9f34bbd740..dfb2ab2dd3c8 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -306,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { + if (d != ic_dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } -- cgit v1.2.3 From 59f1183dd368f12c0a80da3c91a4a42afa4e1d38 Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Fri, 3 Mar 2017 14:40:44 -0800 Subject: sparc64: Fix size check in huge_pte_alloc Signed-off-by: Nitin Gupta Signed-off-by: David S. Miller --- arch/sparc/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 323bc6b6e3ad..30168500603e 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -261,7 +261,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (!pmd) return NULL; - if (sz == PMD_SHIFT) + if (sz >= PMD_SIZE) pte = (pte_t *)pmd; else pte = pte_alloc_map(mm, pmd, addr); -- cgit v1.2.3 From 85b1da7c47052330af9485a5f5c7e54ede882e65 Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Thu, 9 Mar 2017 14:22:23 -0800 Subject: sparc64: Add support for 2G hugepages Signed-off-by: Nitin Gupta Signed-off-by: David S. Miller --- arch/sparc/include/asm/page_64.h | 3 ++- arch/sparc/mm/hugetlbpage.c | 7 +++++++ arch/sparc/mm/init_64.c | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index f294dd42fc7d..5961b2d8398a 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -17,6 +17,7 @@ #define HPAGE_SHIFT 23 #define REAL_HPAGE_SHIFT 22 +#define HPAGE_2GB_SHIFT 31 #define HPAGE_256MB_SHIFT 28 #define HPAGE_64K_SHIFT 16 #define REAL_HPAGE_SIZE (_AC(1,UL) << REAL_HPAGE_SHIFT) @@ -27,7 +28,7 @@ #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) -#define HUGE_MAX_HSTATE 3 +#define HUGE_MAX_HSTATE 4 #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 30168500603e..ee5273ad918d 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift) pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V; switch (shift) { + case HPAGE_2GB_SHIFT: + hugepage_size = _PAGE_SZ2GB_4V; + pte_val(entry) |= _PAGE_PMD_HUGE; + break; case HPAGE_256MB_SHIFT: hugepage_size = _PAGE_SZ256MB_4V; pte_val(entry) |= _PAGE_PMD_HUGE; @@ -183,6 +187,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry) unsigned int shift; switch (tte_szbits) { + case _PAGE_SZ2GB_4V: + shift = HPAGE_2GB_SHIFT; + break; case _PAGE_SZ256MB_4V: shift = HPAGE_256MB_SHIFT; break; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index ccd455328989..3328043e990c 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -337,6 +337,10 @@ static int __init setup_hugepagesz(char *string) hugepage_shift = ilog2(hugepage_size); switch (hugepage_shift) { + case HPAGE_2GB_SHIFT: + hv_pgsz_mask = HV_PGSZ_MASK_2GB; + hv_pgsz_idx = HV_PGSZ_IDX_2GB; + break; case HPAGE_256MB_SHIFT: hv_pgsz_mask = HV_PGSZ_MASK_256MB; hv_pgsz_idx = HV_PGSZ_IDX_256MB; -- cgit v1.2.3 From adfae8a5d833fa2b46577a8081f350e408851f5b Mon Sep 17 00:00:00 2001 From: bob picco Date: Fri, 10 Mar 2017 14:31:19 -0500 Subject: sparc64: kern_addr_valid regression I encountered this bug when using /proc/kcore to examine the kernel. Plus a coworker inquired about debugging tools. We computed pa but did not use it during the maximum physical address bits test. Instead we used the identity mapped virtual address which will always fail this test. I believe the defect came in here: [bpicco@zareason linus.git]$ git describe --contains bb4e6e85daa52 v3.18-rc1~87^2~4 . Signed-off-by: Bob Picco Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 3328043e990c..0cda653ae007 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1567,7 +1567,7 @@ bool kern_addr_valid(unsigned long addr) if ((long)addr < 0L) { unsigned long pa = __pa(addr); - if ((addr >> max_phys_bits) != 0UL) + if ((pa >> max_phys_bits) != 0UL) return false; return pfn_valid(pa >> PAGE_SHIFT); -- cgit v1.2.3 From 0ae2d26ffe70c32d4a7fe77593f0a55ce416c09e Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 17 Mar 2017 14:52:21 -0600 Subject: arch/sparc: Avoid DCTI Couples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid un-intended DCTI Couples. Use of DCTI couples is deprecated. Also address the "Programming Note" for optimal performance. Here is the complete text from Oracle SPARC Architecture Specs. 6.3.4.7 DCTI Couples "A delayed control transfer instruction (DCTI) in the delay slot of another DCTI is referred to as a “DCTI couple”. The use of DCTI couples is deprecated in the Oracle SPARC Architecture; no new software should place a DCTI in the delay slot of another DCTI, because on future Oracle SPARC Architecture implementations DCTI couples may execute either slowly or differently than the programmer assumes it will. SPARC V8 and SPARC V9 Compatibility Note The SPARC V8 architecture left behavior undefined for a DCTI couple. The SPARC V9 architecture defined behavior in that case, but as of UltraSPARC Architecture 2005, use of DCTI couples was deprecated. Software should not expect high performance from DCTI couples, and performance of DCTI couples should be expected to decline further in future processors. Programming Note As noted in TABLE 6-5 on page 115, an annulled branch-always (branch-always with a = 1) instruction is not architecturally a DCTI. However, since not all implementations make that distinction, for optimal performance, a DCTI should not be placed in the instruction word immediately following an annulled branch-always instruction (BA,A or BPA,A)." Signed-off-by: Babu Moger Reviewed-by: Rob Gardner Signed-off-by: David S. Miller --- arch/sparc/kernel/head_64.S | 4 ++++ arch/sparc/kernel/misctrap.S | 1 + arch/sparc/kernel/rtrap_64.S | 1 + arch/sparc/kernel/spiterrs.S | 1 + arch/sparc/kernel/sun4v_tlb_miss.S | 1 + arch/sparc/kernel/urtt_fill.S | 1 + arch/sparc/kernel/winfixup.S | 2 ++ arch/sparc/lib/NG2memcpy.S | 4 ++++ arch/sparc/lib/NG4memcpy.S | 1 + arch/sparc/lib/NG4memset.S | 1 + arch/sparc/lib/NGmemcpy.S | 1 + 11 files changed, 18 insertions(+) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 6aa3da152c20..44101196d02b 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -96,6 +96,7 @@ sparc64_boot: andn %g1, PSTATE_AM, %g1 wrpr %g1, 0x0, %pstate ba,a,pt %xcc, 1f + nop .globl prom_finddev_name, prom_chosen_path, prom_root_node .globl prom_getprop_name, prom_mmu_name, prom_peer_name @@ -613,6 +614,7 @@ niagara_tlb_fixup: nop ba,a,pt %xcc, 80f + nop niagara4_patch: call niagara4_patch_copyops nop @@ -622,6 +624,7 @@ niagara4_patch: nop ba,a,pt %xcc, 80f + nop niagara2_patch: call niagara2_patch_copyops @@ -632,6 +635,7 @@ niagara2_patch: nop ba,a,pt %xcc, 80f + nop niagara_patch: call niagara_patch_copyops diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S index 34b4933900bf..9276d2f0dd86 100644 --- a/arch/sparc/kernel/misctrap.S +++ b/arch/sparc/kernel/misctrap.S @@ -82,6 +82,7 @@ do_stdfmna: call handle_stdfmna add %sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap + nop .size do_stdfmna,.-do_stdfmna .type breakpoint_trap,#function diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 216948ca4382..709a82ebd294 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -237,6 +237,7 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 bne,pt %xcc, user_rtt_fill_32bit wrpr %g1, %cwp ba,a,pt %xcc, user_rtt_fill_64bit + nop user_rtt_fill_fixup_dax: ba,pt %xcc, user_rtt_fill_fixup_common diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S index 4a73009f66a5..d7e540842809 100644 --- a/arch/sparc/kernel/spiterrs.S +++ b/arch/sparc/kernel/spiterrs.S @@ -86,6 +86,7 @@ __spitfire_cee_trap_continue: rd %pc, %g7 ba,a,pt %xcc, 2f + nop 1: ba,pt %xcc, etrap_irq rd %pc, %g7 diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index 6179e19bc9b9..c19f352f46c7 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -352,6 +352,7 @@ sun4v_mna: call sun4v_do_mna add %sp, PTREGS_OFF, %o0 ba,a,pt %xcc, rtrap + nop /* Privileged Action. */ sun4v_privact: diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S index 5604a2b051d4..364af3250646 100644 --- a/arch/sparc/kernel/urtt_fill.S +++ b/arch/sparc/kernel/urtt_fill.S @@ -92,6 +92,7 @@ user_rtt_fill_fixup_common: call sun4v_data_access_exception nop ba,a,pt %xcc, rtrap + nop 1: call spitfire_data_access_exception nop diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S index 855019a8590e..1ee173cc3c39 100644 --- a/arch/sparc/kernel/winfixup.S +++ b/arch/sparc/kernel/winfixup.S @@ -152,6 +152,8 @@ fill_fixup_dax: call sun4v_data_access_exception nop ba,a,pt %xcc, rtrap + nop 1: call spitfire_data_access_exception nop ba,a,pt %xcc, rtrap + nop diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index c629dbd121b6..64dcd6cdb606 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -326,11 +326,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ blu 170f nop ba,a,pt %xcc, 180f + nop 4: /* 32 <= low bits < 48 */ blu 150f nop ba,a,pt %xcc, 160f + nop 5: /* 0 < low bits < 32 */ blu,a 6f cmp %g2, 8 @@ -338,6 +340,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ blu 130f nop ba,a,pt %xcc, 140f + nop 6: /* 0 < low bits < 16 */ bgeu 120f nop @@ -475,6 +478,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %o2, 85f sub %o0, %o1, GLOBAL_SPARE ba,a,pt %XCC, 90f + nop .align 64 75: /* 16 < len <= 64 */ diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 75bb93b1437f..78ea962edcbe 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -530,4 +530,5 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bne,pt %icc, 1b EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) ba,a,pt %icc, .Lexit + nop .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S index 41da4bdd95cb..7c0c81f18837 100644 --- a/arch/sparc/lib/NG4memset.S +++ b/arch/sparc/lib/NG4memset.S @@ -102,4 +102,5 @@ NG4bzero: bne,pt %icc, 1b add %o0, 0x30, %o0 ba,a,pt %icc, .Lpostloop + nop .size NG4bzero,.-NG4bzero diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S index d88c4ed50a00..cd654a719b27 100644 --- a/arch/sparc/lib/NGmemcpy.S +++ b/arch/sparc/lib/NGmemcpy.S @@ -394,6 +394,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ brz,pt %i2, 85f sub %o0, %i1, %i3 ba,a,pt %XCC, 90f + nop .align 64 70: /* 16 < len <= 64 */ -- cgit v1.2.3 From cc66afea58f858ff6da7f79b8a595a67bbb4f9a9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 27 Mar 2017 11:32:59 +0200 Subject: x86/mce: Don't print MCEs when mcelog is active Since: cd9c57cad3fe ("x86/MCE: Dump MCE to dmesg if no consumers") all MCEs are printed even when mcelog is running. Fix the regression to not print to dmesg when mcelog is running as it is a consumer too. Signed-off-by: Andi Kleen [ Massage commit message. ] Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Cc: stable@vger.kernel.org # 4.10.. Fixes: cd9c57cad3fe ("x86/MCE: Dump MCE to dmesg if no consumers") Link: http://lkml.kernel.org/r/20170327093304.10683-2-bp@alien8.de Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8e9725c607ea..5accfbdee3f0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -54,6 +54,8 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); +static int mce_chrdev_open_count; /* #times opened */ + #define mce_log_get_idx_check(p) \ ({ \ RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ @@ -598,6 +600,10 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val, if (atomic_read(&num_notifiers) > 2) return NOTIFY_DONE; + /* Don't print when mcelog is running */ + if (mce_chrdev_open_count > 0) + return NOTIFY_DONE; + __print_mce(m); return NOTIFY_DONE; @@ -1828,7 +1834,6 @@ void mcheck_cpu_clear(struct cpuinfo_x86 *c) */ static DEFINE_SPINLOCK(mce_chrdev_state_lock); -static int mce_chrdev_open_count; /* #times opened */ static int mce_chrdev_open_exclu; /* already open exclusive? */ static int mce_chrdev_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From 07de36b378a58f1d1426829acf0ab7cf86f651f3 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Wed, 22 Mar 2017 17:32:49 +0300 Subject: clockevents: Fix syntax error in clkevt-of macro The patch fix syntax errors introduced by commit 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of"). Fixes: 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of") Signed-off-by: Alexander Kochetkov Signed-off-by: Daniel Lezcano --- drivers/clocksource/clkevt-probe.c | 2 +- include/linux/clockchips.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/clkevt-probe.c b/drivers/clocksource/clkevt-probe.c index 8c30fec86094..eb89b502acbd 100644 --- a/drivers/clocksource/clkevt-probe.c +++ b/drivers/clocksource/clkevt-probe.c @@ -17,7 +17,7 @@ #include #include -#include +#include extern struct of_device_id __clkevt_of_table[]; diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 5d3053c34fb3..6d7edc3082f9 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -229,7 +229,7 @@ static inline void tick_setup_hrtimer_broadcast(void) { } #ifdef CONFIG_CLKEVT_PROBE extern int clockevent_probe(void); -#els +#else static inline int clockevent_probe(void) { return 0; } #endif -- cgit v1.2.3 From 8d09617b076fd03ee9ae124abce94dda17bf3723 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Wed, 22 Mar 2017 17:29:06 +0300 Subject: vmlinux.lds: Add __clkevt_of_table to kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code introduced by commit 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of") refer to __clkevt_of_table what doesn't exist in the vmlinux. As a result kernel build failed with error: "clkevt-probe.c:63: undefined reference to `__clkevt_of_table’" Fixes: 0c8893c9095d ("clockevents: Add a clkevt-of mechanism like clksrc-of") Signed-off-by: Alexander Kochetkov Signed-off-by: Daniel Lezcano --- include/asm-generic/vmlinux.lds.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0968d13b3885..8c6b525eb0fa 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -173,6 +173,7 @@ KEEP(*(__##name##_of_table_end)) #define CLKSRC_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc) +#define CLKEVT_OF_TABLES() OF_TABLE(CONFIG_CLKEVT_OF, clkevt) #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip) #define CLK_OF_TABLES() OF_TABLE(CONFIG_COMMON_CLK, clk) #define IOMMU_OF_TABLES() OF_TABLE(CONFIG_OF_IOMMU, iommu) @@ -559,6 +560,7 @@ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ CLKSRC_OF_TABLES() \ + CLKEVT_OF_TABLES() \ IOMMU_OF_TABLES() \ CPU_METHOD_OF_TABLES() \ CPUIDLE_METHOD_OF_TABLES() \ -- cgit v1.2.3 From 658b299580da2830a69eea44a8b6da1c2579a32e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Mar 2017 09:53:00 +0200 Subject: sched/headers: Remove duplicate #include line Vito Caputo reported that the sched.h split-up series introduced a duplicate #include line in drivers/tty/vt/keyboard.c. Remove it. Reported-by: Vito Caputo Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Alan Cox Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- drivers/tty/vt/keyboard.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index c5f0fc906136..8af8d9542663 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 0292e169b2d9c8377a168778f0b16eadb1f578fd Mon Sep 17 00:00:00 2001 From: "Herongguang (Stephen)" Date: Mon, 27 Mar 2017 15:21:17 +0800 Subject: KVM: pci-assign: do not map smm memory slot pages in vt-d page tables or VM memory are not put thus leaked in kvm_iommu_unmap_memslots() when destroy VM. This is consistent with current vfio implementation. Signed-off-by: herongguang Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ef1aa7f1ed7a..88257b311cb5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1065,7 +1065,7 @@ int __kvm_set_memory_region(struct kvm *kvm, * changes) is disallowed above, so any other attribute changes getting * here can be skipped. */ - if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { + if (as_id == 0 && (change == KVM_MR_CREATE || change == KVM_MR_MOVE)) { r = kvm_iommu_map_pages(kvm, &new); return r; } -- cgit v1.2.3 From 7ad658b693536741c37b16aeb07840a2ce75f5b9 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 23 Mar 2017 07:18:08 +0100 Subject: KVM: nVMX: fix nested EPT detection The nested_ept_enabled flag introduced in commit 7ca29de2136 was not computed correctly. We are interested only in L1's EPT state, not the the combined L0+L1 value. In particular, if L0 uses EPT but L1 does not, nested_ept_enabled must be false to make sure that PDPSTRs are loaded based on CR3 as usual, because the special case described in 26.3.2.4 Loading Page-Directory- Pointer-Table Entries does not apply. Fixes: 7ca29de21362 ("KVM: nVMX: fix CR3 load if L2 uses PAE paging and EPT") Cc: qemu-stable@nongnu.org Reported-by: Wanpeng Li Reviewed-by: David Hildenbrand Signed-off-by: Ladi Prosek Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cd1ba62878f3..2ee00dbbbd51 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9992,7 +9992,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control; - bool nested_ept_enabled = false; vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); @@ -10139,8 +10138,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_intr_status); } - nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; - /* * Write an illegal value to APIC_ACCESS_ADDR. Later, * nested_get_vmcs12_pages will either fix it up or @@ -10303,7 +10300,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx_set_efer(vcpu, vcpu->arch.efer); /* Shadow page tables on either EPT or shadow page tables. */ - if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled, + if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), entry_failure_code)) return 1; -- cgit v1.2.3 From 7ed23e1bae8bf7e37fd555066550a00b95a3a98b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 20 Mar 2017 17:49:03 +1100 Subject: powerpc: Disable HFSCR[TM] if TM is not supported On Power8 & Power9 the early CPU inititialisation in __init_HFSCR() turns on HFSCR[TM] (Hypervisor Facility Status and Control Register [Transactional Memory]), but that doesn't take into account that TM might be disabled by CPU features, or disabled by the kernel being built with CONFIG_PPC_TRANSACTIONAL_MEM=n. So later in boot, when we have setup the CPU features, clear HSCR[TM] if the TM CPU feature has been disabled. We use CPU_FTR_TM_COMP to account for the CONFIG_PPC_TRANSACTIONAL_MEM=n case. Without this a KVM guest might try use TM, even if told not to, and cause an oops in the host kernel. Typically the oops is seen in __kvmppc_vcore_entry() and may or may not be fatal to the host, but is always bad news. In practice all shipping CPU revisions do support TM, and all host kernels we are aware of build with TM support enabled, so no one should actually be able to hit this in the wild. Fixes: 2a3563b023e5 ("powerpc: Setup in HFSCR for POWER8") Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Benjamin Herrenschmidt Tested-by: Sam Bobroff [mpe: Rewrite change log with input from Sam, add Fixes/stable] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9cfaa8b69b5f..f997154dfc41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -236,6 +236,15 @@ static void cpu_ready_for_interrupts(void) mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); } + /* + * Fixup HFSCR:TM based on CPU features. The bit is set by our + * early asm init because at that point we haven't updated our + * CPU features from firmware and device-tree. Here we have, + * so let's do it. + */ + if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP)) + mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM); + /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; } -- cgit v1.2.3 From 2beb6dad2e8f95d710159d5befb390e4f62ab5cf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Mar 2017 17:53:50 +0200 Subject: KVM: x86: cleanup the page tracking SRCU instance SRCU uses a delayed work item. Skip cleaning it up, and the result is use-after-free in the work item callbacks. Reported-by: Dmitry Vyukov Suggested-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: 0eb05bf290cfe8610d9680b49abef37febd1c38a Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_page_track.h | 1 + arch/x86/kvm/page_track.c | 8 ++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 10 insertions(+) diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index d74747b031ec..c4eda791f877 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node { }; void kvm_page_track_init(struct kvm *kvm); +void kvm_page_track_cleanup(struct kvm *kvm); void kvm_page_track_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 37942e419c32..60168cdd0546 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -160,6 +160,14 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); } +void kvm_page_track_cleanup(struct kvm *kvm) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + cleanup_srcu_struct(&head->track_srcu); +} + void kvm_page_track_init(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 64697fe475c3..ccbd45ecd41a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8158,6 +8158,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); + kvm_page_track_cleanup(kvm); } void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, -- cgit v1.2.3 From 629dc8704b922f0c46f3025bd3486c2bc51eb7a6 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2017 18:07:01 -0400 Subject: NFS store nfs4_deviceid in struct nfs4_filelayout_segment In preparation for moving the filelayout getdeviceinfo call from filelayout_alloc_lseg called by pnfs_process_layout Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 13 +++++-------- fs/nfs/filelayout/filelayout.h | 1 + 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 7aff350f15b1..cad74c1c79ff 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -572,7 +572,6 @@ static int filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, gfp_t gfp_flags) { struct nfs4_deviceid_node *d; @@ -602,7 +601,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, } /* find and reference the deviceid */ - d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), id, + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, lo->plh_lc_cred, gfp_flags); if (d == NULL) goto out; @@ -657,7 +656,6 @@ static int filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, gfp_t gfp_flags) { struct xdr_stream stream; @@ -682,9 +680,9 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (unlikely(!p)) goto out_err; - memcpy(id, p, sizeof(*id)); + memcpy(&fl->deviceid, p, sizeof(fl->deviceid)); p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); - nfs4_print_deviceid(id); + nfs4_print_deviceid(&fl->deviceid); nfl_util = be32_to_cpup(p++); if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) @@ -831,15 +829,14 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, { struct nfs4_filelayout_segment *fl; int rc; - struct nfs4_deviceid id; dprintk("--> %s\n", __func__); fl = kzalloc(sizeof(*fl), gfp_flags); if (!fl) return NULL; - rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { + rc = filelayout_decode_layout(layoutid, fl, lgr, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, gfp_flags)) { _filelayout_free_lseg(fl); return NULL; } diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index 4c4d436a6796..79323b5dab0c 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -61,6 +61,7 @@ struct nfs4_filelayout_segment { u32 stripe_unit; u32 first_stripe_index; u64 pattern_offset; + struct nfs4_deviceid deviceid; struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ unsigned int num_fh; struct nfs_fh **fh_array; -- cgit v1.2.3 From 8d40b0f14846f7d45c7c72d343fe62cb866dda34 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 20 Mar 2017 18:07:02 -0400 Subject: NFS filelayout:call GETDEVICEINFO after pnfs_layout_process completes Fix a filelayout GETDEVICEINFO call hang triggered from the LAYOUTGET pnfs_layout_process where the GETDEVICEINFO call is waiting for a session slot, and the LAYOUGET call is waiting for pnfs_layout_process to complete before freeing the slot GETDEVICEINFO is waiting for.. This occurs in testing against the pynfs pNFS server where the the on-wire reply highest_slotid and slot id are zero, and the target high slot id is 8 (negotiated in CREATE_SESSION). The internal fore channel slot table max_slotid, the maximum allowed table slotid value, has been reduced via nfs41_set_max_slotid_locked from 8 to 1. Thus there is one slot (slotid 0) available for use but it has not been freed by LAYOUTGET proir to the GETDEVICEINFO request. In order to ensure that layoutrecall callbacks are processed in the correct order, nfs4_proc_layoutget processing needs to be finished e.g. pnfs_layout_process) before giving up the slot that identifies the layoutget (see referring_call_exists). Move the filelayout_check_layout nfs4_find_get_device call outside of the pnfs_layout_process call tree. Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 138 +++++++++++++++++++++++++++-------------- 1 file changed, 91 insertions(+), 47 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index cad74c1c79ff..367f8eb19bfa 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -560,6 +560,50 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) return PNFS_ATTEMPTED; } +static int +filelayout_check_deviceid(struct pnfs_layout_hdr *lo, + struct nfs4_filelayout_segment *fl, + gfp_t gfp_flags) +{ + struct nfs4_deviceid_node *d; + struct nfs4_file_layout_dsaddr *dsaddr; + int status = -EINVAL; + + /* find and reference the deviceid */ + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, + lo->plh_lc_cred, gfp_flags); + if (d == NULL) + goto out; + + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + /* Found deviceid is unavailable */ + if (filelayout_test_devid_unavailable(&dsaddr->id_node)) + goto out_put; + + fl->dsaddr = dsaddr; + + if (fl->first_stripe_index >= dsaddr->stripe_count) { + dprintk("%s Bad first_stripe_index %u\n", + __func__, fl->first_stripe_index); + goto out_put; + } + + if ((fl->stripe_type == STRIPE_SPARSE && + fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || + (fl->stripe_type == STRIPE_DENSE && + fl->num_fh != dsaddr->stripe_count)) { + dprintk("%s num_fh %u not valid for given packing\n", + __func__, fl->num_fh); + goto out_put; + } + status = 0; +out: + return status; +out_put: + nfs4_fl_put_deviceid(dsaddr); + goto out; +} + /* * filelayout_check_layout() * @@ -574,8 +618,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags) { - struct nfs4_deviceid_node *d; - struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; dprintk("--> %s\n", __func__); @@ -600,41 +642,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } - /* find and reference the deviceid */ - d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, - lo->plh_lc_cred, gfp_flags); - if (d == NULL) - goto out; - - dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); - /* Found deviceid is unavailable */ - if (filelayout_test_devid_unavailable(&dsaddr->id_node)) - goto out_put; - - fl->dsaddr = dsaddr; - - if (fl->first_stripe_index >= dsaddr->stripe_count) { - dprintk("%s Bad first_stripe_index %u\n", - __func__, fl->first_stripe_index); - goto out_put; - } - - if ((fl->stripe_type == STRIPE_SPARSE && - fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || - (fl->stripe_type == STRIPE_DENSE && - fl->num_fh != dsaddr->stripe_count)) { - dprintk("%s num_fh %u not valid for given packing\n", - __func__, fl->num_fh); - goto out_put; - } - status = 0; out: dprintk("--> %s returns %d\n", __func__, status); return status; -out_put: - nfs4_fl_put_deviceid(dsaddr); - goto out; } static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) @@ -885,18 +896,51 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return min(stripe_unit - (unsigned int)stripe_offset, size); } +static struct pnfs_layout_segment * +fl_pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + bool strict_iomode, + gfp_t gfp_flags) +{ + struct pnfs_layout_segment *lseg = NULL; + struct pnfs_layout_hdr *lo; + struct nfs4_filelayout_segment *fl; + int status; + + lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode, + gfp_flags); + if (!lseg) + lseg = ERR_PTR(-ENOMEM); + if (IS_ERR(lseg)) + goto out; + + lo = NFS_I(ino)->layout; + fl = FILELAYOUT_LSEG(lseg); + + status = filelayout_check_deviceid(lo, fl, gfp_flags); + if (status) + lseg = ERR_PTR(status); +out: + if (IS_ERR(lseg)) + pnfs_put_lseg(lseg); + return lseg; +} + static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { if (!pgio->pg_lseg) { - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_READ, - false, - GFP_KERNEL); + pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + false, + GFP_KERNEL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -916,13 +960,13 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, int status; if (!pgio->pg_lseg) { - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_RW, - false, - GFP_NOFS); + pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + false, + GFP_NOFS); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; -- cgit v1.2.3 From de85ec8b07f82c8c84de7687f769e74bf4c26a1e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 23 Mar 2017 13:07:16 +0800 Subject: virtio_pci: fix out of bound access for msix_names Fedora has received multiple reports of crashes when running 4.11 as a guest https://bugzilla.redhat.com/show_bug.cgi?id=1430297 https://bugzilla.redhat.com/show_bug.cgi?id=1434462 https://bugzilla.kernel.org/show_bug.cgi?id=194911 https://bugzilla.redhat.com/show_bug.cgi?id=1433899 The crashes are not always consistent but they are generally some flavor of oops or GPF in virtio related code. Multiple people have done bisections (Thank you Thorsten Leemhuis and Richard W.M. Jones) and found this commit to be at fault 07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507 is the first bad commit commit 07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507 Author: Christoph Hellwig Date: Sun Feb 5 18:15:19 2017 +0100 virtio_pci: use shared interrupts for virtqueues The issue seems to be an out of bounds access to the msix_names array corrupting kernel memory. Fixes: 07ec51480b5e ("virtio_pci: use shared interrupts for virtqueues") Reported-by: Laura Abbott Signed-off-by: Jason Wang Signed-off-by: Michael S. Tsirkin Reviewed-by: Christoph Hellwig Tested-by: Richard W.M. Jones Tested-by: Thorsten Leemhuis --- drivers/virtio/virtio_pci_common.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index df548a6fb844..590534910dc6 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -147,7 +147,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); - int i, err = -ENOMEM, allocated_vectors, nvectors; + int i, j, err = -ENOMEM, allocated_vectors, nvectors; unsigned flags = PCI_IRQ_MSIX; bool shared = false; u16 msix_vec; @@ -212,7 +212,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, if (!vp_dev->msix_vector_map) goto out_disable_config_irq; - allocated_vectors = 1; /* vector 0 is the config interrupt */ + allocated_vectors = j = 1; /* vector 0 is the config interrupt */ for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; @@ -236,18 +236,19 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, continue; } - snprintf(vp_dev->msix_names[i + 1], + snprintf(vp_dev->msix_names[j], sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), vring_interrupt, IRQF_SHARED, - vp_dev->msix_names[i + 1], vqs[i]); + vp_dev->msix_names[j], vqs[i]); if (err) { /* don't free this irq on error */ vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR; goto out_remove_vqs; } vp_dev->msix_vector_map[i] = msix_vec; + j++; /* * Use a different vector for each queue if they are available, -- cgit v1.2.3 From fc8653228c8588a120f6b5dad6983b7b61ff669e Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 23 Mar 2017 08:04:18 +0100 Subject: virtio_balloon: init 1st buffer in stats vq When init_vqs runs, virtio_balloon.stats is either uninitialized or contains stale values. The host updates its state with garbage data because it has no way of knowing that this is just a marker buffer used for signaling. This patch updates the stats before pushing the initial buffer. Alternative fixes: * Push an empty buffer in init_vqs. Not easily done with the current virtio implementation and violates the spec "Driver MUST supply the same subset of statistics in all buffers submitted to the statsq". * Push a buffer with invalid tags in init_vqs. Violates the same spec clause, plus "invalid tag" is not really defined. Note: the spec says: When using the legacy interface, the device SHOULD ignore all values in the first buffer in the statsq supplied by the driver after device initialization. Note: Historically, drivers supplied an uninitialized buffer in the first buffer. Unfortunately QEMU does not seem to implement the recommendation even for the legacy interface. Cc: stable@vger.kernel.org Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 4e1191508228..fcd06e16a1a2 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -429,6 +429,8 @@ static int init_vqs(struct virtio_balloon *vb) * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). */ + update_balloon_stats(vb); + sg_init_one(&sg, vb->stats, sizeof vb->stats); if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) < 0) -- cgit v1.2.3 From 9646b26e85896ef0256e66649f7937f774dc18a6 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Tue, 28 Mar 2017 18:46:58 +0200 Subject: virtio-balloon: use actual number of stats for stats queue buffers The virtio balloon driver contained a not-so-obvious invariant that update_balloon_stats has to update exactly VIRTIO_BALLOON_S_NR counters in order to send valid stats to the host. This commit fixes it by having update_balloon_stats return the actual number of counters, and its callers use it when pushing buffers to the stats virtqueue. Note that it is still out of spec to change the number of counters at run-time. "Driver MUST supply the same subset of statistics in all buffers submitted to the statsq." Suggested-by: Arnd Bergmann Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index fcd06e16a1a2..d9544db231ef 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -242,11 +242,11 @@ static inline void update_stat(struct virtio_balloon *vb, int idx, #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) -static void update_balloon_stats(struct virtio_balloon *vb) +static unsigned int update_balloon_stats(struct virtio_balloon *vb) { unsigned long events[NR_VM_EVENT_ITEMS]; struct sysinfo i; - int idx = 0; + unsigned int idx = 0; long available; all_vm_events(events); @@ -266,6 +266,8 @@ static void update_balloon_stats(struct virtio_balloon *vb) pages_to_bytes(i.totalram)); update_stat(vb, idx++, VIRTIO_BALLOON_S_AVAIL, pages_to_bytes(available)); + + return idx; } /* @@ -291,14 +293,14 @@ static void stats_handle_request(struct virtio_balloon *vb) { struct virtqueue *vq; struct scatterlist sg; - unsigned int len; + unsigned int len, num_stats; - update_balloon_stats(vb); + num_stats = update_balloon_stats(vb); vq = vb->stats_vq; if (!virtqueue_get_buf(vq, &len)) return; - sg_init_one(&sg, vb->stats, sizeof(vb->stats)); + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); virtqueue_kick(vq); } @@ -423,15 +425,16 @@ static int init_vqs(struct virtio_balloon *vb) vb->deflate_vq = vqs[1]; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { struct scatterlist sg; + unsigned int num_stats; vb->stats_vq = vqs[2]; /* * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). */ - update_balloon_stats(vb); + num_stats = update_balloon_stats(vb); - sg_init_one(&sg, vb->stats, sizeof vb->stats); + sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) < 0) BUG(); -- cgit v1.2.3 From f0bb2d50dfcc519f06f901aac88502be6ff1df2c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 18:46:59 +0200 Subject: virtio_balloon: prevent uninitialized variable use The latest gcc-7.0.1 snapshot reports a new warning: virtio/virtio_balloon.c: In function 'update_balloon_stats': virtio/virtio_balloon.c:258:26: error: 'events[2]' is used uninitialized in this function [-Werror=uninitialized] virtio/virtio_balloon.c:260:26: error: 'events[3]' is used uninitialized in this function [-Werror=uninitialized] virtio/virtio_balloon.c:261:56: error: 'events[18]' is used uninitialized in this function [-Werror=uninitialized] virtio/virtio_balloon.c:262:56: error: 'events[17]' is used uninitialized in this function [-Werror=uninitialized] This seems absolutely right, so we should add an extra check to prevent copying uninitialized stack data into the statistics. >From all I can tell, this has been broken since the statistics code was originally added in 2.6.34. Fixes: 9564e138b1f6 ("virtio: Add memory statistics reporting to the balloon driver (V4)") Signed-off-by: Arnd Bergmann Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index d9544db231ef..34adf9b9c053 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -254,12 +254,14 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb) available = si_mem_available(); +#ifdef CONFIG_VM_EVENT_COUNTERS update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, pages_to_bytes(events[PSWPIN])); update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, pages_to_bytes(events[PSWPOUT])); update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); +#endif update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, pages_to_bytes(i.freeram)); update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, -- cgit v1.2.3 From e3d5092b6756b9e0b08f94bbeafcc7afe19f0996 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 22 Mar 2017 18:33:23 +0100 Subject: ACPI: ioapic: Clear on-stack resource before using it The on-stack resource-window 'win' in setup_res() is not properly initialized. This causes the pointers in the embedded 'struct resource' to contain stale addresses. These pointers (in my case the ->child pointer) later get propagated to the global iomem_resources list, causing a #GP exception when the list is traversed in iomem_map_sanity_check(). Fixes: c183619b63ec (x86/irq, ACPI: Implement ACPI driver to support IOAPIC hotplug) Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ioapic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c index 1120dfd625b8..7e4fbf9a53a3 100644 --- a/drivers/acpi/ioapic.c +++ b/drivers/acpi/ioapic.c @@ -45,6 +45,12 @@ static acpi_status setup_res(struct acpi_resource *acpi_res, void *data) struct resource *res = data; struct resource_win win; + /* + * We might assign this to 'res' later, make sure all pointers are + * cleared before the resource is added to the global list + */ + memset(&win, 0, sizeof(win)); + res->flags = 0; if (acpi_dev_filter_resource_type(acpi_res, IORESOURCE_MEM)) return AE_OK; -- cgit v1.2.3 From 08f63d97749185fab942a3a47ed80f5bd89b8b7d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 22 Mar 2017 18:33:25 +0100 Subject: ACPI: Do not create a platform_device for IOAPIC/IOxAPIC No platform-device is required for IO(x)APICs, so don't even create them. [ rjw: This fixes a problem with leaking platform device objects after IOAPIC/IOxAPIC hot-removal events.] Signed-off-by: Joerg Roedel Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_platform.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index b4c1a6a51da4..03250e1f1103 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -25,9 +25,11 @@ ACPI_MODULE_NAME("platform"); static const struct acpi_device_id forbidden_id_list[] = { - {"PNP0000", 0}, /* PIC */ - {"PNP0100", 0}, /* Timer */ - {"PNP0200", 0}, /* AT DMA Controller */ + {"PNP0000", 0}, /* PIC */ + {"PNP0100", 0}, /* Timer */ + {"PNP0200", 0}, /* AT DMA Controller */ + {"ACPI0009", 0}, /* IOxAPIC */ + {"ACPI000A", 0}, /* IOAPIC */ {"", 0}, }; -- cgit v1.2.3 From 7d64f82cceb21e6d95db312d284f5f195e120154 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 16 Mar 2017 14:30:39 +0000 Subject: ACPI / APEI: Add missing synchronize_rcu() on NOTIFY_SCI removal When removing a GHES device notified by SCI, list_del_rcu() is used, ghes_remove() should call synchronize_rcu() before it goes on to call kfree(ghes), otherwise concurrent RCU readers may still hold this list entry after it has been freed. Signed-off-by: James Morse Reviewed-by: "Huang, Ying" Fixes: 81e88fdc432a (ACPI, APEI, Generic Hardware Error Source POLL/IRQ/NMI notification type support) Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b192b42a8351..79b3c9c5a3bc 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1073,6 +1073,7 @@ static int ghes_remove(struct platform_device *ghes_dev) if (list_empty(&ghes_sci)) unregister_acpi_hed_notifier(&ghes_notifier_sci); mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_remove(ghes); -- cgit v1.2.3 From 61b79e16c68d703dde58c25d3935d67210b7d71b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 16 Mar 2017 08:56:28 -0500 Subject: ACPI: Fix incompatibility with mcount-based function graph tracing Paul Menzel reported a warning: WARNING: CPU: 0 PID: 774 at /build/linux-ROBWaj/linux-4.9.13/kernel/trace/trace_functions_graph.c:233 ftrace_return_to_handler+0x1aa/0x1e0 Bad frame pointer: expected f6919d98, received f6919db0 from func acpi_pm_device_sleep_wake return to c43b6f9d The warning means that function graph tracing is broken for the acpi_pm_device_sleep_wake() function. That's because the ACPI Makefile unconditionally sets the '-Os' gcc flag to optimize for size. That's an issue because mcount-based function graph tracing is incompatible with '-Os' on x86, thanks to the following gcc bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109 I have another patch pending which will ensure that mcount-based function graph tracing is never used with CONFIG_CC_OPTIMIZE_FOR_SIZE on x86. But this patch is needed in addition to that one because the ACPI Makefile overrides that config option for no apparent reason. It has had this flag since the beginning of git history, and there's no related comment, so I don't know why it's there. As far as I can tell, there's no reason for it to be there. The appropriate behavior is for it to honor CONFIG_CC_OPTIMIZE_FOR_{SIZE,PERFORMANCE} like the rest of the kernel. Reported-by: Paul Menzel Signed-off-by: Josh Poimboeuf Acked-by: Steven Rostedt (VMware) Signed-off-by: Rafael J. Wysocki Cc: All applicable --- drivers/acpi/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index a391bbc48105..d94f92f88ca1 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -2,7 +2,6 @@ # Makefile for the Linux ACPI interpreter # -ccflags-y := -Os ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT # -- cgit v1.2.3 From f9ba3501d50317697811ff3c48f623f08d616fc8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 27 Mar 2017 00:21:15 +0800 Subject: sctp: change to save MSG_MORE flag into assoc David Laight noticed the support for MSG_MORE with datamsg->force_delay didn't really work as we expected, as the first msg with MSG_MORE set would always block the following chunks' dequeuing. This Patch is to rewrite it by saving the MSG_MORE flag into assoc as David Laight suggested. asoc->force_delay is used to save MSG_MORE flag before a msg is sent. All chunks in queue would not be sent out if asoc->force_delay is set by the msg with MSG_MORE flag, until a new msg without MSG_MORE flag clears asoc->force_delay. Note that this change would not affect the flush is generated by other triggers, like asoc->state != ESTABLISHED, queue size > pmtu etc. v1->v2: Not clear asoc->force_delay after sending the msg with MSG_MORE flag. Fixes: 4ea0c32f5f42 ("sctp: add support for MSG_MORE") Signed-off-by: Xin Long Acked-by: David Laight Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/output.c | 2 +- net/sctp/socket.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 592decebac75..8caa5ee9e290 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -499,7 +499,6 @@ struct sctp_datamsg { /* Did the messenge fail to send? */ int send_error; u8 send_failed:1, - force_delay:1, can_delay; /* should this message be Nagle delayed */ }; @@ -1878,6 +1877,7 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ temp:1, /* Is it a temporary association? */ + force_delay:1, prsctp_enable:1, reconf_enable:1; diff --git a/net/sctp/output.c b/net/sctp/output.c index 1224421036b3..73fd178007a3 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -704,7 +704,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, */ if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) && - !chunk->msg->force_delay) + !asoc->force_delay) /* Nothing unacked */ return SCTP_XMIT_OK; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0f378ea2ae38..baa269a0d52e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1965,7 +1965,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) err = PTR_ERR(datamsg); goto out_free; } - datamsg->force_delay = !!(msg->msg_flags & MSG_MORE); + asoc->force_delay = !!(msg->msg_flags & MSG_MORE); /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { -- cgit v1.2.3 From af109a2cf6a9a6271fa420ae2d64d72d86c92b7d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 12:11:07 +0200 Subject: isdn: kcapi: avoid uninitialized data gcc-7 points out that the AVMB1_ADDCARD ioctl results in an unintialized value ending up in the cardnr parameter: drivers/isdn/capi/kcapi.c: In function 'old_capi_manufacturer': drivers/isdn/capi/kcapi.c:1042:24: error: 'cdef.cardnr' may be used uninitialized in this function [-Werror=maybe-uninitialized] cparams.cardnr = cdef.cardnr; This has been broken since before the start of the git history, so either the value is not used for anything important, or the ioctl command doesn't get called in practice. Setting the cardnr to zero avoids the warning and makes sure we have consistent behavior. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/isdn/capi/kcapi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 1dfd1085a04f..9ca691d6c13b 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -1032,6 +1032,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data) sizeof(avmb1_carddef)))) return -EFAULT; cdef.cardtype = AVM_CARDTYPE_B1; + cdef.cardnr = 0; } else { if ((retval = copy_from_user(&cdef, data, sizeof(avmb1_extcarddef)))) -- cgit v1.2.3 From c2b341a620018d4eaeb0e85c16274ac4e5f153d4 Mon Sep 17 00:00:00 2001 From: Jonas Jensen Date: Tue, 28 Mar 2017 12:12:38 +0200 Subject: net: moxa: fix TX overrun memory leak moxart_mac_start_xmit() doesn't care where tx_tail is, tx_head can catch and pass tx_tail, which is bad because moxart_tx_finished() isn't guaranteed to catch up on freeing resources from tx_tail. Add a check in moxart_mac_start_xmit() stopping the queue at the end of the circular buffer. Also add a check in moxart_tx_finished() waking the queue if the buffer has TX_WAKE_THRESHOLD or more free descriptors. While we're at it, move spin_lock_irq() to happen before our descriptor pointer is assigned in moxart_mac_start_xmit(). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=99451 Signed-off-by: Jonas Jensen Signed-off-by: David S. Miller --- drivers/net/ethernet/moxa/moxart_ether.c | 20 ++++++++++++++++++-- drivers/net/ethernet/moxa/moxart_ether.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 06c9f4100cb9..6ad44be08b33 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "moxart_ether.h" @@ -278,6 +279,13 @@ rx_next: return rx; } +static int moxart_tx_queue_space(struct net_device *ndev) +{ + struct moxart_mac_priv_t *priv = netdev_priv(ndev); + + return CIRC_SPACE(priv->tx_head, priv->tx_tail, TX_DESC_NUM); +} + static void moxart_tx_finished(struct net_device *ndev) { struct moxart_mac_priv_t *priv = netdev_priv(ndev); @@ -297,6 +305,9 @@ static void moxart_tx_finished(struct net_device *ndev) tx_tail = TX_NEXT(tx_tail); } priv->tx_tail = tx_tail; + if (netif_queue_stopped(ndev) && + moxart_tx_queue_space(ndev) >= TX_WAKE_THRESHOLD) + netif_wake_queue(ndev); } static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id) @@ -324,13 +335,18 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) struct moxart_mac_priv_t *priv = netdev_priv(ndev); void *desc; unsigned int len; - unsigned int tx_head = priv->tx_head; + unsigned int tx_head; u32 txdes1; int ret = NETDEV_TX_BUSY; + spin_lock_irq(&priv->txlock); + + tx_head = priv->tx_head; desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head); - spin_lock_irq(&priv->txlock); + if (moxart_tx_queue_space(ndev) == 1) + netif_stop_queue(ndev); + if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) { net_dbg_ratelimited("no TX space for packet\n"); priv->stats.tx_dropped++; diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h index 93a9563ac7c6..afc32ec998c0 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.h +++ b/drivers/net/ethernet/moxa/moxart_ether.h @@ -59,6 +59,7 @@ #define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM_MASK)) #define TX_BUF_SIZE 1600 #define TX_BUF_SIZE_MAX (TX_DESC1_BUF_SIZE_MASK+1) +#define TX_WAKE_THRESHOLD 16 #define RX_DESC_NUM 64 #define RX_DESC_NUM_MASK (RX_DESC_NUM-1) -- cgit v1.2.3 From e497ec680c4cd51e76bfcdd49363d9ab8d32a757 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Tue, 28 Mar 2017 16:13:41 +0300 Subject: net/mlx5: Avoid dereferencing uninitialized pointer In NETDEV_CHANGEUPPER event the upper_info field is valid only when linking is true. Otherwise it should be ignored. Fixes: 7907f23adc18 (net/mlx5: Implement RoCE LAG feature) Signed-off-by: Talat Batheesh Reviewed-by: Aviv Heller Reviewed-by: Moni Shoua Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 55957246c0e8..b5d5519542e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -294,7 +294,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, struct netdev_notifier_changeupper_info *info) { struct net_device *upper = info->upper_dev, *ndev_tmp; - struct netdev_lag_upper_info *lag_upper_info; + struct netdev_lag_upper_info *lag_upper_info = NULL; bool is_bonded; int bond_status = 0; int num_slaves = 0; @@ -303,7 +303,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, if (!netif_is_lag_master(upper)) return 0; - lag_upper_info = info->upper_info; + if (info->linking) + lag_upper_info = info->upper_info; /* The event may still be of interest if the slave does not belong to * us, but is enslaved to a master which has one or more of our netdevs -- cgit v1.2.3 From 23abec20aa7034c5d49b62c0a668f7291c819fab Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 26 Mar 2017 19:27:35 -0400 Subject: svcrdma: set XPT_CONG_CTRL flag for bc xprt Same change as Kinglong Mee's fix for the TCP backchannel service. Fixes: 5283b03ee5cd ("nfs/nfsd/sunrpc: enforce transport...") Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c13a5c35ce14..fc8f14c7bfec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -127,6 +127,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv, xprt = &cma_xprt->sc_xprt; svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv); + set_bit(XPT_CONG_CTRL, &xprt->xpt_flags); serv->sv_bc_xprt = xprt; dprintk("svcrdma: %s(%p)\n", __func__, xprt); -- cgit v1.2.3 From 16b8b6de32572207abeb4dfb74ab7bd8409a5690 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Mar 2017 16:11:18 +0200 Subject: rocker: fix Wmaybe-uninitialized false-positive gcc-7 reports a warning that earlier versions did not have: drivers/net/ethernet/rocker/rocker_ofdpa.c: In function 'ofdpa_port_stp_update': arch/x86/include/asm/string_32.h:79:22: error: '*((void *)&prev_ctrls+4)' may be used uninitialized in this function [-Werror=maybe-uninitialized] *((short *)to + 2) = *((short *)from + 2); ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/rocker/rocker_ofdpa.c:2218:7: note: '*((void *)&prev_ctrls+4)' was declared here This is clearly a variation of the warning about 'prev_state' that was shut up using uninitialized_var(). We can slightly simplify the code and get rid of the warning by unconditionally saving the prev_state and prev_ctrls variables. The inlined memcpy is not particularly expensive here, as it just has to read five bytes from one or two cache lines. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 7cd76b6b5cb9..2ae852454780 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2216,18 +2216,15 @@ static int ofdpa_port_stp_update(struct ofdpa_port *ofdpa_port, { bool want[OFDPA_CTRL_MAX] = { 0, }; bool prev_ctrls[OFDPA_CTRL_MAX]; - u8 uninitialized_var(prev_state); + u8 prev_state; int err; int i; - if (switchdev_trans_ph_prepare(trans)) { - memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls)); - prev_state = ofdpa_port->stp_state; - } - - if (ofdpa_port->stp_state == state) + prev_state = ofdpa_port->stp_state; + if (prev_state == state) return 0; + memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls)); ofdpa_port->stp_state = state; switch (state) { -- cgit v1.2.3 From b768b16de58d5e0b1d7c3f936825b25327ced20c Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 28 Mar 2017 11:25:26 -0700 Subject: openvswitch: Fix refcount leak on force commit. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference count held for skb needs to be released when the skb's nfct pointer is cleared regardless of if nf_ct_delete() is called or not. Failing to release the skb's reference cound led to deferred conntrack cleanup spinning forever within nf_conntrack_cleanup_net_list() when cleaning up a network namespace:    kworker/u16:0-19025 [004] 45981067.173642: sched_switch: kworker/u16:0:19025 [120] R ==> rcu_preempt:7 [120]    kworker/u16:0-19025 [004] 45981067.173651: kernel_stack: => ___preempt_schedule (ffffffffa001ed36) => _raw_spin_unlock_bh (ffffffffa0713290) => nf_ct_iterate_cleanup (ffffffffc00a4454) => nf_conntrack_cleanup_net_list (ffffffffc00a5e1e) => nf_conntrack_pernet_exit (ffffffffc00a63dd) => ops_exit_list.isra.1 (ffffffffa06075f3) => cleanup_net (ffffffffa0607df0) => process_one_work (ffffffffa0084c31) => worker_thread (ffffffffa008592b) => kthread (ffffffffa008bee2) => ret_from_fork (ffffffffa071b67c) Fixes: dd41d33f0b03 ("openvswitch: Add force commit.") Reported-by: Yang Song Signed-off-by: Jarno Rajahalme Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e0a87776a010..7b2c2fce408a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -643,8 +643,8 @@ static bool skb_nfct_cached(struct net *net, */ if (nf_ct_is_confirmed(ct)) nf_ct_delete(ct, 0, 0); - else - nf_conntrack_put(&ct->ct_general); + + nf_conntrack_put(&ct->ct_general); nf_ct_set(skb, NULL, 0); return false; } -- cgit v1.2.3 From b3ef5520c1eabb56064474043c7c55a1a65b8708 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 28 Mar 2017 09:11:31 +0100 Subject: cfg80211: check rdev resume callback only for registered wiphy We got the following use-after-free KASAN report: BUG: KASAN: use-after-free in wiphy_resume+0x591/0x5a0 [cfg80211] at addr ffff8803fc244090 Read of size 8 by task kworker/u16:24/2587 CPU: 6 PID: 2587 Comm: kworker/u16:24 Tainted: G B 4.9.13-debug+ Hardware name: Dell Inc. XPS 15 9550/0N7TVV, BIOS 1.2.19 12/22/2016 Workqueue: events_unbound async_run_entry_fn ffff880425d4f9d8 ffffffffaeedb541 ffff88042b80ef00 ffff8803fc244088 ffff880425d4fa00 ffffffffae84d7a1 ffff880425d4fa98 ffff8803fc244080 ffff88042b80ef00 ffff880425d4fa88 ffffffffae84da3a ffffffffc141f7d9 Call Trace: [] dump_stack+0x85/0xc4 [] kasan_object_err+0x21/0x70 [] kasan_report_error+0x1fa/0x500 [] ? cfg80211_bss_age+0x39/0xc0 [cfg80211] [] ? cfg80211_bss_age+0x9a/0xc0 [cfg80211] [] ? trace_hardirqs_on+0xd/0x10 [] ? wiphy_suspend+0xc70/0xc70 [cfg80211] [] __asan_report_load8_noabort+0x61/0x70 [] ? wiphy_suspend+0xbb0/0xc70 [cfg80211] [] ? wiphy_resume+0x591/0x5a0 [cfg80211] [] wiphy_resume+0x591/0x5a0 [cfg80211] [] ? wiphy_suspend+0xc70/0xc70 [cfg80211] [] dpm_run_callback+0x6e/0x4f0 [] device_resume+0x1c2/0x670 [] async_resume+0x1d/0x50 [] async_run_entry_fn+0xfe/0x610 [] process_one_work+0x716/0x1a50 [] ? process_one_work+0x679/0x1a50 [] ? _raw_spin_unlock_irq+0x3d/0x60 [] ? pwq_dec_nr_in_flight+0x2b0/0x2b0 [] worker_thread+0xe0/0x1460 [] ? process_one_work+0x1a50/0x1a50 [] kthread+0x222/0x2e0 [] ? kthread_park+0x80/0x80 [] ? kthread_park+0x80/0x80 [] ? kthread_park+0x80/0x80 [] ret_from_fork+0x2a/0x40 Object at ffff8803fc244088, in cache kmalloc-1024 size: 1024 Allocated: PID = 71 save_stack_trace+0x1b/0x20 save_stack+0x46/0xd0 kasan_kmalloc+0xad/0xe0 kasan_slab_alloc+0x12/0x20 __kmalloc_track_caller+0x134/0x360 kmemdup+0x20/0x50 brcmf_cfg80211_attach+0x10b/0x3a90 [brcmfmac] brcmf_bus_start+0x19a/0x9a0 [brcmfmac] brcmf_pcie_setup+0x1f1a/0x3680 [brcmfmac] brcmf_fw_request_nvram_done+0x44c/0x11b0 [brcmfmac] request_firmware_work_func+0x135/0x280 process_one_work+0x716/0x1a50 worker_thread+0xe0/0x1460 kthread+0x222/0x2e0 ret_from_fork+0x2a/0x40 Freed: PID = 2568 save_stack_trace+0x1b/0x20 save_stack+0x46/0xd0 kasan_slab_free+0x71/0xb0 kfree+0xe8/0x2e0 brcmf_cfg80211_detach+0x62/0xf0 [brcmfmac] brcmf_detach+0x14a/0x2b0 [brcmfmac] brcmf_pcie_remove+0x140/0x5d0 [brcmfmac] brcmf_pcie_pm_leave_D3+0x198/0x2e0 [brcmfmac] pci_pm_resume+0x186/0x220 dpm_run_callback+0x6e/0x4f0 device_resume+0x1c2/0x670 async_resume+0x1d/0x50 async_run_entry_fn+0xfe/0x610 process_one_work+0x716/0x1a50 worker_thread+0xe0/0x1460 kthread+0x222/0x2e0 ret_from_fork+0x2a/0x40 Memory state around the buggy address: ffff8803fc243f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8803fc244000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8803fc244080: fc fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8803fc244100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8803fc244180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb What is happening is that brcmf_pcie_resume() detects a device that is no longer responsive and it decides to unbind resulting in a wiphy_unregister() and wiphy_free() call. Now the wiphy instance remains allocated, because PM needs to call wiphy_resume() for it. However, brcmfmac already does a kfree() for the struct cfg80211_registered_device::ops field. Change the checks in wiphy_resume() to only access the struct cfg80211_registered_device::ops if the wiphy instance is still registered at this time. Cc: stable@vger.kernel.org # 4.10.x, 4.9.x Reported-by: Daniel J Blueman Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- net/wireless/sysfs.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 16b6b5988be9..570a2b67ca10 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -132,12 +132,10 @@ static int wiphy_resume(struct device *dev) /* Age scan results with time spent in suspend */ cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); - if (rdev->ops->resume) { - rtnl_lock(); - if (rdev->wiphy.registered) - ret = rdev_resume(rdev); - rtnl_unlock(); - } + rtnl_lock(); + if (rdev->wiphy.registered && rdev->ops->resume) + ret = rdev_resume(rdev); + rtnl_unlock(); return ret; } -- cgit v1.2.3 From b07c12517f2aed0add8ce18146bb426b14099392 Mon Sep 17 00:00:00 2001 From: Adam Wallis Date: Tue, 28 Mar 2017 15:55:28 +0300 Subject: xhci: plat: Register shutdown for xhci_plat Shutdown should be called for xhci_plat devices especially for situations where kexec might be used by stopping DMA transactions. Signed-off-by: Adam Wallis Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index bd02a6cd8e2c..6ed468fa7d5e 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -344,6 +344,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, + .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = DEV_PM_OPS, -- cgit v1.2.3 From 0ab2881a406b9fd46224a3e8253bbc0141b4f844 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 28 Mar 2017 15:55:29 +0300 Subject: xhci: Set URB actual length for stopped control transfers A control transfer that stopped at the status stage incorrectly warned about a "unexpected TRB Type 4", and did not set the transferred actual_length for the URB. The URB actual_length for control transfers should contain the bytes transferred in the data stage. Bytes of a partially sent setup stage and missing bytes from status stage should be left out. Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index d9936c771fa0..a3309aa02993 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1989,6 +1989,9 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, case TRB_NORMAL: td->urb->actual_length = requested - remaining; goto finish_td; + case TRB_STATUS: + td->urb->actual_length = requested; + goto finish_td; default: xhci_warn(xhci, "WARN: unexpected TRB Type %d\n", trb_type); -- cgit v1.2.3 From d3519b9d9606991a1305596348b6d690bfa3eb27 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 28 Mar 2017 15:55:30 +0300 Subject: xhci: Manually give back cancelled URB if we can't queue it for cancel xhci needs to take care of four scenarios when asked to cancel a URB. 1 URB is not queued or already given back. usb_hcd_check_unlink_urb() will return an error, we pass the error on 2 We fail to find xhci internal structures from urb private data such as virtual device and endpoint ring. Give back URB immediately, can't do anything about internal structures. 3 URB private data has valid pointers to xhci internal data, but host is not responding. give back URB immedately and remove the URB from the endpoint lists. 4 Everyting is working add URB to cancel list, queue a command to stop the endpoint, after which the URB can be turned to no-op or skipped, removed from lists, and given back. We failed to give back the urb in case 2 where the correct device and endpoint pointers could not be retrieved from URB private data. This caused a hang on Dell Inspiron 5558/0VNM2T at resume from suspend as urb was never returned. [ 245.270505] INFO: task rtsx_usb_ms_1:254 blocked for more than 120 seconds. [ 245.272244] Tainted: G W 4.11.0-rc3-ARCH #2 [ 245.273983] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 245.275737] rtsx_usb_ms_1 D 0 254 2 0x00000000 [ 245.277524] Call Trace: [ 245.279278] __schedule+0x2d3/0x8a0 [ 245.281077] schedule+0x3d/0x90 [ 245.281961] usb_kill_urb.part.3+0x6c/0xa0 [usbcore] [ 245.282861] ? wake_atomic_t_function+0x60/0x60 [ 245.283760] usb_kill_urb+0x21/0x30 [usbcore] [ 245.284649] usb_start_wait_urb+0xe5/0x170 [usbcore] [ 245.285541] ? try_to_del_timer_sync+0x53/0x80 [ 245.286434] usb_bulk_msg+0xbd/0x160 [usbcore] [ 245.287326] rtsx_usb_send_cmd+0x63/0x90 [rtsx_usb] Reported-by: diego.viola@gmail.com Tested-by: diego.viola@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 50aee8b7718b..953fd8f62df0 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1477,6 +1477,7 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) struct xhci_ring *ep_ring; struct xhci_virt_ep *ep; struct xhci_command *command; + struct xhci_virt_device *vdev; xhci = hcd_to_xhci(hcd); spin_lock_irqsave(&xhci->lock, flags); @@ -1485,15 +1486,27 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) /* Make sure the URB hasn't completed or been unlinked already */ ret = usb_hcd_check_unlink_urb(hcd, urb, status); - if (ret || !urb->hcpriv) + if (ret) goto done; + + /* give back URB now if we can't queue it for cancel */ + vdev = xhci->devs[urb->dev->slot_id]; + urb_priv = urb->hcpriv; + if (!vdev || !urb_priv) + goto err_giveback; + + ep_index = xhci_get_endpoint_index(&urb->ep->desc); + ep = &vdev->eps[ep_index]; + ep_ring = xhci_urb_to_transfer_ring(xhci, urb); + if (!ep || !ep_ring) + goto err_giveback; + temp = readl(&xhci->op_regs->status); if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) { xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, "HW died, freeing TD."); - urb_priv = urb->hcpriv; for (i = urb_priv->num_tds_done; - i < urb_priv->num_tds && xhci->devs[urb->dev->slot_id]; + i < urb_priv->num_tds; i++) { td = &urb_priv->td[i]; if (!list_empty(&td->td_list)) @@ -1501,23 +1514,9 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) if (!list_empty(&td->cancelled_td_list)) list_del_init(&td->cancelled_td_list); } - - usb_hcd_unlink_urb_from_ep(hcd, urb); - spin_unlock_irqrestore(&xhci->lock, flags); - usb_hcd_giveback_urb(hcd, urb, -ESHUTDOWN); - xhci_urb_free_priv(urb_priv); - return ret; + goto err_giveback; } - ep_index = xhci_get_endpoint_index(&urb->ep->desc); - ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index]; - ep_ring = xhci_urb_to_transfer_ring(xhci, urb); - if (!ep_ring) { - ret = -EINVAL; - goto done; - } - - urb_priv = urb->hcpriv; i = urb_priv->num_tds_done; if (i < urb_priv->num_tds) xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, @@ -1554,6 +1553,14 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) done: spin_unlock_irqrestore(&xhci->lock, flags); return ret; + +err_giveback: + if (urb_priv) + xhci_urb_free_priv(urb_priv); + usb_hcd_unlink_urb_from_ep(hcd, urb); + spin_unlock_irqrestore(&xhci->lock, flags); + usb_hcd_giveback_urb(hcd, urb, -ESHUTDOWN); + return ret; } /* Drop an endpoint from a new bandwidth configuration for this device. -- cgit v1.2.3 From a7f12a21f6b32bdd8d76d3af81eef9e72ce41ec0 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 28 Mar 2017 15:07:38 -0400 Subject: usb: phy: isp1301: Fix build warning when CONFIG_OF is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit fd567653bdb9 ("usb: phy: isp1301: Add OF device ID table") added an OF device ID table, but used the of_match_ptr() macro that will lead to a build warning if CONFIG_OF symbol is disabled: drivers/usb/phy//phy-isp1301.c:36:34: warning: ‘isp1301_of_match’ defined but not used [-Wunused-const-variable=] static const struct of_device_id isp1301_of_match[] = { ^~~~~~~~~~~~~~~~ Fixes: fd567653bdb9 ("usb: phy: isp1301: Add OF device ID table") Reported-by: Arnd Bergmann Signed-off-by: Javier Martinez Canillas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-isp1301.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c index b3b33cf7ddf6..f333024660b4 100644 --- a/drivers/usb/phy/phy-isp1301.c +++ b/drivers/usb/phy/phy-isp1301.c @@ -136,7 +136,7 @@ static int isp1301_remove(struct i2c_client *client) static struct i2c_driver isp1301_driver = { .driver = { .name = DRV_NAME, - .of_match_table = of_match_ptr(isp1301_of_match), + .of_match_table = isp1301_of_match, }, .probe = isp1301_probe, .remove = isp1301_remove, -- cgit v1.2.3 From 77c1c03c5b8ef28e55bb0aff29b1e006037ca645 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 28 Mar 2017 22:59:25 +0800 Subject: netfilter: nfnetlink_queue: fix secctx memory leak We must call security_release_secctx to free the memory returned by security_secid_to_secctx, otherwise memory may be leaked forever. Fixes: ef493bd930ae ("netfilter: nfnetlink_queue: add security context information") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3ee0b8a000a4..933509ebf3d3 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); - return NULL; + goto nlmsg_failure; } nlh = nlmsg_put(skb, 0, 0, @@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (!nlh) { skb_tx_error(entskb); kfree_skb(skb); - return NULL; + goto nlmsg_failure; } nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = entry->state.pf; @@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh->nlmsg_len = skb->len; + if (seclen) + security_release_secctx(secdata, seclen); return skb; nla_put_failure: skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); +nlmsg_failure: + if (seclen) + security_release_secctx(secdata, seclen); return NULL; } -- cgit v1.2.3 From 9e1764309f577a88a0d5250fea6a080a6ad43556 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Mar 2017 11:32:43 +0000 Subject: drm/i915: Align "unfenced" tiled access on gen2, early gen3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old devices have quite severe restrictions for using fences, and unlike more recent device (anything from Pineview onwards) we need to enforce those restrictions even for unfenced tiled access from the render pipeline. Fixes: 944397f04f24 ("drm/i915: Store required fence size/alignment for GGTT vma") Reported-by: Ville Syrjälä Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Joonas Lahtinen Cc: # v4.11-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170325113243.16438-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter Reviewed-by: Joonas Lahtinen Tested-by: Ville Syrjälä (cherry picked from commit f4ce766f28cd0efa0cb4d869a84905d573ef7e70) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +++- drivers/gpu/drm/i915/i915_pci.c | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1e53c31b6826..46fcd8b7080a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -806,6 +806,7 @@ struct intel_csr { func(has_resource_streamer); \ func(has_runtime_pm); \ func(has_snoop); \ + func(unfenced_needs_alignment); \ func(cursor_needs_physical); \ func(hws_needs_physical); \ func(overlay_needs_physical); \ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 30e0675fd7da..15a15d00a6bf 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -888,6 +888,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, struct list_head ordered_vmas; struct list_head pinned_vmas; bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; + bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment; int retry; vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; @@ -908,7 +909,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, if (!has_fenced_gpu_access) entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = - entry->flags & EXEC_OBJECT_NEEDS_FENCE && + (entry->flags & EXEC_OBJECT_NEEDS_FENCE || + needs_unfenced_map) && i915_gem_object_is_tiled(obj); need_mappable = need_fence || need_reloc_mappable(vma); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ecb487b5356f..9bbbd4e83e3c 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -60,6 +60,7 @@ .has_overlay = 1, .overlay_needs_physical = 1, \ .has_gmch_display = 1, \ .hws_needs_physical = 1, \ + .unfenced_needs_alignment = 1, \ .ring_mask = RENDER_RING, \ GEN_DEFAULT_PIPEOFFSETS, \ CURSOR_OFFSETS @@ -101,6 +102,7 @@ static const struct intel_device_info intel_i915g_info = { .platform = INTEL_I915G, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_i915gm_info = { @@ -112,6 +114,7 @@ static const struct intel_device_info intel_i915gm_info = { .supports_tv = 1, .has_fbc = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_i945g_info = { @@ -120,6 +123,7 @@ static const struct intel_device_info intel_i945g_info = { .has_hotplug = 1, .cursor_needs_physical = 1, .has_overlay = 1, .overlay_needs_physical = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_i945gm_info = { @@ -130,6 +134,7 @@ static const struct intel_device_info intel_i945gm_info = { .supports_tv = 1, .has_fbc = 1, .hws_needs_physical = 1, + .unfenced_needs_alignment = 1, }; static const struct intel_device_info intel_g33_info = { -- cgit v1.2.3 From 4e5f713ffc202c49a4374897cb0d2b218b391ff7 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 27 Mar 2017 21:34:59 +0100 Subject: drm/i915/perf: destroy stream on sample_flags mismatch If we were to ever encounter a sample_flags mismatch we need to ensure we destroy the stream when we bail. Fixes: d79651522e89 ("drm/i915: Enable i915 perf stream for Haswell OA unit") Signed-off-by: Matthew Auld Cc: Robert Bragg Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170327203459.18398-1-matthew.auld@intel.com (cherry picked from commit 22f880ca8246c6c80c4f48731c6a7d5d15042f56) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a1b7eec58be2..f8fcb317042e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1705,7 +1705,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, */ if (WARN_ON(stream->sample_flags != props->sample_flags)) { ret = -ENODEV; - goto err_alloc; + goto err_flags; } list_add(&stream->link, &dev_priv->perf.streams); @@ -1728,6 +1728,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, err_open: list_del(&stream->link); +err_flags: if (stream->ops->destroy) stream->ops->destroy(stream); err_alloc: -- cgit v1.2.3 From aa62acfd63e7367872291c15290cb9c29d140926 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 27 Mar 2017 21:32:36 +0100 Subject: drm/i915/perf: remove user triggerable warn Don't throw a warning if we are given an invalid property id. While here let's also bring back Robert' original idea of catching unhandled enumeration values at compile time. Fixes: eec688e1420d ("drm/i915: Add i915 perf infrastructure") Signed-off-by: Matthew Auld Cc: Robert Bragg Reviewed-by: Robert Bragg Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170327203236.18276-1-matthew.auld@intel.com (cherry picked from commit 0a309f9e3dfaa4f5db0bf1b0cab54571744b491a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index f8fcb317042e..70964ca9251e 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1794,6 +1794,11 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, if (ret) return ret; + if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { + DRM_DEBUG("Unknown i915 perf property ID\n"); + return -EINVAL; + } + switch ((enum drm_i915_perf_property_id)id) { case DRM_I915_PERF_PROP_CTX_HANDLE: props->single_context = 1; @@ -1863,9 +1868,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; - default: + case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); - DRM_DEBUG("Unknown i915 perf property ID\n"); return -EINVAL; } -- cgit v1.2.3 From 7d65f82954dadbbe7b6e1aec7e07ad17bc6d958b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Mar 2017 14:15:24 +0200 Subject: mac80211: unconditionally start new netdev queues with iTXQ support When internal mac80211 TXQs aren't supported, netdev queues must always started out started even when driver queues are stopped while the interface is added. This is necessary because with the internal TXQ support netdev queues are never stopped and packet scheduling/dropping is done in mac80211. Cc: stable@vger.kernel.org # 4.9+ Fixes: 80a83cfc434b1 ("mac80211: skip netdev queue control with software queuing") Reported-and-tested-by: Sven Eckelmann Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 40813dd3301c..5bb0c5012819 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -718,7 +718,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_recalc_ps(local); if (sdata->vif.type == NL80211_IFTYPE_MONITOR || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + local->ops->wake_tx_queue) { /* XXX: for AP_VLAN, actually track AP queues */ netif_tx_start_all_queues(dev); } else if (dev) { -- cgit v1.2.3 From 9d0d1c8b1c9d80b17cfa86ecd50c8933a742585c Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 24 Mar 2017 15:04:50 -0700 Subject: Btrfs: bring back repair during read Commit 20a7db8ab3f2 ("btrfs: add dummy callback for readpage_io_failed and drop checks") made a cleanup around readpage_io_failed_hook, and it was supposed to keep the original sematics, but it also unexpectedly disabled repair during read for dup, raid1 and raid10. This fixes the problem by letting data's inode call the generic readpage_io_failed callback by returning -EAGAIN from its readpage_io_failed_hook in order to notify end_bio_extent_readpage to do the rest. We don't call it directly because the generic one takes an offset from end_bio_extent_readpage() to calculate the index in the checksum array and inode's readpage_io_failed_hook doesn't offer that offset. Cc: David Sterba Signed-off-by: Liu Bo Reviewed-by: David Sterba [ keep the const function attribute ] Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 46 ++++++++++++++++++++++++++++------------------ fs/btrfs/inode.c | 6 +++--- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8df797432740..27fdb250b446 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2584,26 +2584,36 @@ static void end_bio_extent_readpage(struct bio *bio) if (tree->ops) { ret = tree->ops->readpage_io_failed_hook(page, mirror); - if (!ret && !bio->bi_error) - uptodate = 1; - } else { + if (ret == -EAGAIN) { + /* + * Data inode's readpage_io_failed_hook() always + * returns -EAGAIN. + * + * The generic bio_readpage_error handles errors + * the following way: If possible, new read + * requests are created and submitted and will + * end up in end_bio_extent_readpage as well (if + * we're lucky, not in the !uptodate case). In + * that case it returns 0 and we just go on with + * the next page in our bio. If it can't handle + * the error it will return -EIO and we remain + * responsible for that page. + */ + ret = bio_readpage_error(bio, offset, page, + start, end, mirror); + if (ret == 0) { + uptodate = !bio->bi_error; + offset += len; + continue; + } + } + /* - * The generic bio_readpage_error handles errors the - * following way: If possible, new read requests are - * created and submitted and will end up in - * end_bio_extent_readpage as well (if we're lucky, not - * in the !uptodate case). In that case it returns 0 and - * we just go on with the next page in our bio. If it - * can't handle the error it will return -EIO and we - * remain responsible for that page. + * metadata's readpage_io_failed_hook() always returns + * -EIO and fixes nothing. -EIO is also returned if + * data inode error could not be fixed. */ - ret = bio_readpage_error(bio, offset, page, start, end, - mirror); - if (ret == 0) { - uptodate = !bio->bi_error; - offset += len; - continue; - } + ASSERT(ret == -EIO); } readpage_ok: if (likely(uptodate)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e57191072aa3..876f1d36030c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10523,9 +10523,9 @@ out_inode: } __attribute__((const)) -static int dummy_readpage_io_failed_hook(struct page *page, int failed_mirror) +static int btrfs_readpage_io_failed_hook(struct page *page, int failed_mirror) { - return 0; + return -EAGAIN; } static const struct inode_operations btrfs_dir_inode_operations = { @@ -10570,7 +10570,7 @@ static const struct extent_io_ops btrfs_extent_io_ops = { .submit_bio_hook = btrfs_submit_bio_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, .merge_bio_hook = btrfs_merge_bio_hook, - .readpage_io_failed_hook = dummy_readpage_io_failed_hook, + .readpage_io_failed_hook = btrfs_readpage_io_failed_hook, /* optional callbacks */ .fill_delalloc = run_delalloc_range, -- cgit v1.2.3 From ce0dcee626c482183b42d45b6ea43198c7223fc7 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Tue, 14 Mar 2017 05:25:09 -0500 Subject: btrfs: Change qgroup_meta_rsv to 64bit Using an int value is causing qg->reserved to become negative and exclusive -EDQUOT to be reached prematurely. This affects exclusive qgroups only. TEST CASE: DEVICE=/dev/vdb MOUNTPOINT=/mnt SUBVOL=$MOUNTPOINT/tmp umount $SUBVOL umount $MOUNTPOINT mkfs.btrfs -f $DEVICE mount /dev/vdb $MOUNTPOINT btrfs quota enable $MOUNTPOINT btrfs subvol create $SUBVOL umount $MOUNTPOINT mount /dev/vdb $MOUNTPOINT mount -o subvol=tmp $DEVICE $SUBVOL btrfs qgroup limit -e 3G $SUBVOL btrfs quota rescan /mnt -w for i in `seq 1 44000`; do dd if=/dev/zero of=/mnt/tmp/test_$i bs=10k count=1 if [[ $? > 0 ]]; then btrfs qgroup show -pcref $SUBVOL exit 1 fi done Signed-off-by: Goldwyn Rodrigues [ add reproducer to changelog ] Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/qgroup.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f03c2f285eb1..660d485b6e8b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1258,7 +1258,7 @@ struct btrfs_root { atomic_t will_be_snapshoted; /* For qgroup metadata space reserve */ - atomic_t qgroup_meta_rsv; + atomic64_t qgroup_meta_rsv; }; static inline u32 btrfs_inode_sectorsize(const struct inode *inode) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 73fdc6bdaea9..982c56f79515 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1342,7 +1342,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, atomic_set(&root->orphan_inodes, 0); atomic_set(&root->refs, 1); atomic_set(&root->will_be_snapshoted, 0); - atomic_set(&root->qgroup_meta_rsv, 0); + atomic64_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a5da750c1087..a59801dc2a34 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2948,20 +2948,20 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, ret = qgroup_reserve(root, num_bytes, enforce); if (ret < 0) return ret; - atomic_add(num_bytes, &root->qgroup_meta_rsv); + atomic64_add(num_bytes, &root->qgroup_meta_rsv); return ret; } void btrfs_qgroup_free_meta_all(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; - int reserved; + u64 reserved; if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) || !is_fstree(root->objectid)) return; - reserved = atomic_xchg(&root->qgroup_meta_rsv, 0); + reserved = atomic64_xchg(&root->qgroup_meta_rsv, 0); if (reserved == 0) return; btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved); @@ -2976,8 +2976,8 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes) return; BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize)); - WARN_ON(atomic_read(&root->qgroup_meta_rsv) < num_bytes); - atomic_sub(num_bytes, &root->qgroup_meta_rsv); + WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes); + atomic64_sub(num_bytes, &root->qgroup_meta_rsv); btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes); } -- cgit v1.2.3 From 457ae7268b29c33dee1c0feb143a15f6029d177b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Mar 2017 23:51:20 +0300 Subject: Btrfs: fix an integer overflow check This isn't super serious because you need CAP_ADMIN to run this code. I added this integer overflow check last year but apparently I am rubbish at writing integer overflow checks... There are two issues. First, access_ok() works on unsigned long type and not u64 so on 32 bit systems the access_ok() could be checking a truncated size. The other issue is that we should be using a stricter limit so we don't overflow the kzalloc() setting ctx->clone_roots later in the function after the access_ok(): alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); Fixes: f5ecec3ce21f ("btrfs: send: silence an integer overflow warning") Signed-off-by: Dan Carpenter Reviewed-by: David Sterba [ added comment ] Signed-off-by: David Sterba --- fs/btrfs/send.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 456c8901489b..a60d5bfb8a49 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6305,8 +6305,13 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } + /* + * Check that we don't overflow at later allocations, we request + * clone_sources_count + 1 items, and compare to unsigned long inside + * access_ok. + */ if (arg->clone_sources_count > - ULLONG_MAX / sizeof(*arg->clone_sources)) { + ULONG_MAX / sizeof(struct clone_root) - 1) { ret = -EINVAL; goto out; } -- cgit v1.2.3 From dd68f2ba0720e76c3a5bfa3f639c546f926792f5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 29 Mar 2017 13:13:15 +0100 Subject: drm/i915/execlists: Wrap tail pointer after reset tweaking If the request->wa_tail is 0 (because it landed exactly on the end of the ringbuffer), when we reconstruct request->tail following a reset we fill in an illegal value (-8 or 0x001ffff8). As a result, RING_HEAD is never able to catch up with RING_TAIL and the GPU spins endlessly. If the ring contains a couple of breadcrumbs, even our hangcheck is unable to catch the busy-looping as the ACTHD and seqno continually advance. v2: Move the wrap into a common intel_ring_wrap(). Fixes: a3aabe86a340 ("drm/i915/execlists: Reinitialise context image after GPU hang") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170327130009.4678-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala (cherry picked from commit 450362d3fe866b14304f309b5fffba0c33fbfbc3) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170329121315.1290-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 4 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 471af3b480ad..91555d4e9129 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1440,7 +1440,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, GEM_BUG_ON(request->ctx != port[0].request->ctx); /* Reset WaIdleLiteRestore:bdw,skl as well */ - request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); + request->tail = + intel_ring_wrap(request->ring, + request->wa_tail - WA_TAIL_DWORDS*sizeof(u32)); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 13dccb18cd43..8cb2078c5bfc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -521,11 +521,17 @@ static inline void intel_ring_advance(struct intel_ring *ring) */ } +static inline u32 +intel_ring_wrap(const struct intel_ring *ring, u32 pos) +{ + return pos & (ring->size - 1); +} + static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ u32 offset = addr - ring->vaddr; - return offset & (ring->size - 1); + return intel_ring_wrap(ring, offset); } int __intel_ring_space(int head, int tail, int size); -- cgit v1.2.3 From f3cd1b064f1179d9e6188c6d67297a2360880e10 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 22 Mar 2017 12:07:23 +0100 Subject: drm/etnaviv: (re-)protect fence allocation with GPU mutex The fence allocation needs to be protected by the GPU mutex, otherwise the fence seqnos of concurrent submits might not match the insertion order of the jobs in the kernel ring. This breaks the assumption that jobs complete with monotonically increasing fence seqnos. Fixes: d9853490176c (drm/etnaviv: take GPU lock later in the submit process) CC: stable@vger.kernel.org #4.9+ Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 130d7d517a19..da48819ff2e6 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1311,6 +1311,8 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, goto out_pm_put; } + mutex_lock(&gpu->lock); + fence = etnaviv_gpu_fence_alloc(gpu); if (!fence) { event_free(gpu, event); @@ -1318,8 +1320,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, goto out_pm_put; } - mutex_lock(&gpu->lock); - gpu->event[event].fence = fence; submit->fence = fence->seqno; gpu->active_fence = submit->fence; -- cgit v1.2.3 From 677e806da4d916052585301785d847c3b3e6186a Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 22 Mar 2017 07:29:31 +0000 Subject: xfrm_user: validate XFRM_MSG_NEWAE XFRMA_REPLAY_ESN_VAL replay_window When a new xfrm state is created during an XFRM_MSG_NEWSA call we validate the user supplied replay_esn to ensure that the size is valid and to ensure that the replay_window size is within the allocated buffer. However later it is possible to update this replay_esn via a XFRM_MSG_NEWAE call. There we again validate the size of the supplied buffer matches the existing state and if so inject the contents. We do not at this point check that the replay_window is within the allocated memory. This leads to out-of-bounds reads and writes triggered by netlink packets. This leads to memory corruption and the potential for priviledge escalation. We already attempt to validate the incoming replay information in xfrm_new_ae() via xfrm_replay_verify_len(). This confirms that the user is not trying to change the size of the replay state buffer which includes the replay_esn. It however does not check the replay_window remains within that buffer. Add validation of the contained replay_window. CVE-2017-7184 Signed-off-by: Andy Whitcroft Acked-by: Steffen Klassert Signed-off-by: Linus Torvalds --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 9705c279494b..cdf887fa61d5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -415,6 +415,9 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) return -EINVAL; + if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) + return -EINVAL; + return 0; } -- cgit v1.2.3 From f843ee6dd019bcece3e74e76ad9df0155655d0df Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 23 Mar 2017 07:45:44 +0000 Subject: xfrm_user: validate XFRM_MSG_NEWAE incoming ESN size harder Kees Cook has pointed out that xfrm_replay_state_esn_len() is subject to wrapping issues. To ensure we are correctly ensuring that the two ESN structures are the same size compare both the overall size as reported by xfrm_replay_state_esn_len() and the internal length are the same. CVE-2017-7184 Signed-off-by: Andy Whitcroft Acked-by: Steffen Klassert Signed-off-by: Linus Torvalds --- net/xfrm/xfrm_user.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cdf887fa61d5..40a8aa39220d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -412,7 +412,11 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); - if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen) + /* Check the overall length and the internal bitmap length to avoid + * potential overflow. */ + if (nla_len(rp) < ulen || + xfrm_replay_state_esn_len(replay_esn) != ulen || + replay_esn->bmp_len != up->bmp_len) return -EINVAL; if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) -- cgit v1.2.3 From fb411b837b587a32046dc4f369acb93a10b1def8 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:53 +0100 Subject: c6x/ptrace: Remove useless PTRACE_SETREGSET implementation gpr_set won't work correctly and can never have been tested, and the correct behaviour is not clear due to the endianness-dependent task layout. So, just remove it. The core code will now return -EOPNOTSUPPORT when trying to set NT_PRSTATUS on this architecture until/unless a correct implementation is supplied. Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds --- arch/c6x/kernel/ptrace.c | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c index 3c494e84444d..a511ac16a8e3 100644 --- a/arch/c6x/kernel/ptrace.c +++ b/arch/c6x/kernel/ptrace.c @@ -69,46 +69,6 @@ static int gpr_get(struct task_struct *target, 0, sizeof(*regs)); } -static int gpr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - struct pt_regs *regs = task_pt_regs(target); - - /* Don't copyin TSR or CSR */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s, - 0, PT_TSR * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - PT_TSR * sizeof(long), - (PT_TSR + 1) * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s, - (PT_TSR + 1) * sizeof(long), - PT_CSR * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - PT_CSR * sizeof(long), - (PT_CSR + 1) * sizeof(long)); - if (ret) - return ret; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - ®s, - (PT_CSR + 1) * sizeof(long), -1); - return ret; -} - enum c6x_regset { REGSET_GPR, }; @@ -120,7 +80,6 @@ static const struct user_regset c6x_regsets[] = { .size = sizeof(u32), .align = sizeof(u32), .get = gpr_get, - .set = gpr_set }, }; -- cgit v1.2.3 From 502585c7555083d4a949c08350306b9ec196779e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:54 +0100 Subject: h8300/ptrace: Fix incorrect register transfer count regs_set() and regs_get() are vulnerable to an off-by-1 buffer overrun if CONFIG_CPU_H8S is set, since this adds an extra entry to register_offset[] but not to user_regs_struct. So, iterate over user_regs_struct based on its actual size, not based on the length of register_offset[]. Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds --- arch/h8300/kernel/ptrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index 92075544a19a..0dc1c8f622bc 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -95,7 +95,8 @@ static int regs_get(struct task_struct *target, long *reg = (long *)®s; /* build user regs in buffer */ - for (r = 0; r < ARRAY_SIZE(register_offset); r++) + BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0); + for (r = 0; r < sizeof(regs) / sizeof(long); r++) *reg++ = h8300_get_reg(target, r); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -113,7 +114,8 @@ static int regs_set(struct task_struct *target, long *reg; /* build user regs in buffer */ - for (reg = (long *)®s, r = 0; r < ARRAY_SIZE(register_offset); r++) + BUILD_BUG_ON(sizeof(regs) % sizeof(long) != 0); + for (reg = (long *)®s, r = 0; r < sizeof(regs) / sizeof(long); r++) *reg++ = h8300_get_reg(target, r); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -122,7 +124,7 @@ static int regs_set(struct task_struct *target, return ret; /* write back to pt_regs */ - for (reg = (long *)®s, r = 0; r < ARRAY_SIZE(register_offset); r++) + for (reg = (long *)®s, r = 0; r < sizeof(regs) / sizeof(long); r++) h8300_put_reg(target, r, *reg++); return 0; } -- cgit v1.2.3 From a78ce80d2c9178351b34d78fec805140c29c193e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:55 +0100 Subject: metag/ptrace: Preserve previous registers for short regset write Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Acked-by: James Hogan Signed-off-by: Linus Torvalds --- arch/metag/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 7563628822bd..ae659ba61948 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -303,7 +303,7 @@ static int metag_tls_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { int ret; - void __user *tls; + void __user *tls = target->thread.tls_ptr; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) -- cgit v1.2.3 From 5fe81fe98123ce41265c65e95d34418d30d005d1 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:56 +0100 Subject: metag/ptrace: Provide default TXSTATUS for short NT_PRSTATUS Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill TXSTATUS, a well-defined default value is used, based on the task's current value. Suggested-by: James Hogan Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds --- arch/metag/kernel/ptrace.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index ae659ba61948..2e4dfc15abd3 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -24,6 +24,16 @@ * user_regset definitions. */ +static unsigned long user_txstatus(const struct pt_regs *regs) +{ + unsigned long data = (unsigned long)regs->ctx.Flags; + + if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) + data |= USER_GP_REGS_STATUS_CATCH_BIT; + + return data; +} + int metag_gp_regs_copyout(const struct pt_regs *regs, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) @@ -62,9 +72,7 @@ int metag_gp_regs_copyout(const struct pt_regs *regs, if (ret) goto out; /* TXSTATUS */ - data = (unsigned long)regs->ctx.Flags; - if (regs->ctx.SaveMask & TBICTX_CBUF_BIT) - data |= USER_GP_REGS_STATUS_CATCH_BIT; + data = user_txstatus(regs); ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &data, 4*25, 4*26); if (ret) @@ -119,6 +127,7 @@ int metag_gp_regs_copyin(struct pt_regs *regs, if (ret) goto out; /* TXSTATUS */ + data = user_txstatus(regs); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &data, 4*25, 4*26); if (ret) -- cgit v1.2.3 From 7195ee3120d878259e8d94a5d9f808116f34d5ea Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:57 +0100 Subject: metag/ptrace: Reject partial NT_METAG_RPIPE writes It's not clear what behaviour is sensible when doing partial write of NT_METAG_RPIPE, so just don't bother. This patch assumes that userspace will never rely on a partial SETREGSET in this case, since it's not clear what should happen anyway. Signed-off-by: Dave Martin Acked-by: James Hogan Signed-off-by: Linus Torvalds --- arch/metag/kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 2e4dfc15abd3..5e2dc7defd2c 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -253,6 +253,8 @@ int metag_rp_state_copyin(struct pt_regs *regs, unsigned long long *ptr; int ret, i; + if (count < 4*13) + return -EINVAL; /* Read the entire pipeline before making any changes */ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &rp, 0, 4*13); -- cgit v1.2.3 From d614fd58a2834cfe4efa472c33c8f3ce2338b09b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:58 +0100 Subject: mips/ptrace: Preserve previous registers for short regset write Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Signed-off-by: Linus Torvalds --- arch/mips/kernel/ptrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c8ba26072132..5d2498eb2340 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -485,7 +485,8 @@ static int fpr_set(struct task_struct *target, &target->thread.fpu, 0, sizeof(elf_fpregset_t)); - for (i = 0; i < NUM_FPU_REGS; i++) { + BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t)); + for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) { err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpr_val, i * sizeof(elf_fpreg_t), (i + 1) * sizeof(elf_fpreg_t)); -- cgit v1.2.3 From d3805c546b275c8cc7d40f759d029ae92c7175f2 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 27 Mar 2017 15:10:59 +0100 Subject: sparc/ptrace: Preserve previous registers for short regset write Ensure that if userspace supplies insufficient data to PTRACE_SETREGSET to fill all the registers, the thread's old registers are preserved. Signed-off-by: Dave Martin Acked-by: David S. Miller Signed-off-by: Linus Torvalds --- arch/sparc/kernel/ptrace_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 901063c1cf7e..341129a40e94 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -350,7 +350,7 @@ static int genregs64_set(struct task_struct *target, } if (!ret) { - unsigned long y; + unsigned long y = regs->y; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &y, -- cgit v1.2.3 From 94d7ee0baa8b764cf64ad91ed69464c1a6a0066b Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 29 Mar 2017 08:44:59 +0200 Subject: l2tp: hold tunnel socket when handling control frames in l2tp_ip and l2tp_ip6 The code following l2tp_tunnel_find() expects that a new reference is held on sk. Either sk_receive_skb() or the discard_put error path will drop a reference from the tunnel's socket. This issue exists in both l2tp_ip and l2tp_ip6. Fixes: a3c18422a4b4 ("l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv()") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 5 +++-- net/l2tp/l2tp_ip6.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index d25038cfd64e..7208fbe5856b 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -178,9 +178,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index a4abcbc4c09a..516d7ce24ba7 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -191,9 +191,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); -- cgit v1.2.3 From e91793bb615cf6cdd59c0b6749fe173687bb0947 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 29 Mar 2017 08:45:29 +0200 Subject: l2tp: purge socket queues in the .destruct() callback The Rx path may grab the socket right before pppol2tp_release(), but nothing guarantees that it will enqueue packets before skb_queue_purge(). Therefore, the socket can be destroyed without its queues fully purged. Fix this by purging queues in pppol2tp_session_destruct() where we're guaranteed nothing is still referencing the socket. Fixes: 9e9cb6221aa7 ("l2tp: fix userspace reception on plain L2TP sockets") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 36cc56fd0418..123b6a2411a0 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session) static void pppol2tp_session_destruct(struct sock *sk) { struct l2tp_session *session = sk->sk_user_data; + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_write_queue); + if (session) { sk->sk_user_data = NULL; BUG_ON(session->magic != L2TP_SESSION_MAGIC); @@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock) l2tp_session_queue_purge(session); sock_put(sk); } - skb_queue_purge(&sk->sk_receive_queue); - skb_queue_purge(&sk->sk_write_queue); - release_sock(sk); /* This will delete the session context via -- cgit v1.2.3 From 916a008b4b8ecc02fbd035cfb133773dba1ff3d7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 29 Mar 2017 17:12:47 +0100 Subject: ARM: dma-mapping: disallow dma_get_sgtable() for non-kernel managed memory dma_get_sgtable() tries to create a scatterlist table containing valid struct page pointers for the coherent memory allocation passed in to it. However, memory can be declared via dma_declare_coherent_memory(), or via other reservation schemes which means that coherent memory is not guaranteed to be backed by struct pages. In such cases, the resulting scatterlist table contains pointers to invalid pages, which causes kernel oops later. This patch adds detection of such memory, and refuses to create a scatterlist table for such memory. Reported-by: Shuah Khan Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 63eabb06f9f1..475811f5383a 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -935,13 +935,31 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); } +/* + * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems + * that the intention is to allow exporting memory allocated via the + * coherent DMA APIs through the dma_buf API, which only accepts a + * scattertable. This presents a couple of problems: + * 1. Not all memory allocated via the coherent DMA APIs is backed by + * a struct page + * 2. Passing coherent DMA memory into the streaming APIs is not allowed + * as we will try to flush the memory through a different alias to that + * actually being used (and the flushes are redundant.) + */ int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, unsigned long attrs) { - struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); + unsigned long pfn = dma_to_pfn(dev, handle); + struct page *page; int ret; + /* If the PFN is not valid, we do not have a struct page */ + if (!pfn_valid(pfn)) + return -ENXIO; + + page = pfn_to_page(pfn); + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); if (unlikely(ret)) return ret; -- cgit v1.2.3 From 3cc070c1c81948b33ebe2ea68cd39307ce2b312d Mon Sep 17 00:00:00 2001 From: afzal mohammed Date: Thu, 23 Mar 2017 13:49:32 +0100 Subject: ARM: 8665/1: nommu: access ID_PFR1 only if CPUID scheme Greg upon trying to boot no-MMU Kernel on ARM926EJ reported boot failure. He root caused it to ID_PFR1 access introduced by the commit mentioned in the fixes tag below. All CP15 processors need not have processor feature registers, only for architectures defined by CPUID scheme would have it. Hence check for it before accessing processor feature register, ID_PFR1. Fixes: f8300a0b5de0 ("ARM: 8647/2: nommu: dynamic exception base address setting") Reported-by: Greg Ungerer Signed-off-by: afzal mohammed Tested-by: Greg Ungerer Signed-off-by: Russell King --- arch/arm/mm/nommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 3b5c7aaf9c76..33a45bd96860 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -303,7 +303,10 @@ static inline void set_vbar(unsigned long val) */ static inline bool security_extensions_enabled(void) { - return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4); + /* Check CPUID Identification Scheme before ID_PFR1 read */ + if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) + return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4); + return 0; } static unsigned long __init setup_vectors_base(void) -- cgit v1.2.3 From 554bfeceb8a22d448cd986fc9efce25e833278a1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 29 Mar 2017 21:41:05 +0200 Subject: parisc: Fix access fault handling in pa_memcpy() pa_memcpy() is the major memcpy implementation in the parisc kernel which is used to do any kind of userspace/kernel memory copies. Al Viro noticed various bugs in the implementation of pa_mempcy(), most notably that in case of faults it may report back to have copied more bytes than it actually did. Fixing those bugs is quite hard in the C-implementation, because the compiler is messing around with the registers and we are not guaranteed that specific variables are always in the same processor registers. This makes proper fault handling complicated. This patch implements pa_memcpy() in assembler. That way we have correct fault handling and adding a 64-bit copy routine was quite easy. Runtime tested with 32- and 64bit kernels. Reported-by: Al Viro Cc: # v4.9+ Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/lib/lusercopy.S | 318 ++++++++++++++++++++++++++++++ arch/parisc/lib/memcpy.c | 461 +------------------------------------------- 2 files changed, 321 insertions(+), 458 deletions(-) diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S index 56845de6b5df..f01188c044ee 100644 --- a/arch/parisc/lib/lusercopy.S +++ b/arch/parisc/lib/lusercopy.S @@ -5,6 +5,8 @@ * Copyright (C) 2000 Richard Hirst * Copyright (C) 2001 Matthieu Delahaye * Copyright (C) 2003 Randolph Chung + * Copyright (C) 2017 Helge Deller + * Copyright (C) 2017 John David Anglin * * * This program is free software; you can redistribute it and/or modify @@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user) .procend + + +/* + * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) + * + * Inputs: + * - sr1 already contains space of source region + * - sr2 already contains space of destination region + * + * Returns: + * - number of bytes that could not be copied. + * On success, this will be zero. + * + * This code is based on a C-implementation of a copy routine written by + * Randolph Chung, which in turn was derived from the glibc. + * + * Several strategies are tried to try to get the best performance for various + * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes + * at a time using general registers. Unaligned copies are handled either by + * aligning the destination and then using shift-and-write method, or in a few + * cases by falling back to a byte-at-a-time copy. + * + * Testing with various alignments and buffer sizes shows that this code is + * often >10x faster than a simple byte-at-a-time copy, even for strangely + * aligned operands. It is interesting to note that the glibc version of memcpy + * (written in C) is actually quite fast already. This routine is able to beat + * it by 30-40% for aligned copies because of the loop unrolling, but in some + * cases the glibc version is still slightly faster. This lends more + * credibility that gcc can generate very good code as long as we are careful. + * + * Possible optimizations: + * - add cache prefetching + * - try not to use the post-increment address modifiers; they may create + * additional interlocks. Assumption is that those were only efficient on old + * machines (pre PA8000 processors) + */ + + dst = arg0 + src = arg1 + len = arg2 + end = arg3 + t1 = r19 + t2 = r20 + t3 = r21 + t4 = r22 + srcspc = sr1 + dstspc = sr2 + + t0 = r1 + a1 = t1 + a2 = t2 + a3 = t3 + a0 = t4 + + save_src = ret0 + save_dst = ret1 + save_len = r31 + +ENTRY_CFI(pa_memcpy) + .proc + .callinfo NO_CALLS + .entry + + /* Last destination address */ + add dst,len,end + + /* short copy with less than 16 bytes? */ + cmpib,>>=,n 15,len,.Lbyte_loop + + /* same alignment? */ + xor src,dst,t0 + extru t0,31,2,t1 + cmpib,<>,n 0,t1,.Lunaligned_copy + +#ifdef CONFIG_64BIT + /* only do 64-bit copies if we can get aligned. */ + extru t0,31,3,t1 + cmpib,<>,n 0,t1,.Lalign_loop32 + + /* loop until we are 64-bit aligned */ +.Lalign_loop64: + extru dst,31,3,t1 + cmpib,=,n 0,t1,.Lcopy_loop_16 +20: ldb,ma 1(srcspc,src),t1 +21: stb,ma t1,1(dstspc,dst) + b .Lalign_loop64 + ldo -1(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + + ldi 31,t0 +.Lcopy_loop_16: + cmpb,COND(>>=),n t0,len,.Lword_loop + +10: ldd 0(srcspc,src),t1 +11: ldd 8(srcspc,src),t2 + ldo 16(src),src +12: std,ma t1,8(dstspc,dst) +13: std,ma t2,8(dstspc,dst) +14: ldd 0(srcspc,src),t1 +15: ldd 8(srcspc,src),t2 + ldo 16(src),src +16: std,ma t1,8(dstspc,dst) +17: std,ma t2,8(dstspc,dst) + + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault) + ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault) + ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) + + b .Lcopy_loop_16 + ldo -32(len),len + +.Lword_loop: + cmpib,COND(>>=),n 3,len,.Lbyte_loop +20: ldw,ma 4(srcspc,src),t1 +21: stw,ma t1,4(dstspc,dst) + b .Lword_loop + ldo -4(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + +#endif /* CONFIG_64BIT */ + + /* loop until we are 32-bit aligned */ +.Lalign_loop32: + extru dst,31,2,t1 + cmpib,=,n 0,t1,.Lcopy_loop_4 +20: ldb,ma 1(srcspc,src),t1 +21: stb,ma t1,1(dstspc,dst) + b .Lalign_loop32 + ldo -1(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + + +.Lcopy_loop_4: + cmpib,COND(>>=),n 15,len,.Lbyte_loop + +10: ldw 0(srcspc,src),t1 +11: ldw 4(srcspc,src),t2 +12: stw,ma t1,4(dstspc,dst) +13: stw,ma t2,4(dstspc,dst) +14: ldw 8(srcspc,src),t1 +15: ldw 12(srcspc,src),t2 + ldo 16(src),src +16: stw,ma t1,4(dstspc,dst) +17: stw,ma t2,4(dstspc,dst) + + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault) + ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault) + ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done) + + b .Lcopy_loop_4 + ldo -16(len),len + +.Lbyte_loop: + cmpclr,COND(<>) len,%r0,%r0 + b,n .Lcopy_done +20: ldb 0(srcspc,src),t1 + ldo 1(src),src +21: stb,ma t1,1(dstspc,dst) + b .Lbyte_loop + ldo -1(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + +.Lcopy_done: + bv %r0(%r2) + sub end,dst,ret0 + + + /* src and dst are not aligned the same way. */ + /* need to go the hard way */ +.Lunaligned_copy: + /* align until dst is 32bit-word-aligned */ + extru dst,31,2,t1 + cmpib,COND(=),n 0,t1,.Lcopy_dstaligned +20: ldb 0(srcspc,src),t1 + ldo 1(src),src +21: stb,ma t1,1(dstspc,dst) + b .Lunaligned_copy + ldo -1(len),len + + ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done) + ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done) + +.Lcopy_dstaligned: + + /* store src, dst and len in safe place */ + copy src,save_src + copy dst,save_dst + copy len,save_len + + /* len now needs give number of words to copy */ + SHRREG len,2,len + + /* + * Copy from a not-aligned src to an aligned dst using shifts. + * Handles 4 words per loop. + */ + + depw,z src,28,2,t0 + subi 32,t0,t0 + mtsar t0 + extru len,31,2,t0 + cmpib,= 2,t0,.Lcase2 + /* Make src aligned by rounding it down. */ + depi 0,31,2,src + + cmpiclr,<> 3,t0,%r0 + b,n .Lcase3 + cmpiclr,<> 1,t0,%r0 + b,n .Lcase1 +.Lcase0: + cmpb,= %r0,len,.Lcda_finish + nop + +1: ldw,ma 4(srcspc,src), a3 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) +1: ldw,ma 4(srcspc,src), a0 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + b,n .Ldo3 +.Lcase1: +1: ldw,ma 4(srcspc,src), a2 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) +1: ldw,ma 4(srcspc,src), a3 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + ldo -1(len),len + cmpb,=,n %r0,len,.Ldo0 +.Ldo4: +1: ldw,ma 4(srcspc,src), a0 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + shrpw a2, a3, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) +.Ldo3: +1: ldw,ma 4(srcspc,src), a1 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + shrpw a3, a0, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) +.Ldo2: +1: ldw,ma 4(srcspc,src), a2 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + shrpw a0, a1, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) +.Ldo1: +1: ldw,ma 4(srcspc,src), a3 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + shrpw a1, a2, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) + ldo -4(len),len + cmpb,<> %r0,len,.Ldo4 + nop +.Ldo0: + shrpw a2, a3, %sar, t0 +1: stw,ma t0, 4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done) + +.Lcda_rdfault: +.Lcda_finish: + /* calculate new src, dst and len and jump to byte-copy loop */ + sub dst,save_dst,t0 + add save_src,t0,src + b .Lbyte_loop + sub save_len,t0,len + +.Lcase3: +1: ldw,ma 4(srcspc,src), a0 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) +1: ldw,ma 4(srcspc,src), a1 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + b .Ldo2 + ldo 1(len),len +.Lcase2: +1: ldw,ma 4(srcspc,src), a1 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) +1: ldw,ma 4(srcspc,src), a2 + ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault) + b .Ldo1 + ldo 2(len),len + + + /* fault exception fixup handlers: */ +#ifdef CONFIG_64BIT +.Lcopy16_fault: +10: b .Lcopy_done + std,ma t1,8(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) +#endif + +.Lcopy8_fault: +10: b .Lcopy_done + stw,ma t1,4(dstspc,dst) + ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done) + + .exit +ENDPROC_CFI(pa_memcpy) + .procend + .end diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index f82ff10ed974..b3d47ec1d80a 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -2,7 +2,7 @@ * Optimized memory copy routines. * * Copyright (C) 2004 Randolph Chung - * Copyright (C) 2013 Helge Deller + * Copyright (C) 2013-2017 Helge Deller * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,474 +21,21 @@ * Portions derived from the GNU C Library * Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc. * - * Several strategies are tried to try to get the best performance for various - * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using - * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using - * general registers. Unaligned copies are handled either by aligning the - * destination and then using shift-and-write method, or in a few cases by - * falling back to a byte-at-a-time copy. - * - * I chose to implement this in C because it is easier to maintain and debug, - * and in my experiments it appears that the C code generated by gcc (3.3/3.4 - * at the time of writing) is fairly optimal. Unfortunately some of the - * semantics of the copy routine (exception handling) is difficult to express - * in C, so we have to play some tricks to get it to work. - * - * All the loads and stores are done via explicit asm() code in order to use - * the right space registers. - * - * Testing with various alignments and buffer sizes shows that this code is - * often >10x faster than a simple byte-at-a-time copy, even for strangely - * aligned operands. It is interesting to note that the glibc version - * of memcpy (written in C) is actually quite fast already. This routine is - * able to beat it by 30-40% for aligned copies because of the loop unrolling, - * but in some cases the glibc version is still slightly faster. This lends - * more credibility that gcc can generate very good code as long as we are - * careful. - * - * TODO: - * - cache prefetching needs more experimentation to get optimal settings - * - try not to use the post-increment address modifiers; they create additional - * interlocks - * - replace byte-copy loops with stybs sequences */ -#ifdef __KERNEL__ #include #include #include -#define s_space "%%sr1" -#define d_space "%%sr2" -#else -#include "memcpy.h" -#define s_space "%%sr0" -#define d_space "%%sr0" -#define pa_memcpy new2_copy -#endif DECLARE_PER_CPU(struct exception_data, exception_data); -#define preserve_branch(label) do { \ - volatile int dummy = 0; \ - /* The following branch is never taken, it's just here to */ \ - /* prevent gcc from optimizing away our exception code. */ \ - if (unlikely(dummy != dummy)) \ - goto label; \ -} while (0) - #define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3)) #define get_kernel_space() (0) -#define MERGE(w0, sh_1, w1, sh_2) ({ \ - unsigned int _r; \ - asm volatile ( \ - "mtsar %3\n" \ - "shrpw %1, %2, %%sar, %0\n" \ - : "=r"(_r) \ - : "r"(w0), "r"(w1), "r"(sh_2) \ - ); \ - _r; \ -}) -#define THRESHOLD 16 - -#ifdef DEBUG_MEMCPY -#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0) -#else -#define DPRINTF(fmt, args...) -#endif - -#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ - __asm__ __volatile__ ( \ - "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ - : _tt(_t), "+r"(_a) \ - : \ - : "r8") - -#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \ - __asm__ __volatile__ ( \ - "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ - : "+r"(_a) \ - : _tt(_t) \ - : "r8") - -#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e) -#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e) -#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e) -#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e) -#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e) -#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e) - -#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \ - __asm__ __volatile__ ( \ - "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ - : _tt(_t) \ - : "r"(_a) \ - : "r8") - -#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \ - __asm__ __volatile__ ( \ - "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \ - : \ - : _tt(_t), "r"(_a) \ - : "r8") - -#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e) -#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e) - -#ifdef CONFIG_PREFETCH -static inline void prefetch_src(const void *addr) -{ - __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr)); -} - -static inline void prefetch_dst(const void *addr) -{ - __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr)); -} -#else -#define prefetch_src(addr) do { } while(0) -#define prefetch_dst(addr) do { } while(0) -#endif - -#define PA_MEMCPY_OK 0 -#define PA_MEMCPY_LOAD_ERROR 1 -#define PA_MEMCPY_STORE_ERROR 2 - -/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words - * per loop. This code is derived from glibc. - */ -static noinline unsigned long copy_dstaligned(unsigned long dst, - unsigned long src, unsigned long len) -{ - /* gcc complains that a2 and a3 may be uninitialized, but actually - * they cannot be. Initialize a2/a3 to shut gcc up. - */ - register unsigned int a0, a1, a2 = 0, a3 = 0; - int sh_1, sh_2; - - /* prefetch_src((const void *)src); */ - - /* Calculate how to shift a word read at the memory operation - aligned srcp to make it aligned for copy. */ - sh_1 = 8 * (src % sizeof(unsigned int)); - sh_2 = 8 * sizeof(unsigned int) - sh_1; - - /* Make src aligned by rounding it down. */ - src &= -sizeof(unsigned int); - - switch (len % 4) - { - case 2: - /* a1 = ((unsigned int *) src)[0]; - a2 = ((unsigned int *) src)[1]; */ - ldw(s_space, 0, src, a1, cda_ldw_exc); - ldw(s_space, 4, src, a2, cda_ldw_exc); - src -= 1 * sizeof(unsigned int); - dst -= 3 * sizeof(unsigned int); - len += 2; - goto do1; - case 3: - /* a0 = ((unsigned int *) src)[0]; - a1 = ((unsigned int *) src)[1]; */ - ldw(s_space, 0, src, a0, cda_ldw_exc); - ldw(s_space, 4, src, a1, cda_ldw_exc); - src -= 0 * sizeof(unsigned int); - dst -= 2 * sizeof(unsigned int); - len += 1; - goto do2; - case 0: - if (len == 0) - return PA_MEMCPY_OK; - /* a3 = ((unsigned int *) src)[0]; - a0 = ((unsigned int *) src)[1]; */ - ldw(s_space, 0, src, a3, cda_ldw_exc); - ldw(s_space, 4, src, a0, cda_ldw_exc); - src -=-1 * sizeof(unsigned int); - dst -= 1 * sizeof(unsigned int); - len += 0; - goto do3; - case 1: - /* a2 = ((unsigned int *) src)[0]; - a3 = ((unsigned int *) src)[1]; */ - ldw(s_space, 0, src, a2, cda_ldw_exc); - ldw(s_space, 4, src, a3, cda_ldw_exc); - src -=-2 * sizeof(unsigned int); - dst -= 0 * sizeof(unsigned int); - len -= 1; - if (len == 0) - goto do0; - goto do4; /* No-op. */ - } - - do - { - /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */ -do4: - /* a0 = ((unsigned int *) src)[0]; */ - ldw(s_space, 0, src, a0, cda_ldw_exc); - /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ - stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); -do3: - /* a1 = ((unsigned int *) src)[1]; */ - ldw(s_space, 4, src, a1, cda_ldw_exc); - /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */ - stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc); -do2: - /* a2 = ((unsigned int *) src)[2]; */ - ldw(s_space, 8, src, a2, cda_ldw_exc); - /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */ - stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc); -do1: - /* a3 = ((unsigned int *) src)[3]; */ - ldw(s_space, 12, src, a3, cda_ldw_exc); - /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */ - stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc); - - src += 4 * sizeof(unsigned int); - dst += 4 * sizeof(unsigned int); - len -= 4; - } - while (len != 0); - -do0: - /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */ - stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc); - - preserve_branch(handle_load_error); - preserve_branch(handle_store_error); - - return PA_MEMCPY_OK; - -handle_load_error: - __asm__ __volatile__ ("cda_ldw_exc:\n"); - return PA_MEMCPY_LOAD_ERROR; - -handle_store_error: - __asm__ __volatile__ ("cda_stw_exc:\n"); - return PA_MEMCPY_STORE_ERROR; -} - - -/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR. - * In case of an access fault the faulty address can be read from the per_cpu - * exception data struct. */ -static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp, - unsigned long len) -{ - register unsigned long src, dst, t1, t2, t3; - register unsigned char *pcs, *pcd; - register unsigned int *pws, *pwd; - register double *pds, *pdd; - unsigned long ret; - - src = (unsigned long)srcp; - dst = (unsigned long)dstp; - pcs = (unsigned char *)srcp; - pcd = (unsigned char *)dstp; - - /* prefetch_src((const void *)srcp); */ - - if (len < THRESHOLD) - goto byte_copy; - - /* Check alignment */ - t1 = (src ^ dst); - if (unlikely(t1 & (sizeof(double)-1))) - goto unaligned_copy; - - /* src and dst have same alignment. */ - - /* Copy bytes till we are double-aligned. */ - t2 = src & (sizeof(double) - 1); - if (unlikely(t2 != 0)) { - t2 = sizeof(double) - t2; - while (t2 && len) { - /* *pcd++ = *pcs++; */ - ldbma(s_space, pcs, t3, pmc_load_exc); - len--; - stbma(d_space, t3, pcd, pmc_store_exc); - t2--; - } - } - - pds = (double *)pcs; - pdd = (double *)pcd; - -#if 0 - /* Copy 8 doubles at a time */ - while (len >= 8*sizeof(double)) { - register double r1, r2, r3, r4, r5, r6, r7, r8; - /* prefetch_src((char *)pds + L1_CACHE_BYTES); */ - flddma(s_space, pds, r1, pmc_load_exc); - flddma(s_space, pds, r2, pmc_load_exc); - flddma(s_space, pds, r3, pmc_load_exc); - flddma(s_space, pds, r4, pmc_load_exc); - fstdma(d_space, r1, pdd, pmc_store_exc); - fstdma(d_space, r2, pdd, pmc_store_exc); - fstdma(d_space, r3, pdd, pmc_store_exc); - fstdma(d_space, r4, pdd, pmc_store_exc); - -#if 0 - if (L1_CACHE_BYTES <= 32) - prefetch_src((char *)pds + L1_CACHE_BYTES); -#endif - flddma(s_space, pds, r5, pmc_load_exc); - flddma(s_space, pds, r6, pmc_load_exc); - flddma(s_space, pds, r7, pmc_load_exc); - flddma(s_space, pds, r8, pmc_load_exc); - fstdma(d_space, r5, pdd, pmc_store_exc); - fstdma(d_space, r6, pdd, pmc_store_exc); - fstdma(d_space, r7, pdd, pmc_store_exc); - fstdma(d_space, r8, pdd, pmc_store_exc); - len -= 8*sizeof(double); - } -#endif - - pws = (unsigned int *)pds; - pwd = (unsigned int *)pdd; - -word_copy: - while (len >= 8*sizeof(unsigned int)) { - register unsigned int r1,r2,r3,r4,r5,r6,r7,r8; - /* prefetch_src((char *)pws + L1_CACHE_BYTES); */ - ldwma(s_space, pws, r1, pmc_load_exc); - ldwma(s_space, pws, r2, pmc_load_exc); - ldwma(s_space, pws, r3, pmc_load_exc); - ldwma(s_space, pws, r4, pmc_load_exc); - stwma(d_space, r1, pwd, pmc_store_exc); - stwma(d_space, r2, pwd, pmc_store_exc); - stwma(d_space, r3, pwd, pmc_store_exc); - stwma(d_space, r4, pwd, pmc_store_exc); - - ldwma(s_space, pws, r5, pmc_load_exc); - ldwma(s_space, pws, r6, pmc_load_exc); - ldwma(s_space, pws, r7, pmc_load_exc); - ldwma(s_space, pws, r8, pmc_load_exc); - stwma(d_space, r5, pwd, pmc_store_exc); - stwma(d_space, r6, pwd, pmc_store_exc); - stwma(d_space, r7, pwd, pmc_store_exc); - stwma(d_space, r8, pwd, pmc_store_exc); - len -= 8*sizeof(unsigned int); - } - - while (len >= 4*sizeof(unsigned int)) { - register unsigned int r1,r2,r3,r4; - ldwma(s_space, pws, r1, pmc_load_exc); - ldwma(s_space, pws, r2, pmc_load_exc); - ldwma(s_space, pws, r3, pmc_load_exc); - ldwma(s_space, pws, r4, pmc_load_exc); - stwma(d_space, r1, pwd, pmc_store_exc); - stwma(d_space, r2, pwd, pmc_store_exc); - stwma(d_space, r3, pwd, pmc_store_exc); - stwma(d_space, r4, pwd, pmc_store_exc); - len -= 4*sizeof(unsigned int); - } - - pcs = (unsigned char *)pws; - pcd = (unsigned char *)pwd; - -byte_copy: - while (len) { - /* *pcd++ = *pcs++; */ - ldbma(s_space, pcs, t3, pmc_load_exc); - stbma(d_space, t3, pcd, pmc_store_exc); - len--; - } - - return PA_MEMCPY_OK; - -unaligned_copy: - /* possibly we are aligned on a word, but not on a double... */ - if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) { - t2 = src & (sizeof(unsigned int) - 1); - - if (unlikely(t2 != 0)) { - t2 = sizeof(unsigned int) - t2; - while (t2) { - /* *pcd++ = *pcs++; */ - ldbma(s_space, pcs, t3, pmc_load_exc); - stbma(d_space, t3, pcd, pmc_store_exc); - len--; - t2--; - } - } - - pws = (unsigned int *)pcs; - pwd = (unsigned int *)pcd; - goto word_copy; - } - - /* Align the destination. */ - if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) { - t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1)); - while (t2) { - /* *pcd++ = *pcs++; */ - ldbma(s_space, pcs, t3, pmc_load_exc); - stbma(d_space, t3, pcd, pmc_store_exc); - len--; - t2--; - } - dst = (unsigned long)pcd; - src = (unsigned long)pcs; - } - - ret = copy_dstaligned(dst, src, len / sizeof(unsigned int)); - if (ret) - return ret; - - pcs += (len & -sizeof(unsigned int)); - pcd += (len & -sizeof(unsigned int)); - len %= sizeof(unsigned int); - - preserve_branch(handle_load_error); - preserve_branch(handle_store_error); - - goto byte_copy; - -handle_load_error: - __asm__ __volatile__ ("pmc_load_exc:\n"); - return PA_MEMCPY_LOAD_ERROR; - -handle_store_error: - __asm__ __volatile__ ("pmc_store_exc:\n"); - return PA_MEMCPY_STORE_ERROR; -} - - /* Returns 0 for success, otherwise, returns number of bytes not transferred. */ -static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len) -{ - unsigned long ret, fault_addr, reference; - struct exception_data *d; - - ret = pa_memcpy_internal(dstp, srcp, len); - if (likely(ret == PA_MEMCPY_OK)) - return 0; - - /* if a load or store fault occured we can get the faulty addr */ - d = this_cpu_ptr(&exception_data); - fault_addr = d->fault_addr; - - /* error in load or store? */ - if (ret == PA_MEMCPY_LOAD_ERROR) - reference = (unsigned long) srcp; - else - reference = (unsigned long) dstp; +extern unsigned long pa_memcpy(void *dst, const void *src, + unsigned long len); - DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n", - ret, len, fault_addr, reference); - - if (fault_addr >= reference) - return len - (fault_addr - reference); - else - return len; -} - -#ifdef __KERNEL__ unsigned long __copy_to_user(void __user *dst, const void *src, unsigned long len) { @@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size) return __probe_kernel_read(dst, src, size); } - -#endif -- cgit v1.2.3 From d19f5e41b344a057bb2450024a807476f30978d2 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Mar 2017 11:59:15 +0100 Subject: parisc: Clean up fixup routines for get_user()/put_user() Al Viro noticed that userspace accesses via get_user()/put_user() can be simplified a lot with regard to usage of the exception handling. This patch implements a fixup routine for get_user() and put_user() in such that the exception handler will automatically load -EFAULT into the register %r8 (the error value) in case on a fault on userspace. Additionally the fixup routine will zero the target register on fault in case of a get_user() call. The target register is extracted out of the faulting assembly instruction. This patch brings a few benefits over the old implementation: 1. Exception handling gets much cleaner, easier and smaller in size. 2. Helper functions like fixup_get_user_skip_1 (all of fixup.S) can be dropped. 3. No need to hardcode %r9 as target register for get_user() any longer. This helps the compiler register allocator and thus creates less assembler statements. 4. No dependency on the exception_data contents any longer. 5. Nested faults will be handled cleanly. Reported-by: Al Viro Cc: # v4.9+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/uaccess.h | 59 +++++++++++++---------- arch/parisc/kernel/parisc_ksyms.c | 10 ---- arch/parisc/lib/Makefile | 2 +- arch/parisc/lib/fixup.S | 98 --------------------------------------- arch/parisc/mm/fault.c | 17 +++++++ 5 files changed, 52 insertions(+), 134 deletions(-) delete mode 100644 arch/parisc/lib/fixup.S diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index edfbf9d6a6dd..8442727f28d2 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -64,6 +64,15 @@ struct exception_table_entry { ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ ".previous\n" +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT into %r8 for a read or write fault, and zeroes the target + * register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) + /* * The page fault handler stores, in a per-cpu area, the following information * if a fixup routine is available. @@ -91,7 +100,7 @@ struct exception_data { #define __get_user(x, ptr) \ ({ \ register long __gu_err __asm__ ("r8") = 0; \ - register long __gu_val __asm__ ("r9") = 0; \ + register long __gu_val; \ \ load_sr2(); \ switch (sizeof(*(ptr))) { \ @@ -107,22 +116,23 @@ struct exception_data { }) #define __get_user_asm(ldx, ptr) \ - __asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\ + __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "=r"(__gu_val), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err) \ - : "r1"); + : "r"(ptr), "1"(__gu_err)); #if !defined(CONFIG_64BIT) #define __get_user_asm64(ptr) \ - __asm__("\n1:\tldw 0(%%sr2,%2),%0" \ - "\n2:\tldw 4(%%sr2,%2),%R0\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\ - ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\ + __asm__(" copy %%r0,%R0\n" \ + "1: ldw 0(%%sr2,%2),%0\n" \ + "2: ldw 4(%%sr2,%2),%R0\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ : "=r"(__gu_val), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err) \ - : "r1"); + : "r"(ptr), "1"(__gu_err)); #endif /* !defined(CONFIG_64BIT) */ @@ -148,32 +158,31 @@ struct exception_data { * The "__put_user/kernel_asm()" macros tell gcc they read from memory * instead of writing. This is because they do not write to any memory * gcc knows about, so there are no aliasing issues. These macros must - * also be aware that "fixup_put_user_skip_[12]" are executed in the - * context of the fault, and any registers used there must be listed - * as clobbers. In this case only "r1" is used by the current routines. - * r8/r9 are already listed as err/val. + * also be aware that fixups are executed in the context of the fault, + * and any registers used there must be listed as clobbers. + * r8 is already listed as err. */ #define __put_user_asm(stx, x, ptr) \ __asm__ __volatile__ ( \ - "\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\ + "1: " stx " %2,0(%%sr2,%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "=r"(__pu_err) \ - : "r"(ptr), "r"(x), "0"(__pu_err) \ - : "r1") + : "r"(ptr), "r"(x), "0"(__pu_err)) #if !defined(CONFIG_64BIT) #define __put_user_asm64(__val, ptr) do { \ __asm__ __volatile__ ( \ - "\n1:\tstw %2,0(%%sr2,%1)" \ - "\n2:\tstw %R2,4(%%sr2,%1)\n\t" \ - ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\ - ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\ + "1: stw %2,0(%%sr2,%1)\n" \ + "2: stw %R2,4(%%sr2,%1)\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ : "=r"(__pu_err) \ - : "r"(ptr), "r"(__val), "0"(__pu_err) \ - : "r1"); \ + : "r"(ptr), "r"(__val), "0"(__pu_err)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 7484b3d11e0d..c6d6272a934f 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64); EXPORT_SYMBOL(lclear_user); EXPORT_SYMBOL(lstrnlen_user); -/* Global fixups - defined as int to avoid creation of function pointers */ -extern int fixup_get_user_skip_1; -extern int fixup_get_user_skip_2; -extern int fixup_put_user_skip_1; -extern int fixup_put_user_skip_2; -EXPORT_SYMBOL(fixup_get_user_skip_1); -EXPORT_SYMBOL(fixup_get_user_skip_2); -EXPORT_SYMBOL(fixup_put_user_skip_1); -EXPORT_SYMBOL(fixup_put_user_skip_2); - #ifndef CONFIG_64BIT /* Needed so insmod can set dp value */ extern int $global$; diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile index 8fa92b8d839a..f2dac4d73b1b 100644 --- a/arch/parisc/lib/Makefile +++ b/arch/parisc/lib/Makefile @@ -2,7 +2,7 @@ # Makefile for parisc-specific library files # -lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \ +lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ ucmpdi2.o delay.o obj-y := iomap.o diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S deleted file mode 100644 index a5b72f22c7a6..000000000000 --- a/arch/parisc/lib/fixup.S +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Linux/PA-RISC Project (http://www.parisc-linux.org/) - * - * Copyright (C) 2004 Randolph Chung - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Fixup routines for kernel exception handling. - */ -#include -#include -#include -#include - -#ifdef CONFIG_SMP - .macro get_fault_ip t1 t2 - loadgp - addil LT%__per_cpu_offset,%r27 - LDREG RT%__per_cpu_offset(%r1),\t1 - /* t2 = smp_processor_id() */ - mfctl 30,\t2 - ldw TI_CPU(\t2),\t2 -#ifdef CONFIG_64BIT - extrd,u \t2,63,32,\t2 -#endif - /* t2 = &__per_cpu_offset[smp_processor_id()]; */ - LDREGX \t2(\t1),\t2 - addil LT%exception_data,%r27 - LDREG RT%exception_data(%r1),\t1 - /* t1 = this_cpu_ptr(&exception_data) */ - add,l \t1,\t2,\t1 - /* %r27 = t1->fault_gp - restore gp */ - LDREG EXCDATA_GP(\t1), %r27 - /* t1 = t1->fault_ip */ - LDREG EXCDATA_IP(\t1), \t1 - .endm -#else - .macro get_fault_ip t1 t2 - loadgp - /* t1 = this_cpu_ptr(&exception_data) */ - addil LT%exception_data,%r27 - LDREG RT%exception_data(%r1),\t2 - /* %r27 = t2->fault_gp - restore gp */ - LDREG EXCDATA_GP(\t2), %r27 - /* t1 = t2->fault_ip */ - LDREG EXCDATA_IP(\t2), \t1 - .endm -#endif - - .level LEVEL - - .text - .section .fixup, "ax" - - /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */ -ENTRY_CFI(fixup_get_user_skip_1) - get_fault_ip %r1,%r8 - ldo 4(%r1), %r1 - ldi -EFAULT, %r8 - bv %r0(%r1) - copy %r0, %r9 -ENDPROC_CFI(fixup_get_user_skip_1) - -ENTRY_CFI(fixup_get_user_skip_2) - get_fault_ip %r1,%r8 - ldo 8(%r1), %r1 - ldi -EFAULT, %r8 - bv %r0(%r1) - copy %r0, %r9 -ENDPROC_CFI(fixup_get_user_skip_2) - - /* put_user() fixups, store -EFAULT in r8 */ -ENTRY_CFI(fixup_put_user_skip_1) - get_fault_ip %r1,%r8 - ldo 4(%r1), %r1 - bv %r0(%r1) - ldi -EFAULT, %r8 -ENDPROC_CFI(fixup_put_user_skip_1) - -ENTRY_CFI(fixup_put_user_skip_2) - get_fault_ip %r1,%r8 - ldo 8(%r1), %r1 - bv %r0(%r1) - ldi -EFAULT, %r8 -ENDPROC_CFI(fixup_put_user_skip_2) - diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index deab89a8915a..32ec22146141 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,6 +150,23 @@ int fixup_exception(struct pt_regs *regs) d->fault_space = regs->isr; d->fault_addr = regs->ior; + /* + * Fix up get_user() and put_user(). + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant + * bit in the relative address of the fixup routine to indicate + * that %r8 should be loaded with -EFAULT to report a userspace + * access error. + */ + if (fix->fixup & 1) { + regs->gr[8] = -EFAULT; + + /* zero target register for get_user() */ + if (parisc_acctyp(0, regs->iir) == VM_READ) { + int treg = regs->iir & 0x1f; + regs->gr[treg] = 0; + } + } + regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup; regs->iaoq[0] &= ~3; /* -- cgit v1.2.3 From 476e75a44b56038bee9207242d4bc718f6b4de06 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 29 Mar 2017 08:25:30 +0200 Subject: parisc: Avoid stalled CPU warnings after system shutdown Commit 73580dac7618 ("parisc: Fix system shutdown halt") introduced an endless loop for systems which don't provide a software power off function. But the soft lockup detector will detect this and report stalled CPUs after some time. Avoid those unwanted warnings by disabling the soft lockup detector. Fixes: 73580dac7618 ("parisc: Fix system shutdown halt") Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.9+ --- arch/parisc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index b76f503eee4a..4516a5b53f38 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -143,6 +143,8 @@ void machine_power_off(void) printk(KERN_EMERG "System shut down completed.\n" "Please power this system off now."); + /* prevent soft lockup/stalled CPU messages for endless loop. */ + rcu_sysrq_start(); for (;;); } -- cgit v1.2.3 From 293d264f13cbde328d5477f49e3103edbc1dc191 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Thu, 23 Mar 2017 20:52:46 +0530 Subject: cpuidle: powernv: Pass correct drv->cpumask for registration drv->cpumask defaults to cpu_possible_mask in __cpuidle_driver_init(). On PowerNV platform cpu_present could be less than cpu_possible in cases where firmware detects the cpu, but it is not available to the OS. When CONFIG_HOTPLUG_CPU=n, such cpus are not hotplugable at runtime and hence we skip creating cpu_device. This breaks cpuidle on powernv where register_cpu() is not called for cpus in cpu_possible_mask that cannot be hot-added at runtime. Trying cpuidle_register_device() on cpu without cpu_device will cause crash like this: cpu 0xf: Vector: 380 (Data SLB Access) at [c000000ff1503490] pc: c00000000022c8bc: string+0x34/0x60 lr: c00000000022ed78: vsnprintf+0x284/0x42c sp: c000000ff1503710 msr: 9000000000009033 dar: 6000000060000000 current = 0xc000000ff1480000 paca = 0xc00000000fe82d00 softe: 0 irq_happened: 0x01 pid = 1, comm = swapper/8 Linux version 4.11.0-rc2 (sv@sagarika) (gcc version 4.9.4 (Buildroot 2017.02-00004-gc28573e) ) #15 SMP Fri Mar 17 19:32:02 IST 2017 enter ? for help [link register ] c00000000022ed78 vsnprintf+0x284/0x42c [c000000ff1503710] c00000000022ebb8 vsnprintf+0xc4/0x42c (unreliable) [c000000ff1503800] c00000000022ef40 vscnprintf+0x20/0x44 [c000000ff1503830] c0000000000ab61c vprintk_emit+0x94/0x2cc [c000000ff15038a0] c0000000000acc9c vprintk_func+0x60/0x74 [c000000ff15038c0] c000000000619694 printk+0x38/0x4c [c000000ff15038e0] c000000000224950 kobject_get+0x40/0x60 [c000000ff1503950] c00000000022507c kobject_add_internal+0x60/0x2c4 [c000000ff15039e0] c000000000225350 kobject_init_and_add+0x70/0x78 [c000000ff1503a60] c00000000053c288 cpuidle_add_sysfs+0x9c/0xe0 [c000000ff1503ae0] c00000000053aeac cpuidle_register_device+0xd4/0x12c [c000000ff1503b30] c00000000053b108 cpuidle_register+0x98/0xcc [c000000ff1503bc0] c00000000085eaf0 powernv_processor_idle_init+0x140/0x1e0 [c000000ff1503c60] c00000000000cd60 do_one_initcall+0xc0/0x15c [c000000ff1503d20] c000000000833e84 kernel_init_freeable+0x1a0/0x25c [c000000ff1503dc0] c00000000000d478 kernel_init+0x24/0x12c [c000000ff1503e30] c00000000000b564 ret_from_kernel_thread+0x5c/0x78 This patch fixes the bug by passing correct cpumask from powernv-cpuidle driver. Signed-off-by: Vaidyanathan Srinivasan Reviewed-by: Gautham R. Shenoy Acked-by: Michael Ellerman [ rjw: Comment massage ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-powernv.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 370593006f5f..cda8f62d555b 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -175,6 +175,24 @@ static int powernv_cpuidle_driver_init(void) drv->state_count += 1; } + /* + * On the PowerNV platform cpu_present may be less than cpu_possible in + * cases when firmware detects the CPU, but it is not available to the + * OS. If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at + * run time and hence cpu_devices are not created for those CPUs by the + * generic topology_init(). + * + * drv->cpumask defaults to cpu_possible_mask in + * __cpuidle_driver_init(). This breaks cpuidle on PowerNV where + * cpu_devices are not created for CPUs in cpu_possible_mask that + * cannot be hot-added later at run time. + * + * Trying cpuidle_register_device() on a CPU without a cpu_device is + * incorrect, so pass a correct CPU mask to the generic cpuidle driver. + */ + + drv->cpumask = (struct cpumask *)cpu_present_mask; + return 0; } -- cgit v1.2.3 From 2247925f0942dc4e7c09b1cde45ca18461d94c5f Mon Sep 17 00:00:00 2001 From: Sankar Patchineelam Date: Tue, 28 Mar 2017 19:47:29 -0400 Subject: bnxt_en: Fix NULL pointer dereference in reopen failure path Net device reset can fail when the h/w or f/w is in a bad state. Subsequent netdevice open fails in bnxt_hwrm_stat_ctx_alloc(). The cleanup invokes bnxt_hwrm_resource_free() which inturn calls bnxt_disable_int(). In this routine, the code segment if (ring->fw_ring_id != INVALID_HW_RING_ID) BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); results in NULL pointer dereference as cpr->cp_doorbell is not yet initialized, and fw_ring_id is zero. The fix is to initialize cpr fw_ring_id to INVALID_HW_RING_ID before bnxt_init_chip() is invoked. Signed-off-by: Sankar Patchineelam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 32de4589d16a..7ee772483f26 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2455,6 +2455,18 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) return 0; } +static void bnxt_init_cp_rings(struct bnxt *bp) +{ + int i; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; + struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; + + ring->fw_ring_id = INVALID_HW_RING_ID; + } +} + static int bnxt_init_rx_rings(struct bnxt *bp) { int i, rc = 0; @@ -5006,6 +5018,7 @@ static int bnxt_shutdown_nic(struct bnxt *bp, bool irq_re_init) static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init) { + bnxt_init_cp_rings(bp); bnxt_init_rx_rings(bp); bnxt_init_tx_rings(bp); bnxt_init_ring_grps(bp, irq_re_init); -- cgit v1.2.3 From 23e12c893489ed12ecfccbf866fc62af1bead4b0 Mon Sep 17 00:00:00 2001 From: Sankar Patchineelam Date: Tue, 28 Mar 2017 19:47:30 -0400 Subject: bnxt_en: Correct the order of arguments to netdev_err() in bnxt_set_tpa() Signed-off-by: Sankar Patchineelam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7ee772483f26..6c8568780a6a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4744,7 +4744,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa) rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags); if (rc) { netdev_err(bp->dev, "hwrm vnic set tpa failure rc for vnic %d: %x\n", - rc, i); + i, rc); return rc; } } -- cgit v1.2.3 From 3ed3a83e3f3871c57b18cef09b148e96921236ed Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 28 Mar 2017 19:47:31 -0400 Subject: bnxt_en: Fix DMA unmapping of the RX buffers in XDP mode during shutdown. In bnxt_free_rx_skbs(), which is called to free up all RX buffers during shutdown, we need to unmap the page if we are running in XDP mode. Fixes: c61fb99cae51 ("bnxt_en: Add RX page mode support.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6c8568780a6a..1f1e54ba0ecb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1983,20 +1983,25 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) for (j = 0; j < max_idx; j++) { struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[j]; + dma_addr_t mapping = rx_buf->mapping; void *data = rx_buf->data; if (!data) continue; - dma_unmap_single(&pdev->dev, rx_buf->mapping, - bp->rx_buf_use_size, bp->rx_dir); - rx_buf->data = NULL; - if (BNXT_RX_PAGE_MODE(bp)) + if (BNXT_RX_PAGE_MODE(bp)) { + mapping -= bp->rx_dma_offset; + dma_unmap_page(&pdev->dev, mapping, + PAGE_SIZE, bp->rx_dir); __free_page(data); - else + } else { + dma_unmap_single(&pdev->dev, mapping, + bp->rx_buf_use_size, + bp->rx_dir); kfree(data); + } } for (j = 0; j < max_agg_idx; j++) { -- cgit v1.2.3 From 358e78b5f445c35f5b7f9241425fb499ee9fe3e2 Mon Sep 17 00:00:00 2001 From: Zakharov Vlad Date: Wed, 29 Mar 2017 13:41:46 +0300 Subject: ezchip: nps_enet: check if napi has been completed After a new NAPI_STATE_MISSED state was added to NAPI we can get into this state and in such case we have to reschedule NAPI as some work is still pending and we have to process it. napi_complete_done() function returns false if we have to reschedule something (e.g. in case we were in MISSED state) as current polling have not been completed yet. nps_enet driver hasn't been verifying the return value of napi_complete_done() and has been forcibly enabling interrupts. That is not correct as we should not enable interrupts before we have processed all scheduled work. As a result we were getting trapped in interrupt hanlder chain as we had never been able to disabale ethernet interrupts again. So this patch makes nps_enet_poll() func verify return value of napi_complete_done() and enable interrupts only in case all scheduled work has been completed. Signed-off-by: Vlad Zakharov Signed-off-by: David S. Miller --- drivers/net/ethernet/ezchip/nps_enet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index 992ebe973d25..f819843e2bae 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -189,11 +189,9 @@ static int nps_enet_poll(struct napi_struct *napi, int budget) nps_enet_tx_handler(ndev); work_done = nps_enet_rx_handler(ndev); - if (work_done < budget) { + if ((work_done < budget) && napi_complete_done(napi, work_done)) { u32 buf_int_enable_value = 0; - napi_complete_done(napi, work_done); - /* set tx_done and rx_rdy bits */ buf_int_enable_value |= NPS_ENET_ENABLE << RX_RDY_SHIFT; buf_int_enable_value |= NPS_ENET_ENABLE << TX_DONE_SHIFT; -- cgit v1.2.3 From b79c52aef3cdee903017c1e9834b53996d70010e Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Thu, 30 Mar 2017 01:48:39 +0800 Subject: drm/i915/gvt: Activate/de-activate vGPU in mdev ops. This patch introduces two functions for activating/de-activating vGPU in mdev ops. A racing condition was found between virtual vblank emulation and KVGMT mdev release path. V-blank emulation will emulate and inject V-blank interrupt for every active vGPU with holding gvt->lock, while in mdev release path, it will directly release hypervisor handle without changing vGPU status or taking gvt->lock, so a kernel oops is encountered when vblank emulation is injecting a interrupt with a invalid hypervisor handle. (Reported by Terrence) To solve this problem, we factor out vGPU activation/de-activation from vGPU creation/destruction path and let KVMGT mdev release ops de-activate the vGPU before release hypervisor handle. Once a vGPU is de-activated, GVT-g will not emulate v-blank for it or touch the hypervisor handle. Fixes: 659643f ("drm/i915/gvt/kvmgt: add vfio/mdev support to KVMGT") Signed-off-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 2 ++ drivers/gpu/drm/i915/gvt/gvt.h | 5 ++++- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 ++++ drivers/gpu/drm/i915/gvt/vgpu.c | 43 +++++++++++++++++++++++++++++++++++----- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 3b9d59e457ba..ef3baa0c4754 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -52,6 +52,8 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_create = intel_gvt_create_vgpu, .vgpu_destroy = intel_gvt_destroy_vgpu, .vgpu_reset = intel_gvt_reset_vgpu, + .vgpu_activate = intel_gvt_activate_vgpu, + .vgpu_deactivate = intel_gvt_deactivate_vgpu, }; /** diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 23791920ced1..2387eacf74bb 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -382,7 +382,8 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, unsigned int engine_mask); void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu); - +void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu); /* validating GM functions */ #define vgpu_gmadr_is_aperture(vgpu, gmadr) \ @@ -449,6 +450,8 @@ struct intel_gvt_ops { struct intel_vgpu_type *); void (*vgpu_destroy)(struct intel_vgpu *); void (*vgpu_reset)(struct intel_vgpu *); + void (*vgpu_activate)(struct intel_vgpu *); + void (*vgpu_deactivate)(struct intel_vgpu *); }; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index d641214578a7..9843d74056a8 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -544,6 +544,8 @@ static int intel_vgpu_open(struct mdev_device *mdev) if (ret) goto undo_group; + intel_gvt_ops->vgpu_activate(vgpu); + atomic_set(&vgpu->vdev.released, 0); return ret; @@ -569,6 +571,8 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1)) return; + intel_gvt_ops->vgpu_deactivate(vgpu); + ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY, &vgpu->vdev.iommu_notifier); WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 41cfa5ccae84..2f5792a1ce38 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -179,20 +179,34 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) } /** - * intel_gvt_destroy_vgpu - destroy a virtual GPU + * intel_gvt_active_vgpu - activate a virtual GPU * @vgpu: virtual GPU * - * This function is called when user wants to destroy a virtual GPU. + * This function is called when user wants to activate a virtual GPU. * */ -void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) +void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu) +{ + mutex_lock(&vgpu->gvt->lock); + vgpu->active = true; + mutex_unlock(&vgpu->gvt->lock); +} + +/** + * intel_gvt_deactive_vgpu - deactivate a virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to deactivate a virtual GPU. + * All virtual GPU runtime information will be destroyed. + * + */ +void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; mutex_lock(&gvt->lock); vgpu->active = false; - idr_remove(&gvt->vgpu_idr, vgpu->id); if (atomic_read(&vgpu->running_workload_num)) { mutex_unlock(&gvt->lock); @@ -201,6 +215,26 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) } intel_vgpu_stop_schedule(vgpu); + + mutex_unlock(&gvt->lock); +} + +/** + * intel_gvt_destroy_vgpu - destroy a virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to destroy a virtual GPU. + * + */ +void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) +{ + struct intel_gvt *gvt = vgpu->gvt; + + mutex_lock(&gvt->lock); + + WARN(vgpu->active, "vGPU is still active!\n"); + + idr_remove(&gvt->vgpu_idr, vgpu->id); intel_vgpu_clean_sched_policy(vgpu); intel_vgpu_clean_gvt_context(vgpu); intel_vgpu_clean_execlist(vgpu); @@ -277,7 +311,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_shadow_ctx; - vgpu->active = true; mutex_unlock(&gvt->lock); return vgpu; -- cgit v1.2.3 From d09c5373e8e4eaaa09233552cbf75dc4c4f21203 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Mar 2017 09:48:04 +0200 Subject: s390/uaccess: get_user() should zero on failure (again) Commit fd2d2b191fe7 ("s390: get_user() should zero on failure") intended to fix s390's get_user() implementation which did not zero the target operand if the read from user space faulted. Unfortunately the patch has no effect: the corresponding inline assembly specifies that the operand is only written to ("=") and the previous value is discarded. Therefore the compiler is free to and actually does omit the zero initialization. To fix this simply change the contraint modifier to "+", so the compiler cannot omit the initialization anymore. Fixes: c9ca78415ac1 ("s390/uaccess: provide inline variants of get_user/put_user") Fixes: fd2d2b191fe7 ("s390: get_user() should zero on failure") Cc: stable@vger.kernel.org Cc: Al Viro Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 136932ff4250..3ea1554d04b3 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -147,7 +147,7 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, " jg 2b\n" \ ".popsection\n" \ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ - : "=d" (__rc), "=Q" (*(to)) \ + : "=d" (__rc), "+Q" (*(to)) \ : "d" (size), "Q" (*(from)), \ "d" (__reg0), "K" (-EFAULT) \ : "cc"); \ -- cgit v1.2.3 From 740372b76e7966604e0f4dd0de13135513024f0d Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 20 Mar 2017 21:04:05 -0700 Subject: tcmu: Allow cmd_time_out to be set to zero (disabled) The new cmd_time_out configfs attribute for TCMU is allowed to be disabled, so go ahead and drop the tcmu_cmd_time_out_store() check. Reported-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c6874c38a10b..6a17c78e4662 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1196,11 +1196,6 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag if (ret < 0) return ret; - if (!val) { - pr_err("Illegal value for cmd_time_out\n"); - return -EINVAL; - } - udev->cmd_time_out = val * MSEC_PER_SEC; return count; } -- cgit v1.2.3 From afea03fcf3d5db8a968f4b97797b8b83ada0dba3 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Mon, 20 Mar 2017 15:05:29 +0530 Subject: usb: gadget: Correct usb EP argument for BOT status request This patch corrects the argument in usb_ep_free_request as it is mistakenly set to ep_out. It should be ep_in for status request. Signed-off-by: Manish Narani Acked-by: Felipe Balbi Signed-off-by: Nicholas Bellinger --- drivers/usb/gadget/function/f_tcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index d2351139342f..a82e2bd5ea34 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -373,7 +373,7 @@ static void bot_cleanup_old_alt(struct f_uas *fu) usb_ep_free_request(fu->ep_in, fu->bot_req_in); usb_ep_free_request(fu->ep_out, fu->bot_req_out); usb_ep_free_request(fu->ep_out, fu->cmd.req); - usb_ep_free_request(fu->ep_out, fu->bot_status.req); + usb_ep_free_request(fu->ep_in, fu->bot_status.req); kfree(fu->cmd.buf); -- cgit v1.2.3 From efb2ea770bb3b0f40007530bc8b0c22f36e1c5eb Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 23 Mar 2017 17:19:24 -0700 Subject: iscsi-target: Fix TMR reference leak during session shutdown This patch fixes a iscsi-target specific TMR reference leak during session shutdown, that could occur when a TMR was quiesced before the hand-off back to iscsi-target code via transport_cmd_check_stop_to_fabric(). The reference leak happens because iscsit_free_cmd() was incorrectly skipping the final target_put_sess_cmd() for TMRs when transport_generic_free_cmd() returned zero because the se_cmd->cmd_kref did not reach zero, due to the missing se_cmd assignment in original code. The result was iscsi_cmd and it's associated se_cmd memory would be freed once se_sess->sess_cmd_map where released, but the associated se_tmr_req was leaked and remained part of se_device->dev_tmr_list. This bug would manfiest itself as kernel paging request OOPsen in core_tmr_lun_reset(), when a left-over se_tmr_req attempted to dereference it's se_cmd pointer that had already been released during normal session shutdown. To address this bug, go ahead and treat ISCSI_OP_SCSI_CMD and ISCSI_OP_SCSI_TMFUNC the same when there is an extra se_cmd->cmd_kref to drop in iscsit_free_cmd(), and use op_scsi to signal __iscsit_free_cmd() when the former needs to clear any further iscsi related I/O state. Reported-by: Rob Millner Cc: Rob Millner Reported-by: Chu Yuan Lin Cc: Chu Yuan Lin Tested-by: Chu Yuan Lin Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_util.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index 5041a9c8bdcb..b4640338f8d8 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -737,21 +737,23 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown) { struct se_cmd *se_cmd = NULL; int rc; + bool op_scsi = false; /* * Determine if a struct se_cmd is associated with * this struct iscsi_cmd. */ switch (cmd->iscsi_opcode) { case ISCSI_OP_SCSI_CMD: - se_cmd = &cmd->se_cmd; - __iscsit_free_cmd(cmd, true, shutdown); + op_scsi = true; /* * Fallthrough */ case ISCSI_OP_SCSI_TMFUNC: - rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown); - if (!rc && shutdown && se_cmd && se_cmd->se_sess) { - __iscsit_free_cmd(cmd, true, shutdown); + se_cmd = &cmd->se_cmd; + __iscsit_free_cmd(cmd, op_scsi, shutdown); + rc = transport_generic_free_cmd(se_cmd, shutdown); + if (!rc && shutdown && se_cmd->se_sess) { + __iscsit_free_cmd(cmd, op_scsi, shutdown); target_put_sess_cmd(se_cmd); } break; -- cgit v1.2.3 From 49cb77e297dc611a1b795cfeb79452b3002bd331 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 27 Mar 2017 16:12:43 -0700 Subject: target: Avoid mappedlun symlink creation during lun shutdown This patch closes a race between se_lun deletion during configfs unlink in target_fabric_port_unlink() -> core_dev_del_lun() -> core_tpg_remove_lun(), when transport_clear_lun_ref() blocks waiting for percpu_ref RCU grace period to finish, but a new NodeACL mappedlun is added before the RCU grace period has completed. This can happen in target_fabric_mappedlun_link() because it only checks for se_lun->lun_se_dev, which is not cleared until after transport_clear_lun_ref() percpu_ref RCU grace period finishes. This bug originally manifested as NULL pointer dereference OOPsen in target_stat_scsi_att_intr_port_show_attr_dev() on v4.1.y code, because it dereferences lun->lun_se_dev without a explicit NULL pointer check. In post v4.1 code with target-core RCU conversion, the code in target_stat_scsi_att_intr_port_show_attr_dev() no longer uses se_lun->lun_se_dev, but the same race still exists. To address the bug, go ahead and set se_lun>lun_shutdown as early as possible in core_tpg_remove_lun(), and ensure new NodeACL mappedlun creation in target_fabric_mappedlun_link() fails during se_lun shutdown. Reported-by: James Shen Cc: James Shen Tested-by: James Shen Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_fabric_configfs.c | 5 +++++ drivers/target/target_core_tpg.c | 4 ++++ include/target/target_core_base.h | 1 + 3 files changed, 10 insertions(+) diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index d8a16ca6baa5..d1e6cab8e3d3 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -92,6 +92,11 @@ static int target_fabric_mappedlun_link( pr_err("Source se_lun->lun_se_dev does not exist\n"); return -EINVAL; } + if (lun->lun_shutdown) { + pr_err("Unable to create mappedlun symlink because" + " lun->lun_shutdown=true\n"); + return -EINVAL; + } se_tpg = lun->lun_tpg; nacl_ci = &lun_acl_ci->ci_parent->ci_group->cg_item; diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 6fb191914f45..dfaef4d3b2d2 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -642,6 +642,8 @@ void core_tpg_remove_lun( */ struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev); + lun->lun_shutdown = true; + core_clear_lun_from_tpg(lun, tpg); /* * Wait for any active I/O references to percpu se_lun->lun_ref to @@ -663,6 +665,8 @@ void core_tpg_remove_lun( } if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) hlist_del_rcu(&lun->link); + + lun->lun_shutdown = false; mutex_unlock(&tpg->tpg_lun_mutex); percpu_ref_exit(&lun->lun_ref); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 4b784b6e21c0..2e282461cfa5 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -705,6 +705,7 @@ struct se_lun { u64 unpacked_lun; #define SE_LUN_LINK_MAGIC 0xffff7771 u32 lun_link_magic; + bool lun_shutdown; bool lun_access_ro; u32 lun_index; -- cgit v1.2.3 From ab22d2604c86ceb01bb2725c9860b88a7dd383bb Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 27 Mar 2017 17:07:40 +0800 Subject: tcmu: Fix possible overwrite of t_data_sg's last iov[] If there has BIDI data, its first iov[] will overwrite the last iov[] for se_cmd->t_data_sg. To fix this, we can just increase the iov pointer, but this may introuduce a new memory leakage bug: If the se_cmd->data_length and se_cmd->t_bidi_data_sg->length are all not aligned up to the DATA_BLOCK_SIZE, the actual length needed maybe larger than just sum of them. So, this could be avoided by rounding all the data lengthes up to DATA_BLOCK_SIZE. Reviewed-by: Mike Christie Tested-by: Ilias Tsitsimpis Reviewed-by: Bryant G. Ly Signed-off-by: Xiubo Li Cc: stable@vger.kernel.org # 3.18+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 6a17c78e4662..e58dfd4fe448 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -394,6 +394,20 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d return true; } +static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd) +{ + struct se_cmd *se_cmd = tcmu_cmd->se_cmd; + size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE); + + if (se_cmd->se_cmd_flags & SCF_BIDI) { + BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents)); + data_length += round_up(se_cmd->t_bidi_data_sg->length, + DATA_BLOCK_SIZE); + } + + return data_length; +} + static sense_reason_t tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) { @@ -407,7 +421,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) uint32_t cmd_head; uint64_t cdb_off; bool copy_to_data_area; - size_t data_length; + size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd); DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS); if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) @@ -433,11 +447,6 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) mb = udev->mb_addr; cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ - data_length = se_cmd->data_length; - if (se_cmd->se_cmd_flags & SCF_BIDI) { - BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents)); - data_length += se_cmd->t_bidi_data_sg->length; - } if ((command_size > (udev->cmdr_size / 2)) || data_length > udev->data_size) { pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu " @@ -511,11 +520,14 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) entry->req.iov_dif_cnt = 0; /* Handle BIDI commands */ - iov_cnt = 0; - alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg, - se_cmd->t_bidi_data_nents, &iov, &iov_cnt, false); - entry->req.iov_bidi_cnt = iov_cnt; - + if (se_cmd->se_cmd_flags & SCF_BIDI) { + iov_cnt = 0; + iov++; + alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg, + se_cmd->t_bidi_data_nents, &iov, &iov_cnt, + false); + entry->req.iov_bidi_cnt = iov_cnt; + } /* cmd's data_bitmap is what changed in process */ bitmap_xor(tcmu_cmd->data_bitmap, old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS); -- cgit v1.2.3 From abe342a5b4b5aa579f6bf40ba73447c699e6b579 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Mon, 27 Mar 2017 17:07:41 +0800 Subject: tcmu: Fix wrongly calculating of the base_command_size The t_data_nents and t_bidi_data_nents are the numbers of the segments, but it couldn't be sure the block size equals to size of the segment. For the worst case, all the blocks are discontiguous and there will need the same number of iovecs, that's to say: blocks == iovs. So here just set the number of iovs to block count needed by tcmu cmd. Tested-by: Ilias Tsitsimpis Reviewed-by: Mike Christie Signed-off-by: Xiubo Li Cc: stable@vger.kernel.org # 3.18+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index e58dfd4fe448..9885d1b521fe 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -408,6 +408,13 @@ static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd) return data_length; } +static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd) +{ + size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd); + + return data_length / DATA_BLOCK_SIZE; +} + static sense_reason_t tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) { @@ -435,8 +442,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) * expensive to tell how many regions are freed in the bitmap */ base_command_size = max(offsetof(struct tcmu_cmd_entry, - req.iov[se_cmd->t_bidi_data_nents + - se_cmd->t_data_nents]), + req.iov[tcmu_cmd_get_block_cnt(tcmu_cmd)]), sizeof(struct tcmu_cmd_entry)); command_size = base_command_size + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE); -- cgit v1.2.3 From 8a146fbe1f1461aebc9b8f06a0f22e1a8a4f0dd1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Mar 2017 11:08:47 +0100 Subject: gpio: acpi: Call enable_irq_wake for _IAE GpioInts with Wake set On Bay Trail / Cherry Trail systems with a LID switch, the LID switch is often connect to a gpioint handled by an _IAE event handler. Before this commit such systems would not wake up when opening the lid, requiring the powerbutton to be pressed after opening the lid to wakeup. Note that Bay Trail / Cherry Trail systems use suspend-to-idle, so the interrupts are generated anyway on those lines on lid switch changes, but they are treated by the IRQ subsystem as spurious while suspended if not marked as wakeup IRQs. This commit calls enable_irq_wake() for _IAE GpioInts with a valid event handler which have their Wake flag set. This fixes such systems not waking up when opening the lid. Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 9b37a3692b3f..8e318f449d23 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -266,6 +266,9 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, goto fail_free_event; } + if (agpio->wake_capable == ACPI_WAKE_CAPABLE) + enable_irq_wake(irq); + list_add_tail(&event->node, &acpi_gpio->events); return AE_OK; @@ -339,6 +342,9 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) { struct gpio_desc *desc; + if (irqd_is_wakeup_set(irq_get_irq_data(event->irq))) + disable_irq_wake(event->irq); + free_irq(event->irq, event); desc = event->desc; if (WARN_ON(IS_ERR(desc))) -- cgit v1.2.3 From 693bdaa164b40b7aa6018b98af6f7e40dbd52457 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 23 Mar 2017 13:21:38 -0700 Subject: ACPI / gpio: do not fall back to parsing _CRS when we get a deferral If, while locating GPIOs by name, we get probe deferral, we should immediately report it to caller rather than trying to fall back to parsing unnamed GPIOs from _CRS block. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Acked-by: Mika Westerberg Acked-and-Tested-by: Hans de Goede Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 8e318f449d23..2bd683e2be02 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -577,8 +577,10 @@ struct gpio_desc *acpi_find_gpio(struct device *dev, } desc = acpi_get_gpiod_by_index(adev, propname, idx, &info); - if (!IS_ERR(desc) || (PTR_ERR(desc) == -EPROBE_DEFER)) + if (!IS_ERR(desc)) break; + if (PTR_ERR(desc) == -EPROBE_DEFER) + return ERR_CAST(desc); } /* Then from plain _CRS GPIOs */ -- cgit v1.2.3 From 8b4073596997f2ccbf68d8e72e07b827388a4536 Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Wed, 29 Mar 2017 10:35:39 -0700 Subject: HID: wacom: Don't add ghost interface as shared data A previous commit (below) adds a check for already probed interfaces to Wacom's matching heuristic. Unfortunately this causes the Bamboo Pen (CTL-460) to match itself to its 'ghost' touch interface. After subsequent changes to the driver this match to the ghost causes the kernel to crash. This patch avoids calling wacom_add_shared_data() for the BAMBOO_PEN's ghost touch interface. Fixes: 41372d5d40e7 ("HID: wacom: Augment 'oVid' and 'oPid' with heuristics for HID_GENERIC") Cc: stable # 4.9 Signed-off-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 994bddc55b82..b676f02d4b7f 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2165,6 +2165,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) wacom_update_name(wacom, wireless ? " (WL)" : ""); + /* pen only Bamboo neither support touch nor pad */ + if ((features->type == BAMBOO_PEN) && + ((features->device_type & WACOM_DEVICETYPE_TOUCH) || + (features->device_type & WACOM_DEVICETYPE_PAD))) { + error = -ENODEV; + goto fail; + } + error = wacom_add_shared_data(hdev); if (error) goto fail; @@ -2212,14 +2220,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) goto fail_quirks; } - /* pen only Bamboo neither support touch nor pad */ - if ((features->type == BAMBOO_PEN) && - ((features->device_type & WACOM_DEVICETYPE_TOUCH) || - (features->device_type & WACOM_DEVICETYPE_PAD))) { - error = -ENODEV; - goto fail_quirks; - } - if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR) error = hid_hw_open(hdev); -- cgit v1.2.3 From 4d20c332de377fa5b06c46f6f34174c10dd998e4 Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Wed, 29 Mar 2017 11:41:28 -0700 Subject: HID: wacom: call _query_tablet_data() for BAMBOO_TOUCH Commit a544c619a54b ("HID: wacom: do not attempt to switch mode while in probe") introduces delayed work for querying (setting the mode) on all tablets. Bamboo Touch (056a:00d0) has a ghost interface which claims to be a pen device. Though this device can be removed, we have to set the mode on the ghost pen interface before we remove it. After the aforementioned delay was introduced the device was being removed before the mode setting could be executed. Signed-off-by: Aaron Armstrong Skomra Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index b676f02d4b7f..e2666ef84dc1 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2216,6 +2216,8 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless) /* touch only Bamboo doesn't support pen */ if ((features->type == BAMBOO_TOUCH) && (features->device_type & WACOM_DEVICETYPE_PEN)) { + cancel_delayed_work_sync(&wacom->init_work); + _wacom_query_tablet_data(wacom); error = -ENODEV; goto fail_quirks; } -- cgit v1.2.3 From f7652afa8eadb416b23eb57dec6f158529942041 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 11:09:08 +0200 Subject: drm/vmwgfx: Type-check lookups of fence objects A malicious caller could otherwise hand over handles to other objects causing all sorts of interesting problems. Testing done: Ran a Fedora 25 desktop using both Xorg and gnome-shell/Wayland. Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 77 +++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 6541dd8b82dc..4076063e0fdd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -538,7 +538,7 @@ int vmw_fence_create(struct vmw_fence_manager *fman, struct vmw_fence_obj **p_fence) { struct vmw_fence_obj *fence; - int ret; + int ret; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (unlikely(fence == NULL)) @@ -701,6 +701,41 @@ void vmw_fence_fifo_up(struct vmw_fence_manager *fman) } +/** + * vmw_fence_obj_lookup - Look up a user-space fence object + * + * @tfile: A struct ttm_object_file identifying the caller. + * @handle: A handle identifying the fence object. + * @return: A struct vmw_user_fence base ttm object on success or + * an error pointer on failure. + * + * The fence object is looked up and type-checked. The caller needs + * to have opened the fence object first, but since that happens on + * creation and fence objects aren't shareable, that's not an + * issue currently. + */ +static struct ttm_base_object * +vmw_fence_obj_lookup(struct ttm_object_file *tfile, u32 handle) +{ + struct ttm_base_object *base = ttm_base_object_lookup(tfile, handle); + + if (!base) { + pr_err("Invalid fence object handle 0x%08lx.\n", + (unsigned long)handle); + return ERR_PTR(-EINVAL); + } + + if (base->refcount_release != vmw_user_fence_base_release) { + pr_err("Invalid fence object handle 0x%08lx.\n", + (unsigned long)handle); + ttm_base_object_unref(&base); + return ERR_PTR(-EINVAL); + } + + return base; +} + + int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -726,13 +761,9 @@ int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data, arg->kernel_cookie = jiffies + wait_timeout; } - base = ttm_base_object_lookup(tfile, arg->handle); - if (unlikely(base == NULL)) { - printk(KERN_ERR "Wait invalid fence object handle " - "0x%08lx.\n", - (unsigned long)arg->handle); - return -EINVAL; - } + base = vmw_fence_obj_lookup(tfile, arg->handle); + if (IS_ERR(base)) + return PTR_ERR(base); fence = &(container_of(base, struct vmw_user_fence, base)->fence); @@ -771,13 +802,9 @@ int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data, struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; struct vmw_private *dev_priv = vmw_priv(dev); - base = ttm_base_object_lookup(tfile, arg->handle); - if (unlikely(base == NULL)) { - printk(KERN_ERR "Fence signaled invalid fence object handle " - "0x%08lx.\n", - (unsigned long)arg->handle); - return -EINVAL; - } + base = vmw_fence_obj_lookup(tfile, arg->handle); + if (IS_ERR(base)) + return PTR_ERR(base); fence = &(container_of(base, struct vmw_user_fence, base)->fence); fman = fman_from_fence(fence); @@ -1024,6 +1051,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, (struct drm_vmw_fence_event_arg *) data; struct vmw_fence_obj *fence = NULL; struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); + struct ttm_object_file *tfile = vmw_fp->tfile; struct drm_vmw_fence_rep __user *user_fence_rep = (struct drm_vmw_fence_rep __user *)(unsigned long) arg->fence_rep; @@ -1037,15 +1065,11 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, */ if (arg->handle) { struct ttm_base_object *base = - ttm_base_object_lookup_for_ref(dev_priv->tdev, - arg->handle); - - if (unlikely(base == NULL)) { - DRM_ERROR("Fence event invalid fence object handle " - "0x%08lx.\n", - (unsigned long)arg->handle); - return -EINVAL; - } + vmw_fence_obj_lookup(tfile, arg->handle); + + if (IS_ERR(base)) + return PTR_ERR(base); + fence = &(container_of(base, struct vmw_user_fence, base)->fence); (void) vmw_fence_obj_reference(fence); @@ -1053,7 +1077,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, if (user_fence_rep != NULL) { bool existed; - ret = ttm_ref_object_add(vmw_fp->tfile, base, + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, &existed); if (unlikely(ret != 0)) { DRM_ERROR("Failed to reference a fence " @@ -1097,8 +1121,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, return 0; out_no_create: if (user_fence_rep != NULL) - ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile, - handle, TTM_REF_USAGE); + ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE); out_no_ref_obj: vmw_fence_obj_unreference(&fence); return ret; -- cgit v1.2.3 From 36274ab8c596f1240c606bb514da329add2a1bcd Mon Sep 17 00:00:00 2001 From: Murray McAllister Date: Mon, 27 Mar 2017 11:12:53 +0200 Subject: drm/vmwgfx: NULL pointer dereference in vmw_surface_define_ioctl() Before memory allocations vmw_surface_define_ioctl() checks the upper-bounds of a user-supplied size, but does not check if the supplied size is 0. Add check to avoid NULL pointer dereferences. Cc: Signed-off-by: Murray McAllister Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index b445ce9b9757..f410502cb075 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -716,8 +716,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) num_sizes += req->mip_levels[i]; - if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * - DRM_VMW_MAX_MIP_LEVELS) + if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS || + num_sizes == 0) return -EINVAL; size = vmw_user_surface_size + 128 + -- cgit v1.2.3 From 63774069d9527a1aeaa4aa20e929ef5e8e9ecc38 Mon Sep 17 00:00:00 2001 From: Murray McAllister Date: Mon, 27 Mar 2017 11:15:12 +0200 Subject: drm/vmwgfx: avoid calling vzalloc with a 0 size in vmw_get_cap_3d_ioctl() In vmw_get_cap_3d_ioctl(), a user can supply 0 for a size that is used in vzalloc(). This eventually calls dump_stack() (in warn_alloc()), which can leak useful addresses to dmesg. Add check to avoid a size of 0. Cc: Signed-off-by: Murray McAllister Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index b8c6a03c8c54..1802d0e7fab8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -186,7 +186,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data, bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS); struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv); - if (unlikely(arg->pad64 != 0)) { + if (unlikely(arg->pad64 != 0 || arg->max_size == 0)) { DRM_ERROR("Illegal GET_3D_CAP argument.\n"); return -EINVAL; } -- cgit v1.2.3 From fe25deb7737ce6c0879ccf79c99fa1221d428bf2 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 11:21:25 +0200 Subject: drm/ttm, drm/vmwgfx: Relax permission checking when opening surfaces Previously, when a surface was opened using a legacy (non prime) handle, it was verified to have been created by a client in the same master realm. Relax this so that opening is also allowed recursively if the client already has the surface open. This works around a regression in svga mesa where opening of a shared surface is used recursively to obtain surface information. Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/ttm/ttm_object.c | 10 +++++++--- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 6 ++---- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 22 +++++++++------------- include/drm/ttm/ttm_object.h | 5 ++++- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index fdb451e3ec01..d750140bafbc 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -179,7 +179,7 @@ int ttm_base_object_init(struct ttm_object_file *tfile, if (unlikely(ret != 0)) goto out_err0; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false); if (unlikely(ret != 0)) goto out_err1; @@ -318,7 +318,8 @@ EXPORT_SYMBOL(ttm_ref_object_exists); int ttm_ref_object_add(struct ttm_object_file *tfile, struct ttm_base_object *base, - enum ttm_ref_type ref_type, bool *existed) + enum ttm_ref_type ref_type, bool *existed, + bool require_existed) { struct drm_open_hash *ht = &tfile->ref_hash[ref_type]; struct ttm_ref_object *ref; @@ -345,6 +346,9 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, } rcu_read_unlock(); + if (require_existed) + return -EPERM; + ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref), false, false); if (unlikely(ret != 0)) @@ -635,7 +639,7 @@ int ttm_prime_fd_to_handle(struct ttm_object_file *tfile, prime = (struct ttm_prime_object *) dma_buf->priv; base = &prime->base; *handle = base->hash.key; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false); dma_buf_put(dma_buf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 4076063e0fdd..6b2708b4eafe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1075,10 +1075,8 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, (void) vmw_fence_obj_reference(fence); if (user_fence_rep != NULL) { - bool existed; - - ret = ttm_ref_object_add(tfile, base, - TTM_REF_USAGE, &existed); + ret = ttm_ref_object_add(vmw_fp->tfile, base, + TTM_REF_USAGE, NULL, false); if (unlikely(ret != 0)) { DRM_ERROR("Failed to reference a fence " "object.\n"); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 65b3f0369636..bf23153d4f55 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -589,7 +589,7 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo, return ret; ret = ttm_ref_object_add(tfile, &user_bo->prime.base, - TTM_REF_SYNCCPU_WRITE, &existed); + TTM_REF_SYNCCPU_WRITE, &existed, false); if (ret != 0 || existed) ttm_bo_synccpu_write_release(&user_bo->dma.base); @@ -773,7 +773,7 @@ int vmw_user_dmabuf_reference(struct ttm_object_file *tfile, *handle = user_bo->prime.base.hash.key; return ttm_ref_object_add(tfile, &user_bo->prime.base, - TTM_REF_USAGE, NULL); + TTM_REF_USAGE, NULL, false); } /* diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index f410502cb075..adc023fe67f3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -891,17 +891,16 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, uint32_t handle; struct ttm_base_object *base; int ret; + bool require_exist = false; if (handle_type == DRM_VMW_HANDLE_PRIME) { ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle); if (unlikely(ret != 0)) return ret; } else { - if (unlikely(drm_is_render_client(file_priv))) { - DRM_ERROR("Render client refused legacy " - "surface reference.\n"); - return -EACCES; - } + if (unlikely(drm_is_render_client(file_priv))) + require_exist = true; + if (ACCESS_ONCE(vmw_fpriv(file_priv)->locked_master)) { DRM_ERROR("Locked master refused legacy " "surface reference.\n"); @@ -929,17 +928,14 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, /* * Make sure the surface creator has the same - * authenticating master. + * authenticating master, or is already registered with us. */ if (drm_is_primary_client(file_priv) && - user_srf->master != file_priv->master) { - DRM_ERROR("Trying to reference surface outside of" - " master domain.\n"); - ret = -EACCES; - goto out_bad_resource; - } + user_srf->master != file_priv->master) + require_exist = true; - ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL); + ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, + require_exist); if (unlikely(ret != 0)) { DRM_ERROR("Could not add a reference to a surface.\n"); goto out_bad_resource; diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h index ed953f98f0e1..1487011fe057 100644 --- a/include/drm/ttm/ttm_object.h +++ b/include/drm/ttm/ttm_object.h @@ -229,6 +229,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base); * @ref_type: The type of reference. * @existed: Upon completion, indicates that an identical reference object * already existed, and the refcount was upped on that object instead. + * @require_existed: Fail with -EPERM if an identical ref object didn't + * already exist. * * Checks that the base object is shareable and adds a ref object to it. * @@ -243,7 +245,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base); */ extern int ttm_ref_object_add(struct ttm_object_file *tfile, struct ttm_base_object *base, - enum ttm_ref_type ref_type, bool *existed); + enum ttm_ref_type ref_type, bool *existed, + bool require_existed); extern bool ttm_ref_object_exists(struct ttm_object_file *tfile, struct ttm_base_object *base); -- cgit v1.2.3 From 3ce7803cf3a7bc52c0eb9f516de8b72a0305ad57 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 12:38:25 +0200 Subject: drm/ttm: Avoid calling drm_ht_remove from atomic context On recent kernels, calling drm_ht_remove triggers a might_sleep() warning from within vfree(). So avoid calling it from atomic context. The use-cases we fix here are both from destructors so there should be no concurrent use of the hash tables. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/ttm/ttm_object.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index d750140bafbc..26a7ad0f4789 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -453,10 +453,10 @@ void ttm_object_file_release(struct ttm_object_file **p_tfile) ttm_ref_object_release(&ref->kref); } + spin_unlock(&tfile->lock); for (i = 0; i < TTM_REF_NUM; ++i) drm_ht_remove(&tfile->ref_hash[i]); - spin_unlock(&tfile->lock); ttm_object_file_unref(&tfile); } EXPORT_SYMBOL(ttm_object_file_release); @@ -533,9 +533,7 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev) *p_tdev = NULL; - spin_lock(&tdev->object_lock); drm_ht_remove(&tdev->object_hash); - spin_unlock(&tdev->object_lock); kfree(tdev); } -- cgit v1.2.3 From 53e16798b0864464c5444a204e1bb93ae246c429 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 27 Mar 2017 13:06:05 +0200 Subject: drm/vmwgfx: Remove getparam error message The mesa winsys sometimes uses unimplemented parameter requests to check for features. Remove the error message to avoid bloating the kernel log. Cc: Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c index 1802d0e7fab8..5ec24fd801cd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c @@ -114,8 +114,6 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data, param->value = dev_priv->has_dx; break; default: - DRM_ERROR("Illegal vmwgfx get param request: %d\n", - param->param); return -EINVAL; } -- cgit v1.2.3 From e7e11f99564222d82f0ce84bd521e57d78a6b678 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Mon, 27 Mar 2017 20:10:53 -0700 Subject: drm/vmwgfx: fix integer overflow in vmw_surface_define_ioctl() In vmw_surface_define_ioctl(), the 'num_sizes' is the sum of the 'req->mip_levels' array. This array can be assigned any value from the user space. As both the 'num_sizes' and the array is uint32_t, it is easy to make 'num_sizes' overflow. The later 'mip_levels' is used as the loop count. This can lead an oob write. Add the check of 'req->mip_levels' to avoid this. Cc: Signed-off-by: Li Qiang Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index adc023fe67f3..05fa092c942b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -713,8 +713,11 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, 128; num_sizes = 0; - for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) + for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) { + if (req->mip_levels[i] > DRM_VMW_MAX_MIP_LEVELS) + return -EINVAL; num_sizes += req->mip_levels[i]; + } if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS || num_sizes == 0) -- cgit v1.2.3 From f85726905745fb4f6e15c68e2ade9da5390f8d89 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 30 Mar 2017 11:32:05 +0800 Subject: drm/i915/gvt: exclude cfg space from failsafe mode When test GVTg as below scenario: VM boot --> failsafe --> kill qemu --> VM boot. Qemu report error at the second boot: ERROR: PCI region size must be pow2 type=0x0, size=0x1fa1000 Qemu need access PCI_ROM_ADDRESS reg to determine the size of expansion PCI rom. The mechanism just like the BAR reg (write-read) and we should return the size 0 since we have no rom. If we reject the write to PCI_ROM_ADDRESS, Qemu cannot get the correct size of rom. Essentially, GVTg failsafe mode should not break PCI function. So we exclude cfg space from failsafe mode. This can fix above issue. v2: add Fixes and Bugzilla link. Fixes: fd64be636708d ("drm/i915/gvt: introduced failsafe mode into vgpu") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100296 Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index b7d7721e72fa..40af17ec6312 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -285,9 +285,6 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, { int ret; - if (vgpu->failsafe) - return 0; - if (WARN_ON(bytes > 4)) return -EINVAL; -- cgit v1.2.3 From 3f135e57a4f76d24ae8d8a490314331f0ced40c5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 16 Mar 2017 14:31:33 -0500 Subject: x86/build: Mostly disable '-maccumulate-outgoing-args' The GCC '-maccumulate-outgoing-args' flag is enabled for most configs, mostly because of issues which are no longer relevant. For most configs, and with most recent versions of GCC, it's no longer needed. Clarify which cases need it, and only enable it for those cases. Also produce a compile-time error for the ftrace graph + mcount + '-Os' case, which will otherwise cause runtime failures. The main benefit of '-maccumulate-outgoing-args' is that it prevents an ugly prologue for functions which have aligned stacks. But removing the option also has some benefits: more readable argument saves, smaller text size, and (presumably) slightly improved performance. Here are the object size savings for 32-bit and 64-bit defconfig kernels: text data bss dec hex filename 10006710 3543328 1773568 15323606 e9d1d6 vmlinux.x86-32.before 9706358 3547424 1773568 15027350 e54c96 vmlinux.x86-32.after text data bss dec hex filename 10652105 4537576 843776 16033457 f4a6b1 vmlinux.x86-64.before 10639629 4537576 843776 16020981 f475f5 vmlinux.x86-64.after That comes out to a 3% text size improvement on x86-32 and a 0.1% text size improvement on x86-64. Signed-off-by: Josh Poimboeuf Cc: Andrew Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170316193133.zrj6gug53766m6nn@treble Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 35 +++++++++++++++++++++++++++++++---- arch/x86/Makefile_32.cpu | 18 ------------------ arch/x86/kernel/ftrace.c | 6 ++++++ scripts/Kbuild.include | 4 ++++ 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d449337a360..a94a4d10f2df 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -120,10 +120,6 @@ else # -funit-at-a-time shrinks the kernel .text considerably # unfortunately it makes reading oopses harder. KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time) - - # this works around some issues with generating unwind tables in older gccs - # newer gccs do it by default - KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args) endif ifdef CONFIG_X86_X32 @@ -147,6 +143,37 @@ ifeq ($(CONFIG_KMEMCHECK),y) KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) endif +# +# If the function graph tracer is used with mcount instead of fentry, +# '-maccumulate-outgoing-args' is needed to prevent a GCC bug +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109) +# +ifdef CONFIG_FUNCTION_GRAPH_TRACER + ifndef CONFIG_HAVE_FENTRY + ACCUMULATE_OUTGOING_ARGS := 1 + else + ifeq ($(call cc-option-yn, -mfentry), n) + ACCUMULATE_OUTGOING_ARGS := 1 + endif + endif +endif + +# +# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a +# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way +# to test for this bug at compile-time because the test case needs to execute, +# which is a no-go for cross compilers. So check the GCC version instead. +# +ifdef CONFIG_JUMP_LABEL + ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1) + ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1) + endif +endif + +ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) + KBUILD_CFLAGS += -maccumulate-outgoing-args +endif + # Stackpointer is addressed different for 32 bit and 64 bit x86 sp-$(CONFIG_X86_32) := esp sp-$(CONFIG_X86_64) := rsp diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 6647ed49c66c..a45eb15b7cf2 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -45,24 +45,6 @@ cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) -# Work around the pentium-mmx code generator madness of gcc4.4.x which -# does stack alignment by generating horrible code _before_ the mcount -# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph -# tracer assumptions. For i686, generic, core2 this is set by the -# compiler anyway -ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y) -ADD_ACCUMULATE_OUTGOING_ARGS := y -endif - -# Work around to a bug with asm goto with first implementations of it -# in gcc causing gcc to mess up the push and pop of the stack in some -# uses of asm goto. -ifeq ($(CONFIG_JUMP_LABEL), y) -ADD_ACCUMULATE_OUTGOING_ARGS := y -endif - -cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) - # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. ifneq ($(CONFIG_X86_P6_NOP),y) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8f3d9cf26ff9..cbd73eb42170 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -29,6 +29,12 @@ #include #include +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && \ + !defined(CC_USING_FENTRY) && \ + !defined(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) +# error The following combination is not supported: ((compiler missing -mfentry) || (CONFIG_X86_32 and !CONFIG_DYNAMIC_FTRACE)) && CONFIG_FUNCTION_GRAPH_TRACER && CONFIG_CC_OPTIMIZE_FOR_SIZE +#endif + #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index d6ca649cb0e9..afe3fd3af1e4 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -148,6 +148,10 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \ # Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4)) +# cc-if-fullversion +# Usage: EXTRA_CFLAGS += $(call cc-if-fullversion, -lt, 040502, -O1) +cc-if-fullversion = $(shell [ $(cc-fullversion) $(1) $(2) ] && echo $(3) || echo $(4)) + # cc-ldoption # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both) cc-ldoption = $(call try-run,\ -- cgit v1.2.3 From ac77a0c463c1d7d659861f7b6d1261970dd3282a Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Mar 2017 14:20:45 +0900 Subject: block: do not put mq context in blk_mq_alloc_request_hctx In blk_mq_alloc_request_hctx, blk_mq_sched_get_request doesn't get sw context so we don't need to put the context with blk_mq_put_ctx. Unless, we will see preempt counter underflow. Cc: Omar Sandoval Signed-off-by: Minchan Kim Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 6b6e7bc041db..935f2cc7c8c3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -321,7 +321,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw, rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data); - blk_mq_put_ctx(alloc_data.ctx); blk_queue_exit(q); if (!rq) -- cgit v1.2.3 From 6d6e500391875cc372336c88e9a8af377be19c36 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 28 Mar 2017 13:13:43 -0700 Subject: drm/vc4: Allocate the right amount of space for boot-time CRTC state. Without this, the first modeset would dereference past the allocation when trying to free the mm node. Signed-off-by: Eric Anholt Tested-by: Stefan Wahren Link: http://patchwork.freedesktop.org/patch/msgid/20170328201343.4884-1-eric@anholt.net Fixes: d8dbf44f13b9 ("drm/vc4: Make the CRTCs cooperate on allocating display lists.") Cc: # v4.6+ Reviewed-by: Daniel Vetter --- drivers/gpu/drm/vc4/vc4_crtc.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 0c06844af445..9fcf05ca492b 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -846,6 +846,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, drm_atomic_helper_crtc_destroy_state(crtc, state); } +static void +vc4_crtc_reset(struct drm_crtc *crtc) +{ + if (crtc->state) + __drm_atomic_helper_crtc_destroy_state(crtc->state); + + crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); + if (crtc->state) + crtc->state->crtc = crtc; +} + static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = vc4_crtc_destroy, @@ -853,7 +864,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_property = NULL, .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ - .reset = drm_atomic_helper_crtc_reset, + .reset = vc4_crtc_reset, .atomic_duplicate_state = vc4_crtc_duplicate_state, .atomic_destroy_state = vc4_crtc_destroy_state, .gamma_set = vc4_crtc_gamma_set, -- cgit v1.2.3 From 335d2c2d192266358c5dfa64953a4c162f46e464 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Fri, 24 Mar 2017 09:53:56 -0400 Subject: arm64: fix NULL dereference in have_cpu_die() Commit 5c492c3f5255 ("arm64: smp: Add function to determine if cpus are stuck in the kernel") added a helper function to determine if die() is supported in cpu_ops. This function assumes a cpu will have a valid cpu_ops entry, but that may not be the case for cpu0 is spin-table or parking protocol is used to boot secondary cpus. In that case, there is a NULL dereference if have_cpu_die() is called by cpu0. So add a check for a valid cpu_ops before dereferencing it. Fixes: 5c492c3f5255 ("arm64: smp: Add function to determine if cpus are stuck in the kernel") Signed-off-by: Mark Salter Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ef1caae02110..9b1036570586 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -944,7 +944,7 @@ static bool have_cpu_die(void) #ifdef CONFIG_HOTPLUG_CPU int any_cpu = raw_smp_processor_id(); - if (cpu_ops[any_cpu]->cpu_die) + if (cpu_ops[any_cpu] && cpu_ops[any_cpu]->cpu_die) return true; #endif return false; -- cgit v1.2.3 From 893dc68f1b18451e6d550b1884fc6be76e1bb90c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Tue, 21 Mar 2017 09:24:11 -0500 Subject: rtlwifi: Fix scheduling while atomic splat Following commit cceb0a597320 ("rtlwifi: Add work queue for c2h cmd."), the following BUG is reported when rtl8723be is used: BUG: sleeping function called from invalid context at mm/slab.h:432 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/0 CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W O 4.11.0-rc3-wl+ #276 Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.50 09/29/2014 Call Trace: dump_stack+0x63/0x89 ___might_sleep+0xe9/0x130 __might_sleep+0x4a/0x90 kmem_cache_alloc_trace+0x19f/0x200 ? rtl_c2hcmd_enqueue+0x3e/0x110 [rtlwifi] rtl_c2hcmd_enqueue+0x3e/0x110 [rtlwifi] rtl8723be_c2h_packet_handler+0xac/0xc0 [rtl8723be] rtl8723be_rx_command_packet+0x37/0x5c [rtl8723be] _rtl_pci_rx_interrupt+0x200/0x6b0 [rtl_pci] _rtl_pci_interrupt+0x20c/0x5d0 [rtl_pci] __handle_irq_event_percpu+0x3f/0x1d0 handle_irq_event_percpu+0x23/0x60 handle_irq_event+0x3c/0x60 handle_fasteoi_irq+0xa2/0x170 handle_irq+0x20/0x30 do_IRQ+0x48/0xd0 common_interrupt+0x89/0x89 ... Although commit cceb0a597320 converted most c2h commands to use a work queue, the Bluetooth coexistence routines can be in atomic mode when they execute such a call. Fixes: cceb0a597320 ("rtlwifi: Add work queue for c2h cmd.") Signed-off-by: Larry Finger Cc: Ping-Ke Shih Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/base.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index caea350f05aa..bdc379178e87 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -1742,12 +1742,14 @@ void rtl_c2hcmd_enqueue(struct ieee80211_hw *hw, u8 tag, u8 len, u8 *val) unsigned long flags; struct rtl_c2hcmd *c2hcmd; - c2hcmd = kmalloc(sizeof(*c2hcmd), GFP_KERNEL); + c2hcmd = kmalloc(sizeof(*c2hcmd), + in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); if (!c2hcmd) goto label_err; - c2hcmd->val = kmalloc(len, GFP_KERNEL); + c2hcmd->val = kmalloc(len, + in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); if (!c2hcmd->val) goto label_err2; -- cgit v1.2.3 From d77facb88448cdeaaa3adba5b9704a48ac2ac8d6 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 28 Mar 2017 09:11:30 +0100 Subject: brcmfmac: use local iftype avoiding use-after-free of virtual interface A use-after-free was found using KASAN. In brcmf_p2p_del_if() the virtual interface is removed using call to brcmf_remove_interface(). After that the virtual interface instance has been freed and should not be referenced. Solve this by storing the nl80211 iftype in local variable, which is used in a couple of places anyway. Cc: stable@vger.kernel.org # 4.10.x, 4.9.x Reported-by: Daniel J Blueman Reviewed-by: Hante Meuleman Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Franky Lin Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index de19c7c92bc6..85d949e03f79 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -2238,14 +2238,16 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy); struct brcmf_p2p_info *p2p = &cfg->p2p; struct brcmf_cfg80211_vif *vif; + enum nl80211_iftype iftype; bool wait_for_disable = false; int err; brcmf_dbg(TRACE, "delete P2P vif\n"); vif = container_of(wdev, struct brcmf_cfg80211_vif, wdev); + iftype = vif->wdev.iftype; brcmf_cfg80211_arm_vif_event(cfg, vif); - switch (vif->wdev.iftype) { + switch (iftype) { case NL80211_IFTYPE_P2P_CLIENT: if (test_bit(BRCMF_VIF_STATUS_DISCONNECTING, &vif->sme_state)) wait_for_disable = true; @@ -2275,7 +2277,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) BRCMF_P2P_DISABLE_TIMEOUT); err = 0; - if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE) { + if (iftype != NL80211_IFTYPE_P2P_DEVICE) { brcmf_vif_clear_mgmt_ies(vif); err = brcmf_p2p_release_p2p_if(vif); } @@ -2291,7 +2293,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) brcmf_remove_interface(vif->ifp, true); brcmf_cfg80211_arm_vif_event(cfg, NULL); - if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE) + if (iftype != NL80211_IFTYPE_P2P_DEVICE) p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = NULL; return err; -- cgit v1.2.3 From fabbbee0eb0f4b763576ac1e2db4fc3bf6dcc0cc Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 30 Mar 2017 10:10:55 -0400 Subject: PNFS fix fallback to MDS if got error on commit to DS Upong receiving some errors (EACCES) on commit to the DS the code doesn't fallback to MDS and intead retrieds to the same DS again. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 367f8eb19bfa..c9230fecc77e 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -202,10 +202,10 @@ static int filelayout_async_handle_error(struct rpc_task *task, task->tk_status); nfs4_mark_deviceid_unavailable(devid); pnfs_error_mark_layout_for_return(inode, lseg); - pnfs_set_lo_fail(lseg); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: + pnfs_set_lo_fail(lseg); reset: dprintk("%s Retry through MDS. Error %d\n", __func__, task->tk_status); -- cgit v1.2.3 From 2b6867c2ce76c596676bec7d2d525af525fdc6e2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:20 +0200 Subject: net/packet: fix overflow in check for priv area size Subtracting tp_sizeof_priv from tp_block_size and casting to int to check whether one is less then the other doesn't always work (both of them are unsigned ints). Compare them as is instead. Also cast tp_sizeof_priv to u64 before using BLK_PLUS_PRIV, as it can overflow inside BLK_PLUS_PRIV otherwise. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a0dbe7ca8f72..2323ee35dc09 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4193,8 +4193,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) goto out; if (po->tp_version >= TPACKET_V3 && - (int)(req->tp_block_size - - BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0) + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) -- cgit v1.2.3 From 8f8d28e4d6d815a391285e121c3a53a0b6cb9e7b Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:21 +0200 Subject: net/packet: fix overflow in check for tp_frame_nr When calculating rb->frames_per_block * req->tp_block_nr the result can overflow. Add a check that tp_block_size * tp_block_nr <= UINT_MAX. Since frames_per_block <= tp_block_size, the expression would never overflow. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2323ee35dc09..3ac286ebb2f4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4205,6 +4205,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; + if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; -- cgit v1.2.3 From bcc5364bdcfe131e6379363f089e7b4108d35b70 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 29 Mar 2017 16:11:22 +0200 Subject: net/packet: fix overflow in check for tp_reserve When calculating po->tp_hdrlen + po->tp_reserve the result can overflow. Fix by checking that tp_reserve <= INT_MAX on assign. Signed-off-by: Andrey Konovalov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3ac286ebb2f4..8489beff5c25 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3665,6 +3665,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; + if (val > INT_MAX) + return -EINVAL; po->tp_reserve = val; return 0; } -- cgit v1.2.3 From 3dbcc105d5561e18ccd0842c7baab1c835562a37 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 30 Mar 2017 01:00:53 +0800 Subject: sctp: alloc stream info when initializing asoc When sending a msg without asoc established, sctp will send INIT packet first and then enqueue chunks. Before receiving INIT_ACK, stream info is not yet alloced. But enqueuing chunks needs to access stream info, like out stream state and out stream cnt. This patch is to fix it by allocing out stream info when initializing an asoc, allocing in stream and re-allocing out stream when processing init. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 3 ++- net/sctp/associola.c | 7 ++++++- net/sctp/sm_make_chunk.c | 9 ++------- net/sctp/stream.c | 43 +++++++++++++++++++++++++++++++++++-------- 4 files changed, 45 insertions(+), 17 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8caa5ee9e290..a127b7c2c3c9 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -377,7 +377,8 @@ typedef struct sctp_sender_hb_info { __u64 hb_nonce; } sctp_sender_hb_info_t; -struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp); +int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp); +int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp); void sctp_stream_free(struct sctp_stream *stream); void sctp_stream_clear(struct sctp_stream *stream); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0439a1a68367..0b26df5f6188 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -246,6 +246,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; + if (sctp_stream_new(asoc, gfp)) + goto fail_init; + /* Assume that peer would support both address types unless we are * told otherwise. */ @@ -264,7 +267,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* AUTH related initializations */ INIT_LIST_HEAD(&asoc->endpoint_shared_keys); if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp)) - goto fail_init; + goto stream_free; asoc->active_key_id = ep->active_key_id; asoc->prsctp_enable = ep->prsctp_enable; @@ -287,6 +290,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; +stream_free: + sctp_stream_free(asoc->stream); fail_init: sock_put(asoc->base.sk); sctp_endpoint_put(asoc->ep); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 969a30c7bb54..118faff6a332 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2460,15 +2460,10 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * association. */ if (!asoc->temp) { - int error; - - asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams, - asoc->c.sinit_num_ostreams, gfp); - if (!asoc->stream) + if (sctp_stream_init(asoc, gfp)) goto clean_up; - error = sctp_assoc_set_id(asoc, gfp); - if (error) + if (sctp_assoc_set_id(asoc, gfp)) goto clean_up; } diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 1c6cc04fa3a4..bbed997e1c5f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,33 +35,60 @@ #include #include -struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp) +int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp) { struct sctp_stream *stream; int i; stream = kzalloc(sizeof(*stream), gfp); if (!stream) - return NULL; + return -ENOMEM; - stream->outcnt = outcnt; + stream->outcnt = asoc->c.sinit_num_ostreams; stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); if (!stream->out) { kfree(stream); - return NULL; + return -ENOMEM; } for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; - stream->incnt = incnt; + asoc->stream = stream; + + return 0; +} + +int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp) +{ + struct sctp_stream *stream = asoc->stream; + int i; + + /* Initial stream->out size may be very big, so free it and alloc + * a new one with new outcnt to save memory. + */ + kfree(stream->out); + stream->outcnt = asoc->c.sinit_num_ostreams; + stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp); + if (!stream->out) + goto nomem; + + for (i = 0; i < stream->outcnt; i++) + stream->out[i].state = SCTP_STREAM_OPEN; + + stream->incnt = asoc->c.sinit_max_instreams; stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp); if (!stream->in) { kfree(stream->out); - kfree(stream); - return NULL; + goto nomem; } - return stream; + return 0; + +nomem: + asoc->stream = NULL; + kfree(stream); + + return -ENOMEM; } void sctp_stream_free(struct sctp_stream *stream) -- cgit v1.2.3 From 34d04f25a98cbc1065e87f10d94798bbe9a8af94 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 30 Mar 2017 20:41:49 +0800 Subject: arm64: remove redundant header file in current.h Commint 9d84fb27fa1 ("arm64: restore get_current() optimisation") has removed read_sysreg() and asm/sysreg.h is redundant. This patch removes asm/sysreg.h header file. Acked-by: Mark Rutland Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- arch/arm64/include/asm/current.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h index 86c404171305..f6580d4afb0e 100644 --- a/arch/arm64/include/asm/current.h +++ b/arch/arm64/include/asm/current.h @@ -3,8 +3,6 @@ #include -#include - #ifndef __ASSEMBLY__ struct task_struct; -- cgit v1.2.3 From 9b3403ae56e13cdf45252f34db196a8f5f52b6ac Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 31 Mar 2017 02:44:17 +0900 Subject: arm64: drop non-existing vdso-offsets.h from .gitignore Since commit a66649dab350 ("arm64: fix vdso-offsets.h dependency"), include/generated/vdso-offsets.h is directly generated without arch/arm64/kernel/vdso/vdso-offsets.h. Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/.gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore index b8cc94e9698b..f8b69d84238e 100644 --- a/arch/arm64/kernel/vdso/.gitignore +++ b/arch/arm64/kernel/vdso/.gitignore @@ -1,2 +1 @@ vdso.lds -vdso-offsets.h -- cgit v1.2.3 From 923713b357455cfb9aca2cd3429cb0806a724ed2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 26 Mar 2017 13:14:45 +0200 Subject: mmc: sdhci: Disable runtime pm when the sdio_irq is enabled SDIO cards may need clock to send the card interrupt to the host. On a cherrytrail tablet with a RTL8723BS wifi chip, without this patch pinging the tablet results in: PING 192.168.1.14 (192.168.1.14) 56(84) bytes of data. 64 bytes from 192.168.1.14: icmp_seq=1 ttl=64 time=78.6 ms 64 bytes from 192.168.1.14: icmp_seq=2 ttl=64 time=1760 ms 64 bytes from 192.168.1.14: icmp_seq=3 ttl=64 time=753 ms 64 bytes from 192.168.1.14: icmp_seq=4 ttl=64 time=3.88 ms 64 bytes from 192.168.1.14: icmp_seq=5 ttl=64 time=795 ms 64 bytes from 192.168.1.14: icmp_seq=6 ttl=64 time=1841 ms 64 bytes from 192.168.1.14: icmp_seq=7 ttl=64 time=810 ms 64 bytes from 192.168.1.14: icmp_seq=8 ttl=64 time=1860 ms 64 bytes from 192.168.1.14: icmp_seq=9 ttl=64 time=812 ms 64 bytes from 192.168.1.14: icmp_seq=10 ttl=64 time=48.6 ms Where as with this patch I get: PING 192.168.1.14 (192.168.1.14) 56(84) bytes of data. 64 bytes from 192.168.1.14: icmp_seq=1 ttl=64 time=3.96 ms 64 bytes from 192.168.1.14: icmp_seq=2 ttl=64 time=1.97 ms 64 bytes from 192.168.1.14: icmp_seq=3 ttl=64 time=17.2 ms 64 bytes from 192.168.1.14: icmp_seq=4 ttl=64 time=2.46 ms 64 bytes from 192.168.1.14: icmp_seq=5 ttl=64 time=2.83 ms 64 bytes from 192.168.1.14: icmp_seq=6 ttl=64 time=1.40 ms 64 bytes from 192.168.1.14: icmp_seq=7 ttl=64 time=2.10 ms 64 bytes from 192.168.1.14: icmp_seq=8 ttl=64 time=1.40 ms 64 bytes from 192.168.1.14: icmp_seq=9 ttl=64 time=2.04 ms 64 bytes from 192.168.1.14: icmp_seq=10 ttl=64 time=1.40 ms Cc: Dong Aisheng Cc: Ian W MORRISON Signed-off-by: Hans de Goede Acked-by: Adrian Hunter Acked-by: Dong Aisheng Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 9c1a099afbbe..63bc33a54d0d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1830,6 +1830,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) struct sdhci_host *host = mmc_priv(mmc); unsigned long flags; + if (enable) + pm_runtime_get_noresume(host->mmc->parent); + spin_lock_irqsave(&host->lock, flags); if (enable) host->flags |= SDHCI_SDIO_IRQ_ENABLED; @@ -1838,6 +1841,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) sdhci_enable_sdio_irq_nolock(host, enable); spin_unlock_irqrestore(&host->lock, flags); + + if (!enable) + pm_runtime_put_noidle(host->mmc->parent); } static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc, -- cgit v1.2.3 From d0918764c17b94c30bbb2619929b1719ff52707a Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Tue, 28 Mar 2017 11:00:45 +0200 Subject: mmc: sdhci-of-at91: fix MMC_DDR_52 timing selection The controller has different timings for MMC_TIMING_UHS_DDR50 and MMC_TIMING_MMC_DDR52. Configuring the controller with SDHCI_CTRL_UHS_DDR50, when MMC_TIMING_MMC_DDR52 timings are requested, is not correct and can lead to unexpected behavior. Signed-off-by: Ludovic Desroches Fixes: bb5f8ea4d514 ("mmc: sdhci-of-at91: introduce driver for the Atmel SDMMC") Cc: # 4.4+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-at91.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 7fd964256faa..d5430ed02a67 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -29,6 +29,8 @@ #include "sdhci-pltfm.h" +#define SDMMC_MC1R 0x204 +#define SDMMC_MC1R_DDR BIT(3) #define SDMMC_CACR 0x230 #define SDMMC_CACR_CAPWREN BIT(0) #define SDMMC_CACR_KEY (0x46 << 8) @@ -103,11 +105,18 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode, sdhci_set_power_noreg(host, mode, vdd); } +void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing) +{ + if (timing == MMC_TIMING_MMC_DDR52) + sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R); + sdhci_set_uhs_signaling(host, timing); +} + static const struct sdhci_ops sdhci_at91_sama5d2_ops = { .set_clock = sdhci_at91_set_clock, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, - .set_uhs_signaling = sdhci_set_uhs_signaling, + .set_uhs_signaling = sdhci_at91_set_uhs_signaling, .set_power = sdhci_at91_set_power, }; -- cgit v1.2.3 From 2b83878dd74a7c73bedcb6600663c1c46836e8af Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 29 Mar 2017 15:44:47 -0700 Subject: xtensa: make __pa work with uncached KSEG addresses When __pa is applied to virtual address in uncached KSEG region the result is incorrect. Fix it by checking if the original address is in the uncached KSEG and adjusting the result. It looks better than masking off bits because pfn_valid would correctly work with new __pa results and it may be made working in noMMU case, once we get definition for uncached memory view. This is required for the dma_common_mmap and DMA debug code to work correctly: they both indirectly use __pa with coherent DMA addresses. In case of DMA debug the visible effect is false reports that an address mapped for DMA is accessed by CPU. Cc: stable@vger.kernel.org Tested-by: Boris Brezillon Reviewed-by: Boris Brezillon Signed-off-by: Max Filippov --- arch/xtensa/include/asm/page.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h index 976b1d70edbc..4ddbfd57a7c8 100644 --- a/arch/xtensa/include/asm/page.h +++ b/arch/xtensa/include/asm/page.h @@ -164,8 +164,21 @@ void copy_user_highpage(struct page *to, struct page *from, #define ARCH_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT) +#ifdef CONFIG_MMU +static inline unsigned long ___pa(unsigned long va) +{ + unsigned long off = va - PAGE_OFFSET; + + if (off >= XCHAL_KSEG_SIZE) + off -= XCHAL_KSEG_SIZE; + + return off + PHYS_OFFSET; +} +#define __pa(x) ___pa((unsigned long)(x)) +#else #define __pa(x) \ ((unsigned long) (x) - PAGE_OFFSET + PHYS_OFFSET) +#endif #define __va(x) \ ((void *)((unsigned long) (x) - PHYS_OFFSET + PAGE_OFFSET)) #define pfn_valid(pfn) \ -- cgit v1.2.3 From 0b98ca2a45e35dfe02f4512fa30b9f5a9900cb29 Mon Sep 17 00:00:00 2001 From: Suresh Reddy Date: Thu, 30 Mar 2017 00:58:32 -0400 Subject: be2net: Fix endian issue in logical link config command Use cpu_to_le32() for link_config variable in set_logical_link_config command as this variable is of type u32. Signed-off-by: Suresh Reddy Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 30e855004c57..02dd5246dfae 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -4939,8 +4939,9 @@ static int __be_cmd_set_logical_link_config(struct be_adapter *adapter, int link_state, int version, u8 domain) { - struct be_mcc_wrb *wrb; struct be_cmd_req_set_ll_link *req; + struct be_mcc_wrb *wrb; + u32 link_config = 0; int status; mutex_lock(&adapter->mcc_lock); @@ -4962,10 +4963,12 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter, if (link_state == IFLA_VF_LINK_STATE_ENABLE || link_state == IFLA_VF_LINK_STATE_AUTO) - req->link_config |= PLINK_ENABLE; + link_config |= PLINK_ENABLE; if (link_state == IFLA_VF_LINK_STATE_AUTO) - req->link_config |= PLINK_TRACK; + link_config |= PLINK_TRACK; + + req->link_config = cpu_to_le32(link_config); status = be_mcc_notify_wait(adapter); err: -- cgit v1.2.3 From c70c473396cbdec1168a6eff60e13029c0916854 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Wed, 29 Mar 2017 17:15:11 +0300 Subject: ARCv2: SLC: Make sure busy bit is set properly on SLC flushing As reported in STAR 9001165532, an SLC control reg read (for checking busy state) right after SLC invalidate command may incorrectly return NOT busy causing software to NOT spin-wait while operation is underway. (and for some reason this only happens if L1 cache is also disabled - as required by IOC programming model) Suggested workaround is to do an additional Control Reg read, which ensures the 2nd read gets the right status. Cc: stable@vger.kernel.org #4.10 Signed-off-by: Alexey Brodkin [vgupta: reworte changelog a bit] Signed-off-by: Vineet Gupta --- arch/arc/mm/cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index d408fa21a07c..928562967f3c 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -633,6 +633,9 @@ noinline static void slc_entire_op(const int op) write_aux_reg(ARC_REG_SLC_INVALIDATE, 1); + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(r); + /* Important to wait for flush to complete */ while (read_aux_reg(r) & SLC_CTRL_BUSY); } -- cgit v1.2.3 From 4c6fabda1ad1dec6d274c098ef0a91809c74f2e3 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 30 Mar 2017 10:02:57 -0700 Subject: ARC: fix build warnings with !CONFIG_KPROBES | CC lib/nmi_backtrace.o | In file included from ../include/linux/kprobes.h:43:0, | from ../lib/nmi_backtrace.c:17: | ../arch/arc/include/asm/kprobes.h:57:13: warning: 'trap_is_kprobe' defined but not used [-Wunused-function] | static void trap_is_kprobe(unsigned long address, struct pt_regs *regs) | ^~~~~~~~~~~~~~ The warning started with 7d134b2ce6 ("kprobes: move kprobe declarations to asm-generic/kprobes.h") which started including unconditionally into exposing a stub function for !CONFIG_KPROBES to rest of world. Fix that by making the stub a macro Signed-off-by: Vineet Gupta --- arch/arc/include/asm/kprobes.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h index 00bdbe167615..2e52d18e6bc7 100644 --- a/arch/arc/include/asm/kprobes.h +++ b/arch/arc/include/asm/kprobes.h @@ -54,9 +54,7 @@ int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause); void kretprobe_trampoline(void); void trap_is_kprobe(unsigned long address, struct pt_regs *regs); #else -static void trap_is_kprobe(unsigned long address, struct pt_regs *regs) -{ -} +#define trap_is_kprobe(address, regs) #endif /* CONFIG_KPROBES */ #endif /* _ARC_KPROBES_H */ -- cgit v1.2.3 From fa7e25cf13a6d0b82b5ed1008246f44d42e8422c Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 30 Oct 2016 17:28:16 -0700 Subject: target: Fix unknown fabric callback queue-full errors This patch fixes a set of queue-full response handling bugs, where outgoing responses are leaked when a fabric driver is propagating non -EAGAIN or -ENOMEM errors to target-core. It introduces TRANSPORT_COMPLETE_QF_ERR state used to signal when CHECK_CONDITION status should be generated, when fabric driver ->write_pending(), ->queue_data_in(), or ->queue_status() callbacks fail with non -EAGAIN or -ENOMEM errors, and data-transfer should not be retried. Note all fabric driver -EAGAIN and -ENOMEM errors are still retried indefinately with associated data-transfer callbacks, following existing queue-full logic. Also fix two missing ->queue_status() queue-full cases related to CMD_T_ABORTED w/ TAS status handling. Reported-by: Potnuri Bharat Teja Reviewed-by: Potnuri Bharat Teja Tested-by: Potnuri Bharat Teja Cc: Potnuri Bharat Teja Reported-by: Steve Wise Cc: Steve Wise Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 102 ++++++++++++++++++++++----------- include/target/target_core_base.h | 1 + 2 files changed, 69 insertions(+), 34 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index b1a3cdb29468..a0cd56ee5fe9 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -64,8 +64,9 @@ struct kmem_cache *t10_alua_lba_map_cache; struct kmem_cache *t10_alua_lba_map_mem_cache; static void transport_complete_task_attr(struct se_cmd *cmd); +static int translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason); static void transport_handle_queue_full(struct se_cmd *cmd, - struct se_device *dev); + struct se_device *dev, int err, bool write_pending); static int transport_put_cmd(struct se_cmd *cmd); static void target_complete_ok_work(struct work_struct *work); @@ -804,7 +805,8 @@ void target_qf_do_work(struct work_struct *work) if (cmd->t_state == TRANSPORT_COMPLETE_QF_WP) transport_write_pending_qf(cmd); - else if (cmd->t_state == TRANSPORT_COMPLETE_QF_OK) + else if (cmd->t_state == TRANSPORT_COMPLETE_QF_OK || + cmd->t_state == TRANSPORT_COMPLETE_QF_ERR) transport_complete_qf(cmd); } } @@ -1719,7 +1721,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, } trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_status(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; goto check_stop; default: @@ -1730,7 +1732,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, } ret = transport_send_check_condition_and_sense(cmd, sense_reason, 0); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; check_stop: @@ -1739,8 +1741,7 @@ check_stop: return; queue_full: - cmd->t_state = TRANSPORT_COMPLETE_QF_OK; - transport_handle_queue_full(cmd, cmd->se_dev); + transport_handle_queue_full(cmd, cmd->se_dev, ret, false); } EXPORT_SYMBOL(transport_generic_request_failure); @@ -1977,13 +1978,29 @@ static void transport_complete_qf(struct se_cmd *cmd) int ret = 0; transport_complete_task_attr(cmd); + /* + * If a fabric driver ->write_pending() or ->queue_data_in() callback + * has returned neither -ENOMEM or -EAGAIN, assume it's fatal and + * the same callbacks should not be retried. Return CHECK_CONDITION + * if a scsi_status is not already set. + * + * If a fabric driver ->queue_status() has returned non zero, always + * keep retrying no matter what.. + */ + if (cmd->t_state == TRANSPORT_COMPLETE_QF_ERR) { + if (cmd->scsi_status) + goto queue_status; - if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) { - trace_target_cmd_complete(cmd); - ret = cmd->se_tfo->queue_status(cmd); - goto out; + cmd->se_cmd_flags |= SCF_EMULATED_TASK_SENSE; + cmd->scsi_status = SAM_STAT_CHECK_CONDITION; + cmd->scsi_sense_length = TRANSPORT_SENSE_BUFFER; + translate_sense_reason(cmd, TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE); + goto queue_status; } + if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) + goto queue_status; + switch (cmd->data_direction) { case DMA_FROM_DEVICE: if (cmd->scsi_status) @@ -2007,19 +2024,33 @@ queue_status: break; } -out: if (ret < 0) { - transport_handle_queue_full(cmd, cmd->se_dev); + transport_handle_queue_full(cmd, cmd->se_dev, ret, false); return; } transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); } -static void transport_handle_queue_full( - struct se_cmd *cmd, - struct se_device *dev) +static void transport_handle_queue_full(struct se_cmd *cmd, struct se_device *dev, + int err, bool write_pending) { + /* + * -EAGAIN or -ENOMEM signals retry of ->write_pending() and/or + * ->queue_data_in() callbacks from new process context. + * + * Otherwise for other errors, transport_complete_qf() will send + * CHECK_CONDITION via ->queue_status() instead of attempting to + * retry associated fabric driver data-transfer callbacks. + */ + if (err == -EAGAIN || err == -ENOMEM) { + cmd->t_state = (write_pending) ? TRANSPORT_COMPLETE_QF_WP : + TRANSPORT_COMPLETE_QF_OK; + } else { + pr_warn_ratelimited("Got unknown fabric queue status: %d\n", err); + cmd->t_state = TRANSPORT_COMPLETE_QF_ERR; + } + spin_lock_irq(&dev->qf_cmd_lock); list_add_tail(&cmd->se_qf_node, &cmd->se_dev->qf_cmd_list); atomic_inc_mb(&dev->dev_qf_count); @@ -2083,7 +2114,7 @@ static void target_complete_ok_work(struct work_struct *work) WARN_ON(!cmd->scsi_status); ret = transport_send_check_condition_and_sense( cmd, 0, 1); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; transport_lun_remove_cmd(cmd); @@ -2109,7 +2140,7 @@ static void target_complete_ok_work(struct work_struct *work) } else if (rc) { ret = transport_send_check_condition_and_sense(cmd, rc, 0); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; transport_lun_remove_cmd(cmd); @@ -2134,7 +2165,7 @@ queue_rsp: if (target_read_prot_action(cmd)) { ret = transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; transport_lun_remove_cmd(cmd); @@ -2144,7 +2175,7 @@ queue_rsp: trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_data_in(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; break; case DMA_TO_DEVICE: @@ -2157,7 +2188,7 @@ queue_rsp: atomic_long_add(cmd->data_length, &cmd->se_lun->lun_stats.tx_data_octets); ret = cmd->se_tfo->queue_data_in(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; break; } @@ -2166,7 +2197,7 @@ queue_rsp: queue_status: trace_target_cmd_complete(cmd); ret = cmd->se_tfo->queue_status(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; break; default: @@ -2180,8 +2211,8 @@ queue_status: queue_full: pr_debug("Handling complete_ok QUEUE_FULL: se_cmd: %p," " data_direction: %d\n", cmd, cmd->data_direction); - cmd->t_state = TRANSPORT_COMPLETE_QF_OK; - transport_handle_queue_full(cmd, cmd->se_dev); + + transport_handle_queue_full(cmd, cmd->se_dev, ret, false); } void target_free_sgl(struct scatterlist *sgl, int nents) @@ -2449,18 +2480,14 @@ transport_generic_new_cmd(struct se_cmd *cmd) spin_unlock_irqrestore(&cmd->t_state_lock, flags); ret = cmd->se_tfo->write_pending(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) + if (ret) goto queue_full; - /* fabric drivers should only return -EAGAIN or -ENOMEM as error */ - WARN_ON(ret); - - return (!ret) ? 0 : TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + return 0; queue_full: pr_debug("Handling write_pending QUEUE__FULL: se_cmd: %p\n", cmd); - cmd->t_state = TRANSPORT_COMPLETE_QF_WP; - transport_handle_queue_full(cmd, cmd->se_dev); + transport_handle_queue_full(cmd, cmd->se_dev, ret, true); return 0; } EXPORT_SYMBOL(transport_generic_new_cmd); @@ -2470,10 +2497,10 @@ static void transport_write_pending_qf(struct se_cmd *cmd) int ret; ret = cmd->se_tfo->write_pending(cmd); - if (ret == -EAGAIN || ret == -ENOMEM) { + if (ret) { pr_debug("Handling write_pending QUEUE__FULL: se_cmd: %p\n", cmd); - transport_handle_queue_full(cmd, cmd->se_dev); + transport_handle_queue_full(cmd, cmd->se_dev, ret, true); } } @@ -3011,6 +3038,8 @@ static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status) __releases(&cmd->t_state_lock) __acquires(&cmd->t_state_lock) { + int ret; + assert_spin_locked(&cmd->t_state_lock); WARN_ON_ONCE(!irqs_disabled()); @@ -3034,7 +3063,9 @@ static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status) trace_target_cmd_complete(cmd); spin_unlock_irq(&cmd->t_state_lock); - cmd->se_tfo->queue_status(cmd); + ret = cmd->se_tfo->queue_status(cmd); + if (ret) + transport_handle_queue_full(cmd, cmd->se_dev, ret, false); spin_lock_irq(&cmd->t_state_lock); return 1; @@ -3055,6 +3086,7 @@ EXPORT_SYMBOL(transport_check_aborted_status); void transport_send_task_abort(struct se_cmd *cmd) { unsigned long flags; + int ret; spin_lock_irqsave(&cmd->t_state_lock, flags); if (cmd->se_cmd_flags & (SCF_SENT_CHECK_CONDITION)) { @@ -3090,7 +3122,9 @@ send_abort: cmd->t_task_cdb[0], cmd->tag); trace_target_cmd_complete(cmd); - cmd->se_tfo->queue_status(cmd); + ret = cmd->se_tfo->queue_status(cmd); + if (ret) + transport_handle_queue_full(cmd, cmd->se_dev, ret, false); } static void target_tmr_work(struct work_struct *work) diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 2e282461cfa5..730ed3055336 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -117,6 +117,7 @@ enum transport_state_table { TRANSPORT_ISTATE_PROCESSING = 11, TRANSPORT_COMPLETE_QF_WP = 18, TRANSPORT_COMPLETE_QF_OK = 19, + TRANSPORT_COMPLETE_QF_ERR = 20, }; /* Used for struct se_cmd->se_cmd_flags */ -- cgit v1.2.3 From a4467018c2a7228f4ef58051f0511bd037bff264 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 30 Oct 2016 17:30:08 -0700 Subject: iscsi-target: Propigate queue_data_in + queue_status errors This patch changes iscsi-target to propagate iscsit_transport ->iscsit_queue_data_in() and ->iscsit_queue_status() callback errors, back up into target-core. This allows target-core to retry failed iscsit_transport callbacks using internal queue-full logic. Reported-by: Potnuri Bharat Teja Reviewed-by: Potnuri Bharat Teja Tested-by: Potnuri Bharat Teja Cc: Potnuri Bharat Teja Reported-by: Steve Wise Cc: Steve Wise Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 3 +-- drivers/target/iscsi/iscsi_target_configfs.c | 13 +++++-------- drivers/target/iscsi/iscsi_target_util.c | 5 +++-- drivers/target/iscsi/iscsi_target_util.h | 2 +- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index a91802432f2f..e3f9ed3690b7 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -485,8 +485,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *); int iscsit_queue_rsp(struct iscsi_conn *conn, struct iscsi_cmd *cmd) { - iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); - return 0; + return iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); } EXPORT_SYMBOL(iscsit_queue_rsp); diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index bf40f03755dd..344e8448869c 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1398,11 +1398,10 @@ static u32 lio_sess_get_initiator_sid( static int lio_queue_data_in(struct se_cmd *se_cmd) { struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + struct iscsi_conn *conn = cmd->conn; cmd->i_state = ISTATE_SEND_DATAIN; - cmd->conn->conn_transport->iscsit_queue_data_in(cmd->conn, cmd); - - return 0; + return conn->conn_transport->iscsit_queue_data_in(conn, cmd); } static int lio_write_pending(struct se_cmd *se_cmd) @@ -1431,16 +1430,14 @@ static int lio_write_pending_status(struct se_cmd *se_cmd) static int lio_queue_status(struct se_cmd *se_cmd) { struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + struct iscsi_conn *conn = cmd->conn; cmd->i_state = ISTATE_SEND_STATUS; if (cmd->se_cmd.scsi_status || cmd->sense_reason) { - iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); - return 0; + return iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); } - cmd->conn->conn_transport->iscsit_queue_status(cmd->conn, cmd); - - return 0; + return conn->conn_transport->iscsit_queue_status(conn, cmd); } static void lio_queue_tm_rsp(struct se_cmd *se_cmd) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index b4640338f8d8..7d3e2fcc26a0 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -567,7 +567,7 @@ static void iscsit_remove_cmd_from_immediate_queue( } } -void iscsit_add_cmd_to_response_queue( +int iscsit_add_cmd_to_response_queue( struct iscsi_cmd *cmd, struct iscsi_conn *conn, u8 state) @@ -578,7 +578,7 @@ void iscsit_add_cmd_to_response_queue( if (!qr) { pr_err("Unable to allocate memory for" " struct iscsi_queue_req\n"); - return; + return -ENOMEM; } INIT_LIST_HEAD(&qr->qr_list); qr->cmd = cmd; @@ -590,6 +590,7 @@ void iscsit_add_cmd_to_response_queue( spin_unlock_bh(&conn->response_queue_lock); wake_up(&conn->queues_wq); + return 0; } struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn) diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h index 8ff08856516a..9e4197af8708 100644 --- a/drivers/target/iscsi/iscsi_target_util.h +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -31,7 +31,7 @@ extern int iscsit_find_cmd_for_recovery(struct iscsi_session *, struct iscsi_cmd struct iscsi_conn_recovery **, itt_t); extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *); -extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); +extern int iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *); extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *); extern bool iscsit_conn_all_queues_empty(struct iscsi_conn *); -- cgit v1.2.3 From 555a65f66c3c4d9dd46a565418b0b655d861a723 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 30 Oct 2016 18:58:39 -0700 Subject: iser-target: Fix queue-full response handling This patch addresses two queue-full handling bugs in iser-target. The first is propagating isert_rdma_rw_ctx_post() return back to target-core via isert_put_datain() + isert_get_dataout() callbacks, in order to trigger queue-full logic in target-core. Note target-core expects -EAGAIN or -ENOMEM error to signal RDMA WRITE/READ data-transfer callbacks should be retried, after queue-full logic been invoked. Other types of errors propagated up from RDMA RW API will result in target-core generating internal CHECK_CONDITION status, avoiding subsequent isert_put_datain() and isert_get_dataout() iscsit_transport callback retry attempts. The second is to use transport_generic_request_failure() during T10-PI hw-offload errors in isert_rdma_write_done() and isert_rdma_read_done(), so CHECK_CONDITION queue-full is handled internally by target-core. Also add isert_put_response() T10-PI failure case fixme in isert_rdma_write_done(), which is currently not internally retried or released until session reinstatement. Reported-by: Potnuri Bharat Teja Reviewed-by: Potnuri Bharat Teja Tested-by: Potnuri Bharat Teja Cc: Potnuri Bharat Teja Reported-by: Steve Wise Cc: Steve Wise Cc: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 53 ++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 91cbe86b25c8..9b33c0c97468 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1659,10 +1659,23 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr); isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn); - if (ret) - transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0); - else - isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd); + if (ret) { + /* + * transport_generic_request_failure() expects to have + * plus two references to handle queue-full, so re-add + * one here as target-core will have already dropped + * it after the first isert_put_datain() callback. + */ + kref_get(&cmd->cmd_kref); + transport_generic_request_failure(cmd, cmd->pi_err); + } else { + /* + * XXX: isert_put_response() failure is not retried. + */ + ret = isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd); + if (ret) + pr_warn_ratelimited("isert_put_response() ret: %d\n", ret); + } } static void @@ -1699,13 +1712,15 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; spin_unlock_bh(&cmd->istate_lock); - if (ret) { - target_put_sess_cmd(se_cmd); - transport_send_check_condition_and_sense(se_cmd, - se_cmd->pi_err, 0); - } else { + /* + * transport_generic_request_failure() will drop the extra + * se_cmd->cmd_kref reference after T10-PI error, and handle + * any non-zero ->queue_status() callback error retries. + */ + if (ret) + transport_generic_request_failure(se_cmd, se_cmd->pi_err); + else target_execute_cmd(se_cmd); - } } static void @@ -2171,26 +2186,28 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) chain_wr = &isert_cmd->tx_desc.send_wr; } - isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr); - isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd); - return 1; + rc = isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr); + isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ rc: %d\n", + isert_cmd, rc); + return rc; } static int isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) { struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + int ret; isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n", isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done); isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done; - isert_rdma_rw_ctx_post(isert_cmd, conn->context, - &isert_cmd->tx_desc.tx_cqe, NULL); + ret = isert_rdma_rw_ctx_post(isert_cmd, conn->context, + &isert_cmd->tx_desc.tx_cqe, NULL); - isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", - isert_cmd); - return 0; + isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE rc: %d\n", + isert_cmd, ret); + return ret; } static int -- cgit v1.2.3 From 7a56dc8888be23f44158a85b92da45d545cbf548 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 22 Mar 2017 17:07:30 +0200 Subject: iser-target: avoid posting a recv buffer twice We pre-allocate our send-queues and might overflow them in case we have multi work-request operations which tend to occur for large RDMA transfers over devices with limited allowed sg elements. When we get to a queue-full condition we might retry again later, so track our receive buffers so we don't repost them for a retry case. Reported-by: Potnuri Bharat Teja Tested-by: Potnuri Bharat Teja Reviewed-by: Potnuri Bharat Teja Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger --- drivers/infiniband/ulp/isert/ib_isert.c | 12 ++++++++++++ drivers/infiniband/ulp/isert/ib_isert.h | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 9b33c0c97468..fcbed35e95a8 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -817,6 +817,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) rx_wr->sg_list = &rx_desc->rx_sg; rx_wr->num_sge = 1; rx_wr->next = rx_wr + 1; + rx_desc->in_use = false; } rx_wr--; rx_wr->next = NULL; /* mark end of work requests list */ @@ -835,6 +836,15 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) struct ib_recv_wr *rx_wr_failed, rx_wr; int ret; + if (!rx_desc->in_use) { + /* + * if the descriptor is not in-use we already reposted it + * for recv, so just silently return + */ + return 0; + } + + rx_desc->in_use = false; rx_wr.wr_cqe = &rx_desc->rx_cqe; rx_wr.sg_list = &rx_desc->rx_sg; rx_wr.num_sge = 1; @@ -1397,6 +1407,8 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc) return; } + rx_desc->in_use = true; + ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index c02ada57d7f5..87d994de8c91 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -60,7 +60,7 @@ #define ISER_RX_PAD_SIZE (ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 - \ (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \ - sizeof(struct ib_cqe))) + sizeof(struct ib_cqe) + sizeof(bool))) #define ISCSI_ISER_SG_TABLESIZE 256 @@ -85,6 +85,7 @@ struct iser_rx_desc { u64 dma_addr; struct ib_sge rx_sg; struct ib_cqe rx_cqe; + bool in_use; char pad[ISER_RX_PAD_SIZE]; } __packed; -- cgit v1.2.3 From d19c4643a52f0a56a7ccc86b145f207a57f40116 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 29 Mar 2017 00:19:24 -0500 Subject: target: Fix ALUA transition state race between multiple initiators Multiple threads could be writing to alua_access_state at the same time, or there could be multiple STPGs in flight (different initiators sending them or one initiator sending them to different ports), or a combo of both and the core_alua_do_transition_tg_pt calls will race with each other. Because from the last patches we no longer delay running core_alua_do_transition_tg_pt_work, there does not seem to be any point in running that in a workqueue. And, we always wait for it to complete one way or another, so we can sleep in this code path. So, this patch made over target-pending just adds a mutex and does the work core_alua_do_transition_tg_pt_work was doing in core_alua_do_transition_tg_pt. There is also no need to use an atomic for the tg_pt_gp_alua_access_state. In core_alua_do_transition_tg_pt we will test and set it under the transition mutex. And, it is a int/32 bits so in the other places where it is read, we will never see it partially updated. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 136 ++++++++++++---------------------- drivers/target/target_core_configfs.c | 2 +- include/target/target_core_base.h | 8 +- 3 files changed, 51 insertions(+), 95 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index fd7c16a7ca6e..fc4a9c303d55 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -197,8 +197,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd) /* * Set the ASYMMETRIC ACCESS State */ - buf[off++] |= (atomic_read( - &tg_pt_gp->tg_pt_gp_alua_access_state) & 0xff); + buf[off++] |= tg_pt_gp->tg_pt_gp_alua_access_state & 0xff; /* * Set supported ASYMMETRIC ACCESS State bits */ @@ -710,7 +709,7 @@ target_alua_state_check(struct se_cmd *cmd) spin_lock(&lun->lun_tg_pt_gp_lock); tg_pt_gp = lun->lun_tg_pt_gp; - out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); + out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state; nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs; // XXX: keeps using tg_pt_gp witout reference after unlock @@ -911,7 +910,7 @@ static int core_alua_write_tpg_metadata( } /* - * Called with tg_pt_gp->tg_pt_gp_md_mutex held + * Called with tg_pt_gp->tg_pt_gp_transition_mutex held */ static int core_alua_update_tpg_primary_metadata( struct t10_alua_tg_pt_gp *tg_pt_gp) @@ -934,7 +933,7 @@ static int core_alua_update_tpg_primary_metadata( "alua_access_state=0x%02x\n" "alua_access_status=0x%02x\n", tg_pt_gp->tg_pt_gp_id, - tg_pt_gp->tg_pt_gp_alua_pending_state, + tg_pt_gp->tg_pt_gp_alua_access_state, tg_pt_gp->tg_pt_gp_alua_access_status); snprintf(path, ALUA_METADATA_PATH_LEN, @@ -1013,93 +1012,41 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) spin_unlock(&tg_pt_gp->tg_pt_gp_lock); } -static void core_alua_do_transition_tg_pt_work(struct work_struct *work) -{ - struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work); - struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; - bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == - ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); - - /* - * Update the ALUA metadata buf that has been allocated in - * core_alua_do_port_transition(), this metadata will be written - * to struct file. - * - * Note that there is the case where we do not want to update the - * metadata when the saved metadata is being parsed in userspace - * when setting the existing port access state and access status. - * - * Also note that the failure to write out the ALUA metadata to - * struct file does NOT affect the actual ALUA transition. - */ - if (tg_pt_gp->tg_pt_gp_write_metadata) { - mutex_lock(&tg_pt_gp->tg_pt_gp_md_mutex); - core_alua_update_tpg_primary_metadata(tg_pt_gp); - mutex_unlock(&tg_pt_gp->tg_pt_gp_md_mutex); - } - /* - * Set the current primary ALUA access state to the requested new state - */ - atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, - tg_pt_gp->tg_pt_gp_alua_pending_state); - - pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu" - " from primary access state %s to %s\n", (explicit) ? "explicit" : - "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), - tg_pt_gp->tg_pt_gp_id, - core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state), - core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state)); - - core_alua_queue_state_change_ua(tg_pt_gp); - - spin_lock(&dev->t10_alua.tg_pt_gps_lock); - atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt); - spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - - if (tg_pt_gp->tg_pt_gp_transition_complete) - complete(tg_pt_gp->tg_pt_gp_transition_complete); -} - static int core_alua_do_transition_tg_pt( struct t10_alua_tg_pt_gp *tg_pt_gp, int new_state, int explicit) { - struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; - DECLARE_COMPLETION_ONSTACK(wait); + int prev_state; + mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex); /* Nothing to be done here */ - if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state) + if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) { + mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex); return 0; + } - if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) + if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) { + mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex); return -EAGAIN; - - /* - * Flush any pending transitions - */ - if (!explicit) - flush_work(&tg_pt_gp->tg_pt_gp_transition_work); + } /* * Save the old primary ALUA access state, and set the current state * to ALUA_ACCESS_STATE_TRANSITION. */ - atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, - ALUA_ACCESS_STATE_TRANSITION); + prev_state = tg_pt_gp->tg_pt_gp_alua_access_state; + tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION; tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ? ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG : ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA; core_alua_queue_state_change_ua(tg_pt_gp); - if (new_state == ALUA_ACCESS_STATE_TRANSITION) + if (new_state == ALUA_ACCESS_STATE_TRANSITION) { + mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex); return 0; - - tg_pt_gp->tg_pt_gp_alua_previous_state = - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; + } /* * Check for the optional ALUA primary state transition delay @@ -1108,19 +1055,36 @@ static int core_alua_do_transition_tg_pt( msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs); /* - * Take a reference for workqueue item + * Set the current primary ALUA access state to the requested new state */ - spin_lock(&dev->t10_alua.tg_pt_gps_lock); - atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); - spin_unlock(&dev->t10_alua.tg_pt_gps_lock); + tg_pt_gp->tg_pt_gp_alua_access_state = new_state; - schedule_work(&tg_pt_gp->tg_pt_gp_transition_work); - if (explicit) { - tg_pt_gp->tg_pt_gp_transition_complete = &wait; - wait_for_completion(&wait); - tg_pt_gp->tg_pt_gp_transition_complete = NULL; + /* + * Update the ALUA metadata buf that has been allocated in + * core_alua_do_port_transition(), this metadata will be written + * to struct file. + * + * Note that there is the case where we do not want to update the + * metadata when the saved metadata is being parsed in userspace + * when setting the existing port access state and access status. + * + * Also note that the failure to write out the ALUA metadata to + * struct file does NOT affect the actual ALUA transition. + */ + if (tg_pt_gp->tg_pt_gp_write_metadata) { + core_alua_update_tpg_primary_metadata(tg_pt_gp); } + pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu" + " from primary access state %s to %s\n", (explicit) ? "explicit" : + "implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item), + tg_pt_gp->tg_pt_gp_id, + core_alua_dump_state(prev_state), + core_alua_dump_state(new_state)); + + core_alua_queue_state_change_ua(tg_pt_gp); + + mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex); return 0; } @@ -1685,14 +1649,12 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, } INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list); INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list); - mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); + mutex_init(&tg_pt_gp->tg_pt_gp_transition_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); - INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work, - core_alua_do_transition_tg_pt_work); tg_pt_gp->tg_pt_gp_dev = dev; - atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, - ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); + tg_pt_gp->tg_pt_gp_alua_access_state = + ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED; /* * Enable both explicit and implicit ALUA support by default */ @@ -1797,8 +1759,6 @@ void core_alua_free_tg_pt_gp( dev->t10_alua.alua_tg_pt_gps_counter--; spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - flush_work(&tg_pt_gp->tg_pt_gp_transition_work); - /* * Allow a struct t10_alua_tg_pt_gp_member * referenced by * core_alua_get_tg_pt_gp_by_name() in @@ -1938,8 +1898,8 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page) "Primary Access Status: %s\nTG Port Secondary Access" " State: %s\nTG Port Secondary Access Status: %s\n", config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id, - core_alua_dump_state(atomic_read( - &tg_pt_gp->tg_pt_gp_alua_access_state)), + core_alua_dump_state( + tg_pt_gp->tg_pt_gp_alua_access_state), core_alua_dump_status( tg_pt_gp->tg_pt_gp_alua_access_status), atomic_read(&lun->lun_tg_pt_secondary_offline) ? diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 38b5025e4c7a..70657fd56440 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -2392,7 +2392,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item, char *page) { return sprintf(page, "%d\n", - atomic_read(&to_tg_pt_gp(item)->tg_pt_gp_alua_access_state)); + to_tg_pt_gp(item)->tg_pt_gp_alua_access_state); } static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item, diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 730ed3055336..ccfad0e9c2cd 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -280,8 +280,6 @@ struct t10_alua_tg_pt_gp { u16 tg_pt_gp_id; int tg_pt_gp_valid_id; int tg_pt_gp_alua_supported_states; - int tg_pt_gp_alua_pending_state; - int tg_pt_gp_alua_previous_state; int tg_pt_gp_alua_access_status; int tg_pt_gp_alua_access_type; int tg_pt_gp_nonop_delay_msecs; @@ -290,18 +288,16 @@ struct t10_alua_tg_pt_gp { int tg_pt_gp_pref; int tg_pt_gp_write_metadata; u32 tg_pt_gp_members; - atomic_t tg_pt_gp_alua_access_state; + int tg_pt_gp_alua_access_state; atomic_t tg_pt_gp_ref_cnt; spinlock_t tg_pt_gp_lock; - struct mutex tg_pt_gp_md_mutex; + struct mutex tg_pt_gp_transition_mutex; struct se_device *tg_pt_gp_dev; struct config_group tg_pt_gp_group; struct list_head tg_pt_gp_list; struct list_head tg_pt_gp_lun_list; struct se_lun *tg_pt_gp_alua_lun; struct se_node_acl *tg_pt_gp_alua_nacl; - struct work_struct tg_pt_gp_transition_work; - struct completion *tg_pt_gp_transition_complete; }; struct t10_vpd { -- cgit v1.2.3 From 29f72ce3e4d18066ec75c79c857bee0618a3504b Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 30 Mar 2017 13:17:14 +0200 Subject: x86/mce/AMD: Give a name to MCA bank 3 when accessed with legacy MSRs MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name. However, MCA bank 3 is defined on Fam17h systems and can be accessed using legacy MSRs. Without a name we get a stack trace on Fam17h systems when trying to register sysfs files for bank 3 on kernels that don't recognize Scalable MCA. Call MCA bank 3 "decode_unit" since this is what it represents on Fam17h. This will allow kernels without SMCA support to see this bank on Fam17h+ and prevent the stack trace. This will not affect older systems since this bank is reserved on them, i.e. it'll be ignored. Tested on AMD Fam15h and Fam17h systems. WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal kobject: (ffff88085bb256c0): attempted to be registered with empty name! ... Call Trace: kobject_add_internal kobject_add kobject_create_and_add threshold_create_device threshold_init_device Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 524cc5780a77..6e4a047e4b68 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -60,7 +60,7 @@ static const char * const th_names[] = { "load_store", "insn_fetch", "combined_unit", - "", + "decode_unit", "northbridge", "execution_unit", }; -- cgit v1.2.3 From 6b1cc946ddfcfc17d66c7d02eafa14deeb183437 Mon Sep 17 00:00:00 2001 From: Zhengyi Shen Date: Wed, 29 Mar 2017 15:00:20 +0800 Subject: x86/boot: Include missing header file Sparse complains about missing forward declarations: arch/x86/boot/compressed/error.c:8:6: warning: symbol 'warn' was not declared. Should it be static? arch/x86/boot/compressed/error.c:15:6: warning: symbol 'error' was not declared. Should it be static? Include the missing header file. Signed-off-by: Zhengyi Shen Acked-by: Kess Cook Link: http://lkml.kernel.org/r/1490770820-24472-1-git-send-email-shenzhengyi@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/error.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c index 6248740b68b5..31922023de49 100644 --- a/arch/x86/boot/compressed/error.c +++ b/arch/x86/boot/compressed/error.c @@ -4,6 +4,7 @@ * memcpy() and memmove() are defined for the compressed boot environment. */ #include "misc.h" +#include "error.h" void warn(char *m) { -- cgit v1.2.3 From 2f726aec19a9d2c63bec9a8a53a3910ffdcd09f8 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 31 Mar 2017 10:31:40 +0800 Subject: ALSA: hda - fix a problem for lineout on a Dell AIO machine On this Dell AIO machine, the lineout jack does not work. We found the pin 0x1a is assigned to lineout on this machine, and in the past, we applied ALC298_FIXUP_DELL1_MIC_NO_PRESENCE to fix the heaset-set mic problem for this machine, this fixup will redefine the pin 0x1a to headphone-mic, as a result the lineout doesn't work anymore. After consulting with Dell, they told us this machine doesn't support microphone via headset jack, so we add a new fixup which only defines the pin 0x18 as the headset-mic. [rearranged the fixup insertion position by tiwai in order to make the merge with other branches easier -- tiwai] Fixes: 59ec4b57bcae ("ALSA: hda - Fix headset mic detection problem for two dell machines") Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7f989898cbd9..299835d1fbaa 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4858,6 +4858,7 @@ enum { ALC292_FIXUP_DISABLE_AAMIX, ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK, ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, ALC275_FIXUP_DELL_XPS, ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, ALC293_FIXUP_LENOVO_SPK_NOISE, @@ -5470,6 +5471,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC275_FIXUP_DELL_XPS] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -5542,7 +5552,7 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_speaker_volume, .chained = true, - .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, + .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE, }, [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { .type = HDA_FIXUP_PINS, -- cgit v1.2.3 From 6c9a8cdad48a04795dbc35ac3370afa3180045ae Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Mar 2017 21:21:43 +0100 Subject: drm/i915: Avoid lock dropping between rescheduling Unlocking is dangerous. In this case we combine an early update to the out-of-queue request, because we know that it will be inserted into the correct FIFO priority-ordered slot when it becomes ready in the future. However, given sufficient enthusiasm, it may become ready as we are continuing to reschedule, and so may gazump the FIFO if we have since dropped its spinlock. The result is that it may be executed too early, before its dependencies. v2: Move all work into the second phase over the topological sort. This removes the shortcut on the out-of-rbtree request to ensure that we only adjust its priority after adjusting all of its dependencies. Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities") Testcase: igt/gem_exec_whisper Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170327202143.7972-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin (cherry picked from commit a79a524e9260d4ffaff88348615e70fb3d393692) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.c | 53 ++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 91555d4e9129..47517a02f0a4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -670,15 +670,14 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) static struct intel_engine_cs * pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine; + struct intel_engine_cs *engine = + container_of(pt, struct drm_i915_gem_request, priotree)->engine; + + GEM_BUG_ON(!locked); - engine = container_of(pt, - struct drm_i915_gem_request, - priotree)->engine; if (engine != locked) { - if (locked) - spin_unlock_irq(&locked->timeline->lock); - spin_lock_irq(&engine->timeline->lock); + spin_unlock(&locked->timeline->lock); + spin_lock(&engine->timeline->lock); } return engine; @@ -686,7 +685,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) static void execlists_schedule(struct drm_i915_gem_request *request, int prio) { - struct intel_engine_cs *engine = NULL; + struct intel_engine_cs *engine; struct i915_dependency *dep, *p; struct i915_dependency stack; LIST_HEAD(dfs); @@ -720,26 +719,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) list_for_each_entry_safe(dep, p, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; - list_for_each_entry(p, &pt->signalers_list, signal_link) + /* Within an engine, there can be no cycle, but we may + * refer to the same dependency chain multiple times + * (redundant dependencies are not eliminated) and across + * engines. + */ + list_for_each_entry(p, &pt->signalers_list, signal_link) { + GEM_BUG_ON(p->signaler->priority < pt->priority); if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); + } list_safe_reset_next(dep, p, dfs_link); - if (!RB_EMPTY_NODE(&pt->node)) - continue; - - engine = pt_lock_engine(pt, engine); - - /* If it is not already in the rbtree, we can update the - * priority inplace and skip over it (and its dependencies) - * if it is referenced *again* as we descend the dfs. - */ - if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) { - pt->priority = prio; - list_del_init(&dep->dfs_link); - } } + engine = request->engine; + spin_lock_irq(&engine->timeline->lock); + /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; @@ -751,16 +747,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) if (prio <= pt->priority) continue; - GEM_BUG_ON(RB_EMPTY_NODE(&pt->node)); - pt->priority = prio; - rb_erase(&pt->node, &engine->execlist_queue); - if (insert_request(pt, &engine->execlist_queue)) - engine->execlist_first = &pt->node; + if (!RB_EMPTY_NODE(&pt->node)) { + rb_erase(&pt->node, &engine->execlist_queue); + if (insert_request(pt, &engine->execlist_queue)) + engine->execlist_first = &pt->node; + } } - if (engine) - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline->lock); /* XXX Do we need to preempt to make room for us and our deps? */ } -- cgit v1.2.3 From 729a0cd45c886a8d1ae0b3063b20d525fc729523 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 27 Mar 2017 17:41:02 +0800 Subject: drm/i915/gvt: adjust mem size for low resolution type From commit d1a513be1f0a ("drm/i915/gvt: add resolution definition for vGPU type"), small type has been restricted to small resolution, so not require larger high GM size any more. Change to smaller 384M for more VM creation with vGPU enabled which still perform reasonable workload. Fixes: d1a513be1f0a ("drm/i915/gvt: add resolution definition for vGPU type") Signed-off-by: Zhenyu Wang (cherry picked from commit bf39ec335eb8cc51b4e1c9303ef92b380d204bb1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gvt/vgpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 41cfa5ccae84..d8d128625331 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -72,7 +72,7 @@ static struct { char *name; } vgpu_types[] = { /* Fixed vGPU type table */ - { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" }, + { MB_TO_BYTES(64), MB_TO_BYTES(384), 4, GVT_EDID_1024_768, "8" }, { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, -- cgit v1.2.3 From 9ba2a6261de49588714a25f49db80bbe961b870a Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 24 Mar 2017 01:56:54 -0400 Subject: drm/i915/gvt: remove the redundant info NULL check The variable info is never NULL, which is checked by the caller. This patch removes the redundant info NULL check logic. Fixes: 695fbc08d80f ("drm/i915/gvt: replace the gvt_err with gvt_vgpu_err") Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang (cherry picked from commit 865f03d42ed0c90c9faf3301775176834ba13eba) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gvt/kvmgt.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index d641214578a7..2d92119b488c 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1340,13 +1340,6 @@ static int kvmgt_guest_init(struct mdev_device *mdev) static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) { - struct intel_vgpu *vgpu = info->vgpu; - - if (!info) { - gvt_vgpu_err("kvmgt_guest_info invalid\n"); - return false; - } - kvm_page_track_unregister_notifier(info->kvm, &info->track_node); kvm_put_kvm(info->kvm); kvmgt_protect_table_destroy(info); -- cgit v1.2.3 From 1383aeca92b72f4179420820c3a64dfb5909cc97 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 09:53:41 +0100 Subject: drm/i915: Ironlake do_idle_maps w/a may be called w/o struct_mutex Since commit 1233e2db199d ("drm/i915: Move object backing storage manipulation to its own locking"), i915_gem_object_put_pages() and specifically the i915_gem_gtt_finish_pages() may be called from outside of the struct_mutex and so we can no longer pass I915_WAIT_LOCKED to i915_gem_wait_for_idle. Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation to its own locking") Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170330085341.20311-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit 228ec87ccd040b620c467cd61d594bfaa4f8a12e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2801a4d56324..96e45a4d5441 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2704,7 +2704,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct i915_ggtt *ggtt = &dev_priv->ggtt; if (unlikely(ggtt->do_idle_maps)) { - if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) { + if (i915_gem_wait_for_idle(dev_priv, 0)) { DRM_ERROR("Failed to wait for idle; VT'd may hang.\n"); /* Wait a bit, in hopes it avoids the hang */ udelay(10); -- cgit v1.2.3 From ecf8e89917d600fe846ebda911a9e690c6babfd0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Mar 2017 12:16:14 +0100 Subject: drm/i915: Use a dummy timeline name for a signaled fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Michał Winiarski pointed out that the debugging infrastructure (such as trace_dma_fence_release) likes to pretty print the timeline name, long after we have freed the timeline. Our timelines currently live as part of the GTT (due to the strict ordering we currently use through each) which belong to the context. We aim to free the context and release its hardware resources as soon as we able to (i.e. when the last fence/request using it has been signaled and retired). As the .get_timeline_name is purely a debug feature, rather than extending the lifetime of the context, or splitting it into many different release phases just to keep the name around, replace the timeline name with a constant after the fence has been signaled. This avoids the potential use-after-free. Reported-by: Krzysztof Olinski Fixes: 80b204bce8f2 ("drm/i915: Enable multiple timelines") Signed-off-by: Chris Wilson Cc: Michał Winiarski Cc: Joonas Lahtinen Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170330111614.29757-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Reviewed-by: Michał Winiarski (cherry picked from commit 05506b5be081b728353f1612b05c8ff689772832) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_request.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e7c3c0318ff6..da70bfe97ec5 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -37,6 +37,17 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence) static const char *i915_fence_get_timeline_name(struct dma_fence *fence) { + /* The timeline struct (as part of the ppgtt underneath a context) + * may be freed when the request is no longer in use by the GPU. + * We could extend the life of a context to beyond that of all + * fences, possibly keeping the hw resource around indefinitely, + * or we just give them a false name. Since + * dma_fence_ops.get_timeline_name is a debug feature, the occasional + * lie seems justifiable. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return "signaled"; + return to_request(fence)->timeline->common->name; } -- cgit v1.2.3 From 42969893b45a7a1864192f5634a8d2626e546a7b Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Fri, 31 Mar 2017 12:05:32 +0100 Subject: irqchip/mips-gic: Fix Local compare interrupt Commit 4cfffcfa5106 ("irqchip/mips-gic: Fix local interrupts") added mapping of several local interrupts during initialisation of the gic driver. This associates virq numbers with these interrupts. Unfortunately, as not all of the interrupts are mapped in hardware order, when drivers subsequently request these interrupts they conflict with the mappings that have already been set up. For example, this manifests itself in the gic clocksource driver, which fails to probe with the message: clocksource: GIC: mask: 0xffffffffffffffff max_cycles: 0x7350c9738, max_idle_ns: 440795203769 ns GIC timer IRQ 25 setup failed: -22 This is because virq 25 (the correct IRQ number specified via device tree) was allocated to the PERFCTR interrupt (and 24 to the timer, 26 to the FDC). To fix this, map all of these local interrupts in the hardware order so as to associate their virq numbers with the correct hw interrupts. Fixes: 4cfffcfa5106 ("irqchip/mips-gic: Fix local interrupts") Acked-by: Ralf Baechle Signed-off-by: Matt Redfearn Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-mips-gic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 11d12bccc4e7..cd20df12d63d 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -991,8 +991,12 @@ static void __init gic_map_single_int(struct device_node *node, static void __init gic_map_interrupts(struct device_node *node) { + gic_map_single_int(node, GIC_LOCAL_INT_WD); + gic_map_single_int(node, GIC_LOCAL_INT_COMPARE); gic_map_single_int(node, GIC_LOCAL_INT_TIMER); gic_map_single_int(node, GIC_LOCAL_INT_PERFCTR); + gic_map_single_int(node, GIC_LOCAL_INT_SWINT0); + gic_map_single_int(node, GIC_LOCAL_INT_SWINT1); gic_map_single_int(node, GIC_LOCAL_INT_FDC); } -- cgit v1.2.3 From 7a0c5c5b834fb60764b494b0e39c239da3b0774b Mon Sep 17 00:00:00 2001 From: Dmitry Bilunov Date: Thu, 30 Mar 2017 18:14:26 +0300 Subject: dm raid: fix NULL pointer dereference for raid1 without bitmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4257e08 ("dm raid: support to change bitmap region size") introduced a bitmap resize call during preresume phase. User can create a DM device with "raid" target configured as raid1 with no metadata devices to hold superblock/bitmap info. It can be achieved using the following sequence: truncate -s 32M /dev/shm/raid-test LOOP=$(losetup --show -f /dev/shm/raid-test) dmsetup create raid-test-linear0 --table "0 1024 linear $LOOP 0" dmsetup create raid-test-linear1 --table "0 1024 linear $LOOP 1024" dmsetup create raid-test --table "0 1024 raid raid1 1 2048 2 - /dev/mapper/raid-test-linear0 - /dev/mapper/raid-test-linear1" This results in the following crash: [ 4029.110216] device-mapper: raid: Ignoring chunk size parameter for RAID 1 [ 4029.110217] device-mapper: raid: Choosing default region size of 4MiB [ 4029.111349] md/raid1:mdX: active with 2 out of 2 mirrors [ 4029.114770] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030 [ 4029.114802] IP: bitmap_resize+0x25/0x7c0 [md_mod] [ 4029.114816] PGD 0 … [ 4029.115059] Hardware name: Aquarius Pro P30 S85 BUY-866/B85M-E, BIOS 2304 05/25/2015 [ 4029.115079] task: ffff88015cc29a80 task.stack: ffffc90001a5c000 [ 4029.115097] RIP: 0010:bitmap_resize+0x25/0x7c0 [md_mod] [ 4029.115112] RSP: 0018:ffffc90001a5fb68 EFLAGS: 00010246 [ 4029.115127] RAX: 0000000000000005 RBX: 0000000000000000 RCX: 0000000000000000 [ 4029.115146] RDX: 0000000000000000 RSI: 0000000000000400 RDI: 0000000000000000 [ 4029.115166] RBP: ffffc90001a5fc28 R08: 0000000800000000 R09: 00000008ffffffff [ 4029.115185] R10: ffffea0005661600 R11: ffff88015cc29a80 R12: ffff88021231f058 [ 4029.115204] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 4029.115223] FS: 00007fe73a6b4740(0000) GS:ffff88021ea80000(0000) knlGS:0000000000000000 [ 4029.115245] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4029.115261] CR2: 0000000000000030 CR3: 0000000159a74000 CR4: 00000000001426e0 [ 4029.115281] Call Trace: [ 4029.115291] ? raid_iterate_devices+0x63/0x80 [dm_raid] [ 4029.115309] ? dm_table_all_devices_attribute.isra.23+0x41/0x70 [dm_mod] [ 4029.115329] ? dm_table_set_restrictions+0x225/0x2d0 [dm_mod] [ 4029.115346] raid_preresume+0x81/0x2e0 [dm_raid] [ 4029.115361] dm_table_resume_targets+0x47/0xe0 [dm_mod] [ 4029.115378] dm_resume+0xa8/0xd0 [dm_mod] [ 4029.115391] dev_suspend+0x123/0x250 [dm_mod] [ 4029.115405] ? table_load+0x350/0x350 [dm_mod] [ 4029.115419] ctl_ioctl+0x1c2/0x490 [dm_mod] [ 4029.115433] dm_ctl_ioctl+0xe/0x20 [dm_mod] [ 4029.115447] do_vfs_ioctl+0x8d/0x5a0 [ 4029.115459] ? ____fput+0x9/0x10 [ 4029.115470] ? task_work_run+0x79/0xa0 [ 4029.115481] SyS_ioctl+0x3c/0x70 [ 4029.115493] entry_SYSCALL_64_fastpath+0x13/0x94 The raid_preresume() function incorrectly assumes that the raid_set has a bitmap enabled if RT_FLAG_RS_BITMAP_LOADED is set. But RT_FLAG_RS_BITMAP_LOADED is getting set in __load_dirty_region_bitmap() even if there is no bitmap present (and bitmap_load() happily returns 0 even if a bitmap isn't present). So the only way forward in the near-term is to check if the bitmap is present by seeing if mddev->bitmap is not NULL after bitmap_load() has been called. By doing so the above NULL pointer is avoided. Fixes: 4257e08 ("dm raid: support to change bitmap region size") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Dmitry Bilunov Signed-off-by: Andrey Smetanin Acked-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index f8564d63982f..1e217ba84d09 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3726,7 +3726,7 @@ static int raid_preresume(struct dm_target *ti) return r; /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */ - if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && + if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) { r = bitmap_resize(mddev->bitmap, mddev->dev_sectors, to_bytes(rs->requested_bitmap_chunk_sectors), 0); -- cgit v1.2.3 From a6040bc610554c66088fda3608ae5d6307c548e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 20 Mar 2017 10:05:38 +0100 Subject: serial: mxs-auart: Fix baudrate calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The reference manual for the i.MX28 recommends to calculate the divisor as divisor = (UARTCLK * 32) / baud rate, rounded to the nearest integer , so let's do this. For a typical setup of UARTCLK = 24 MHz and baud rate = 115200 this changes the divisor from 6666 to 6667 and so the actual baud rate improves from 115211.521 Bd (error ≅ 0.01 %) to 115194.240 Bd (error ≅ 0.005 %). Signed-off-by: Uwe Kleine-König Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mxs-auart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 6989b227d134..be94246b6fcc 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1088,7 +1088,7 @@ static void mxs_auart_settermios(struct uart_port *u, AUART_LINECTRL_BAUD_DIV_MAX); baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN; baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max); - div = u->uartclk * 32 / baud; + div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud); } ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F); -- cgit v1.2.3 From 31ca2c63fdc0aee725cbd4f207c1256f5deaabde Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Mon, 20 Mar 2017 11:52:41 +0100 Subject: tty/serial: atmel: fix race condition (TX+DMA) If uart_flush_buffer() is called between atmel_tx_dma() and atmel_complete_tx_dma(), the circular buffer has been cleared, but not atmel_port->tx_len. That leads to a circular buffer overflow (dumping (UART_XMIT_SIZE - atmel_port->tx_len) bytes). Tested-by: Nicolas Ferre Signed-off-by: Richard Genoud Cc: stable # 3.12+ Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index dcebb28ffbc4..5f644515ed33 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1951,6 +1951,11 @@ static void atmel_flush_buffer(struct uart_port *port) atmel_uart_writel(port, ATMEL_PDC_TCR, 0); atmel_port->pdc_tx.ofs = 0; } + /* + * in uart_flush_buffer(), the xmit circular buffer has just + * been cleared, so we have to reset tx_len accordingly. + */ + atmel_port->tx_len = 0; } /* -- cgit v1.2.3 From 497e1e16f45c70574dc9922c7f75c642c2162119 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Mon, 20 Mar 2017 16:38:57 +0100 Subject: tty/serial: atmel: fix TX path in atmel_console_write() A side effect of 89d8232411a8 ("tty/serial: atmel_serial: BUG: stop DMA from transmitting in stop_tx") is that the console can be called with TX path disabled. Then the system would hang trying to push charecters out in atmel_console_putchar(). Signed-off-by: Nicolas Ferre Fixes: 89d8232411a8 ("tty/serial: atmel_serial: BUG: stop DMA from transmitting in stop_tx") Cc: stable #4.4+ Acked-by: Richard Genoud Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 5f644515ed33..1f50a83ef958 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2488,6 +2488,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count) pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN; atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS); + /* Make sure that tx path is actually able to send characters */ + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN); + uart_console_write(port, s, count, atmel_console_putchar); /* -- cgit v1.2.3 From 49e1590c2ead870f4ae5568816241c4cb9bc1606 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 27 Mar 2017 19:39:10 -0400 Subject: serial: 8250_EXAR: fix duplicate Kconfig text and add missing help text In commit d0aeaa83f0b0f7a92615bbdd6b1f96812f7dcfd2 ("serial: exar: split out the exar code from 8250_pci") the exar driver got its own Kconfig. However the text for the new option was never changed from the original 8250_PCI text, and hence it appears confusing when you get asked the same question twice: 8250/16550 PCI device support (SERIAL_8250_PCI) [Y/n/m/?] (NEW) 8250/16550 PCI device support (SERIAL_8250_EXAR) [Y/n/m] (NEW) Adding to the confusion, is that there is no help text for this new option to indicate it is specific to a certain family of cards. Fix both issues at the same time, as well as the space vs. tab issues introduced in the same commit. Fixes: d0aeaa83f0b0 ("serial: exar: split out the exar code from 8250_pci") Cc: Jiri Slaby Cc: linux-serial@vger.kernel.org Signed-off-by: Paul Gortmaker Acked-by: Sudip Mukherjee Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/Kconfig | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index a65fb8197aec..0e3f529d50e9 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -128,9 +128,13 @@ config SERIAL_8250_PCI by the parport_serial driver, enabled with CONFIG_PARPORT_SERIAL. config SERIAL_8250_EXAR - tristate "8250/16550 PCI device support" - depends on SERIAL_8250_PCI + tristate "8250/16550 Exar/Commtech PCI/PCIe device support" + depends on SERIAL_8250_PCI default SERIAL_8250 + help + This builds support for XR17C1xx, XR17V3xx and some Commtech + 422x PCIe serial cards that are not covered by the more generic + SERIAL_8250_PCI option. config SERIAL_8250_HP300 tristate -- cgit v1.2.3 From 0e3d3e5df07dcf8a50d96e0ecd6ab9a888f55dfc Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 30 Mar 2017 13:49:03 -0400 Subject: NFSv4.1 fix infinite loop on IO BAD_STATEID error Commit 63d63cbf5e03 "NFSv4.1: Don't recheck delegations that have already been checked" introduced a regression where when a client received BAD_STATEID error it would not send any TEST_STATEID and instead go into an infinite loop of resending the IO that caused the BAD_STATEID. Fixes: 63d63cbf5e03 ("NFSv4.1: Don't recheck delegations that have already been checked") Signed-off-by: Olga Kornievskaia Cc: stable@vger.kernel.org # 4.9+ Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c780d98035cc..201ca3f2c4ba 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2442,17 +2442,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state) } nfs4_stateid_copy(&stateid, &delegation->stateid); - if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) || + !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, + &delegation->flags)) { rcu_read_unlock(); nfs_finish_clear_delegation_stateid(state, &stateid); return; } - if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) { - rcu_read_unlock(); - return; - } - cred = get_rpccred(delegation->cred); rcu_read_unlock(); status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); -- cgit v1.2.3 From f17f8a14e82cdf34cd6473e3644f3c672b3884f6 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Thu, 30 Mar 2017 17:31:18 +0200 Subject: nfs: flexfiles: fix kernel OOPS if MDS returns unsupported DS type this fix aims to fix dereferencing of a mirror in an error state when MDS returns unsupported DS type (IOW, not v3), which causes the following oops: [ 220.370709] BUG: unable to handle kernel NULL pointer dereference at 0000000000000065 [ 220.370842] IP: ff_layout_mirror_valid+0x2d/0x110 [nfs_layout_flexfiles] [ 220.370920] PGD 0 [ 220.370972] Oops: 0000 [#1] SMP [ 220.371013] Modules linked in: nfnetlink_queue nfnetlink_log bluetooth nfs_layout_flexfiles rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_raw ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security iptable_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_security ebtable_filter ebtables ip6table_filter ip6_tables binfmt_misc intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel btrfs kvm arc4 snd_hda_codec_hdmi iwldvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_cstate mac80211 xor uvcvideo [ 220.371814] videobuf2_vmalloc videobuf2_memops snd_hda_codec_idt mei_wdt videobuf2_v4l2 snd_hda_codec_generic iTCO_wdt ppdev videobuf2_core iTCO_vendor_support dell_rbtn dell_wmi iwlwifi sparse_keymap dell_laptop dell_smbios snd_hda_intel dcdbas videodev snd_hda_codec dell_smm_hwmon snd_hda_core media cfg80211 intel_uncore snd_hwdep raid6_pq snd_seq intel_rapl_perf snd_seq_device joydev i2c_i801 rfkill lpc_ich snd_pcm parport_pc mei_me parport snd_timer dell_smo8800 mei snd shpchp soundcore tpm_tis tpm_tis_core tpm nfsd auth_rpcgss nfs_acl lockd grace sunrpc i915 nouveau mxm_wmi ttm i2c_algo_bit drm_kms_helper crc32c_intel e1000e drm sdhci_pci firewire_ohci sdhci serio_raw mmc_core firewire_core ptp crc_itu_t pps_core wmi fjes video [ 220.372568] CPU: 7 PID: 4988 Comm: cat Not tainted 4.10.5-200.fc25.x86_64 #1 [ 220.372647] Hardware name: Dell Inc. Latitude E6520/0J4TFW, BIOS A06 07/11/2011 [ 220.372729] task: ffff94791f6ea580 task.stack: ffffb72b88c0c000 [ 220.372802] RIP: 0010:ff_layout_mirror_valid+0x2d/0x110 [nfs_layout_flexfiles] [ 220.372883] RSP: 0018:ffffb72b88c0f970 EFLAGS: 00010246 [ 220.372945] RAX: 0000000000000000 RBX: ffff9479015ca600 RCX: ffffffffffffffed [ 220.373025] RDX: ffffffffffffffed RSI: ffff9479753dc980 RDI: 0000000000000000 [ 220.373104] RBP: ffffb72b88c0f988 R08: 000000000001c980 R09: ffffffffc0ea6112 [ 220.373184] R10: ffffef17477d9640 R11: ffff9479753dd6c0 R12: ffff9479211c7440 [ 220.373264] R13: ffff9478f45b7790 R14: 0000000000000001 R15: ffff9479015ca600 [ 220.373345] FS: 00007f555fa3e700(0000) GS:ffff9479753c0000(0000) knlGS:0000000000000000 [ 220.373435] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 220.373506] CR2: 0000000000000065 CR3: 0000000196044000 CR4: 00000000000406e0 [ 220.373586] Call Trace: [ 220.373627] nfs4_ff_layout_prepare_ds+0x5e/0x200 [nfs_layout_flexfiles] [ 220.373708] ff_layout_pg_init_read+0x81/0x160 [nfs_layout_flexfiles] [ 220.373806] __nfs_pageio_add_request+0x11f/0x4a0 [nfs] [ 220.373886] ? nfs_create_request.part.14+0x37/0x330 [nfs] [ 220.373967] nfs_pageio_add_request+0xb2/0x260 [nfs] [ 220.374042] readpage_async_filler+0xaf/0x280 [nfs] [ 220.374103] read_cache_pages+0xef/0x1b0 [ 220.374166] ? nfs_read_completion+0x210/0x210 [nfs] [ 220.374239] nfs_readpages+0x129/0x200 [nfs] [ 220.374293] __do_page_cache_readahead+0x1d0/0x2f0 [ 220.374352] ondemand_readahead+0x17d/0x2a0 [ 220.374403] page_cache_sync_readahead+0x2e/0x50 [ 220.374460] generic_file_read_iter+0x6c8/0x950 [ 220.374532] ? nfs_mapping_need_revalidate_inode+0x17/0x40 [nfs] [ 220.374617] nfs_file_read+0x6e/0xc0 [nfs] [ 220.374670] __vfs_read+0xe2/0x150 [ 220.374715] vfs_read+0x96/0x130 [ 220.374758] SyS_read+0x55/0xc0 [ 220.374801] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 220.374856] RIP: 0033:0x7f555f570bd0 [ 220.374900] RSP: 002b:00007ffeb73e1b38 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 220.374986] RAX: ffffffffffffffda RBX: 00007f555f839ae0 RCX: 00007f555f570bd0 [ 220.375066] RDX: 0000000000020000 RSI: 00007f555fa41000 RDI: 0000000000000003 [ 220.375145] RBP: 0000000000021010 R08: ffffffffffffffff R09: 0000000000000000 [ 220.375226] R10: 00007f555fa40010 R11: 0000000000000246 R12: 0000000000022000 [ 220.375305] R13: 0000000000021010 R14: 0000000000001000 R15: 0000000000002710 [ 220.375386] Code: 66 66 90 55 48 89 e5 41 54 53 49 89 fc 48 83 ec 08 48 85 f6 74 2e 48 8b 4e 30 48 89 f3 48 81 f9 00 f0 ff ff 77 1e 48 85 c9 74 15 <48> 83 79 78 00 b8 01 00 00 00 74 2c 48 83 c4 08 5b 41 5c 5d c3 [ 220.375653] RIP: ff_layout_mirror_valid+0x2d/0x110 [nfs_layout_flexfiles] RSP: ffffb72b88c0f970 [ 220.375748] CR2: 0000000000000065 [ 220.403538] ---[ end trace bcdca752211b7da9 ]--- Signed-off-by: Tigran Mkrtchyan Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 85fde93dff77..457cfeb1d5c1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -208,6 +208,10 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, } else goto outerr; } + + if (IS_ERR(mirror->mirror_ds)) + goto outerr; + if (mirror->mirror_ds->ds == NULL) { struct nfs4_deviceid_node *devid; devid = &mirror->mirror_ds->id_node; -- cgit v1.2.3 From 86e3e83b443669dd2bcc5c8a83b23e3aa0694c0d Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 31 Mar 2017 12:32:45 -0700 Subject: dm verity fec: fix bufio leaks Buffers read through dm_bufio_read() were not released in all code paths. Fixes: a739ff3f543a ("dm verity: add support for forward error correction") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Sami Tolvanen Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-fec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index c3cc04d89524..78f36012eaca 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -146,8 +146,6 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, block = fec_buffer_rs_block(v, fio, n, i); res = fec_decode_rs8(v, fio, block, &par[offset], neras); if (res < 0) { - dm_bufio_release(buf); - r = res; goto error; } @@ -172,6 +170,8 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, done: r = corrected; error: + dm_bufio_release(buf); + if (r < 0 && neras) DMERR_LIMIT("%s: FEC %llu: failed to correct: %d", v->data_dev->name, (unsigned long long)rsb, r); @@ -269,7 +269,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, &is_zero) == 0) { /* skip known zero blocks entirely */ if (is_zero) - continue; + goto done; /* * skip if we have already found the theoretical -- cgit v1.2.3 From fdad4e7a876a2cb3d2c1f04e5418c324e79fffef Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 1 Apr 2017 00:45:52 +0200 Subject: ACPI / scan: Prefer devices without _HID for _ADR matching Commit c2a6bbaf0c5f (ACPI / scan: Prefer devices without _HID/_CID for _ADR matching) added a list_empty(&adev->pnp.ids) check to find_child_checks() so as to catch situations in which the ACPI core attempts to decode _ADR for a device having a _HID too which is strictly against the spec. However, it overlooked the fact that the adev->pnp.ids list for the devices taken into account by find_child_checks() may contain device IDs set internally by the kernel, like "LNXVIDEO" (thanks to Zhang Rui for that realization), and it broke the enumeration of those devices as a result. To unbreak it, replace the overly coarse grained list_empty() check with a much more precise check against the pnp.type.platform_id flag which is only set for devices having a _HID (that's how it should be done from the start, as having both _ADR and _CID is actually permitted). Fixes: c2a6bbaf0c5f (ACPI / scan: Prefer devices without _HID/_CID for _ADR matching) Link: https://bugzilla.kernel.org/show_bug.cgi?id=194889 Reported-and-tested-by: Mike Tested-by: Hans de Goede Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/glue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index fb19e1cdb641..edc8663b5db3 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -99,13 +99,13 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) return -ENODEV; /* - * If the device has a _HID (or _CID) returning a valid ACPI/PNP - * device ID, it is better to make it look less attractive here, so that - * the other device with the same _ADR value (that may not have a valid - * device ID) can be matched going forward. [This means a second spec - * violation in a row, so whatever we do here is best effort anyway.] + * If the device has a _HID returning a valid ACPI/PNP device ID, it is + * better to make it look less attractive here, so that the other device + * with the same _ADR value (that may not have a valid device ID) can be + * matched going forward. [This means a second spec violation in a row, + * so whatever we do here is best effort anyway.] */ - return sta_present && list_empty(&adev->pnp.ids) ? + return sta_present && !adev->pnp.type.platform_id ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE; } -- cgit v1.2.3 From e640cc306388b6f9dc8109d5c5d0550d7e69e5f7 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Mar 2017 15:58:40 -0700 Subject: xtensa: fix stack dump output Use %pB in pr_cont format string instead of calling print_symbol separately. It turns [ 19.166249] Call Trace: [ 19.167265] [] [ 19.167843] __warn+0x92/0xa0 [ 19.169656] [] [ 19.170059] warn_slowpath_fmt+0x3c/0x40 [ 19.171934] [] back into [ 18.123240] Call Trace: [ 18.125039] [] __warn+0x92/0xa0 [ 18.126961] [] warn_slowpath_fmt+0x3c/0x40 Signed-off-by: Max Filippov --- arch/xtensa/kernel/traps.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index c82c43bff296..bae697a06a98 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -483,10 +483,8 @@ void show_regs(struct pt_regs * regs) static int show_trace_cb(struct stackframe *frame, void *data) { - if (kernel_text_address(frame->pc)) { - pr_cont(" [<%08lx>]", frame->pc); - print_symbol(" %s\n", frame->pc); - } + if (kernel_text_address(frame->pc)) + pr_cont(" [<%08lx>] %pB\n", frame->pc, (void *)frame->pc); return 0; } -- cgit v1.2.3 From 1493aa65ad076293722908f03bab3d4bf3dc3638 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Mar 2017 16:26:21 -0700 Subject: xtensa: wire up statx system call Signed-off-by: Max Filippov --- arch/xtensa/include/uapi/asm/unistd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h index cd400af4a6b2..6be7eb27fd29 100644 --- a/arch/xtensa/include/uapi/asm/unistd.h +++ b/arch/xtensa/include/uapi/asm/unistd.h @@ -774,7 +774,10 @@ __SYSCALL(349, sys_pkey_alloc, 2) #define __NR_pkey_free 350 __SYSCALL(350, sys_pkey_free, 1) -#define __NR_syscall_count 351 +#define __NR_statx 351 +__SYSCALL(351, sys_statx, 5) + +#define __NR_syscall_count 352 /* * sysxtensa syscall handler -- cgit v1.2.3 From 4b0ece6fa0167b22c004ff69e137dc94ee2e469e Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 31 Mar 2017 15:11:44 -0700 Subject: mm: migrate: fix remove_migration_pte() for ksm pages I found that calling page migration for ksm pages causes the following bug: page:ffffea0004d51180 count:2 mapcount:2 mapping:ffff88013c785141 index:0x913 flags: 0x57ffffc0040068(uptodate|lru|active|swapbacked) raw: 0057ffffc0040068 ffff88013c785141 0000000000000913 0000000200000001 raw: ffffea0004d5f9e0 ffffea0004d53f60 0000000000000000 ffff88007d81b800 page dumped because: VM_BUG_ON_PAGE(!PageLocked(page)) page->mem_cgroup:ffff88007d81b800 ------------[ cut here ]------------ kernel BUG at /src/linux-dev/mm/rmap.c:1086! invalid opcode: 0000 [#1] SMP Modules linked in: ppdev parport_pc virtio_balloon i2c_piix4 pcspkr parport i2c_core acpi_cpufreq ip_tables xfs libcrc32c ata_generic pata_acpi ata_piix 8139too libata virtio_blk 8139cp crc32c_intel mii virtio_pci virtio_ring serio_raw virtio floppy dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 3162 Comm: bash Not tainted 4.11.0-rc2-mm1+ #1 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:do_page_add_anon_rmap+0x1ba/0x260 RSP: 0018:ffffc90002473b30 EFLAGS: 00010282 RAX: 0000000000000021 RBX: ffffea0004d51180 RCX: 0000000000000006 RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff88007dc0dfe0 RBP: ffffc90002473b58 R08: 00000000fffffffe R09: 00000000000001c1 R10: 0000000000000005 R11: 00000000000001c0 R12: ffff880139ab3d80 R13: 0000000000000000 R14: 0000700000000200 R15: 0000160000000000 FS: 00007f5195f50740(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd450287000 CR3: 000000007a08e000 CR4: 00000000001406f0 Call Trace: page_add_anon_rmap+0x18/0x20 remove_migration_pte+0x220/0x2c0 rmap_walk_ksm+0x143/0x220 rmap_walk+0x55/0x60 remove_migration_ptes+0x53/0x80 migrate_pages+0x8ed/0xb60 soft_offline_page+0x309/0x8d0 store_soft_offline_page+0xaf/0xf0 dev_attr_store+0x18/0x30 sysfs_kf_write+0x3a/0x50 kernfs_fop_write+0xff/0x180 __vfs_write+0x37/0x160 vfs_write+0xb2/0x1b0 SyS_write+0x55/0xc0 do_syscall_64+0x67/0x180 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f51956339e0 RSP: 002b:00007ffcfa0dffc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007f51956339e0 RDX: 000000000000000c RSI: 00007f5195f53000 RDI: 0000000000000001 RBP: 00007f5195f53000 R08: 000000000000000a R09: 00007f5195f50740 R10: 000000000000000b R11: 0000000000000246 R12: 00007f5195907400 R13: 000000000000000c R14: 0000000000000001 R15: 0000000000000000 Code: fe ff ff 48 81 c2 00 02 00 00 48 89 55 d8 e8 2e c3 fd ff 48 8b 55 d8 e9 42 ff ff ff 48 c7 c6 e0 52 a1 81 48 89 df e8 46 ad fe ff <0f> 0b 48 83 e8 01 e9 7f fe ff ff 48 83 e8 01 e9 96 fe ff ff 48 RIP: do_page_add_anon_rmap+0x1ba/0x260 RSP: ffffc90002473b30 ---[ end trace a679d00f4af2df48 ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception The problem is in the following lines: new = page - pvmw.page->index + linear_page_index(vma, pvmw.address); The 'new' is calculated with 'page' which is given by the caller as a destination page and some offset adjustment for thp. But this doesn't properly work for ksm pages because pvmw.page->index doesn't change for each address but linear_page_index() changes, which means that 'new' points to different pages for each addresses backed by the ksm page. As a result, we try to set totally unrelated pages as destination pages, and that causes kernel crash. This patch fixes the miscalculation and makes ksm page migration work fine. Fixes: 3fe87967c536 ("mm: convert remove_migration_pte() to use page_vma_mapped_walk()") Link: http://lkml.kernel.org/r/1489717683-29905-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Mel Gorman Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 9a0897a14d37..ed97c2c14fa8 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -209,8 +209,11 @@ static int remove_migration_pte(struct page *page, struct vm_area_struct *vma, VM_BUG_ON_PAGE(PageTail(page), page); while (page_vma_mapped_walk(&pvmw)) { - new = page - pvmw.page->index + - linear_page_index(vma, pvmw.address); + if (PageKsm(page)) + new = page; + else + new = page - pvmw.page->index + + linear_page_index(vma, pvmw.address); get_page(new); pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot))); -- cgit v1.2.3 From 597b7305dd8bafdb3aef4957d97128bc90af8e9f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 31 Mar 2017 15:11:47 -0700 Subject: mm: move mm_percpu_wq initialization earlier Yang Li has reported that drain_all_pages triggers a WARN_ON which means that this function is called earlier than the mm_percpu_wq is initialized on arm64 with CMA configured: WARNING: CPU: 2 PID: 1 at mm/page_alloc.c:2423 drain_all_pages+0x244/0x25c Modules linked in: CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc1-next-20170310-00027-g64dfbc5 #127 Hardware name: Freescale Layerscape 2088A RDB Board (DT) task: ffffffc07c4a6d00 task.stack: ffffffc07c4a8000 PC is at drain_all_pages+0x244/0x25c LR is at start_isolate_page_range+0x14c/0x1f0 [...] drain_all_pages+0x244/0x25c start_isolate_page_range+0x14c/0x1f0 alloc_contig_range+0xec/0x354 cma_alloc+0x100/0x1fc dma_alloc_from_contiguous+0x3c/0x44 atomic_pool_init+0x7c/0x208 arm64_dma_init+0x44/0x4c do_one_initcall+0x38/0x128 kernel_init_freeable+0x1a0/0x240 kernel_init+0x10/0xfc ret_from_fork+0x10/0x20 Fix this by moving the whole setup_vmstat which is an initcall right now to init_mm_internals which will be called right after the WQ subsystem is initialized. Link: http://lkml.kernel.org/r/20170315164021.28532-1-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Yang Li Tested-by: Yang Li Tested-by: Xiaolong Ye Cc: Mel Gorman Cc: Vlastimil Babka Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ init/main.c | 2 ++ mm/vmstat.c | 4 +--- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5f01c88f0800..00a8fa7e366a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -32,6 +32,8 @@ struct user_struct; struct writeback_control; struct bdi_writeback; +void init_mm_internals(void); + #ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; diff --git a/init/main.c b/init/main.c index f9c9d9948203..b0c11cbf5ddf 100644 --- a/init/main.c +++ b/init/main.c @@ -1022,6 +1022,8 @@ static noinline void __init kernel_init_freeable(void) workqueue_init(); + init_mm_internals(); + do_pre_smp_initcalls(); lockup_detector_init(); diff --git a/mm/vmstat.c b/mm/vmstat.c index b1947f0cbee2..89f95396ec46 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1764,7 +1764,7 @@ static int vmstat_cpu_dead(unsigned int cpu) #endif -static int __init setup_vmstat(void) +void __init init_mm_internals(void) { #ifdef CONFIG_SMP int ret; @@ -1792,9 +1792,7 @@ static int __init setup_vmstat(void) proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #endif - return 0; } -module_init(setup_vmstat) #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION) -- cgit v1.2.3 From 553af430e7c981e6e8fa5007c5b7b5773acc63dd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 31 Mar 2017 15:11:50 -0700 Subject: mm: rmap: fix huge file mmap accounting in the memcg stats Huge pages are accounted as single units in the memcg's "file_mapped" counter. Account the correct number of base pages, like we do in the corresponding node counter. Link: http://lkml.kernel.org/r/20170322005111.3156-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ mm/rmap.c | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5af377303880..bb7250c45cb8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -740,6 +740,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } +static inline void mem_cgroup_update_page_stat(struct page *page, + enum mem_cgroup_stat_index idx, + int nr) +{ +} + static inline void mem_cgroup_inc_page_stat(struct page *page, enum mem_cgroup_stat_index idx) { diff --git a/mm/rmap.c b/mm/rmap.c index 49ed681ccc7b..f6838015810f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1159,7 +1159,7 @@ void page_add_file_rmap(struct page *page, bool compound) goto out; } __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr); - mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr); out: unlock_page_memcg(page); } @@ -1199,7 +1199,7 @@ static void page_remove_file_rmap(struct page *page, bool compound) * pte lock(a spinlock) is held, which implies preemption disabled. */ __mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr); - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED); + mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr); if (unlikely(PageMlocked(page))) clear_page_mlock(page); -- cgit v1.2.3 From 0cefabdaf757a6455d75f00cb76874e62703ed18 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 31 Mar 2017 15:11:52 -0700 Subject: mm: workingset: fix premature shadow node shrinking with cgroups Commit 0a6b76dd23fa ("mm: workingset: make shadow node shrinker memcg aware") enabled cgroup-awareness in the shadow node shrinker, but forgot to also enable cgroup-awareness in the list_lru the shadow nodes sit on. Consequently, all shadow nodes are sitting on a global (per-NUMA node) list, while the shrinker applies the limits according to the amount of cache in the cgroup its shrinking. The result is excessive pressure on the shadow nodes from cgroups that have very little cache. Enable memcg-mode on the shadow node LRUs, such that per-cgroup limits are applied to per-cgroup lists. Fixes: 0a6b76dd23fa ("mm: workingset: make shadow node shrinker memcg aware") Link: http://lkml.kernel.org/r/20170322005320.8165-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/workingset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/workingset.c b/mm/workingset.c index ac839fca0e76..eda05c71fa49 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -532,7 +532,7 @@ static int __init workingset_init(void) pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", timestamp_bits, max_order, bucket_order); - ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key); + ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key); if (ret) goto err; ret = register_shrinker(&workingset_shadow_shrinker); -- cgit v1.2.3 From c9d398fa237882ea07167e23bcfc5e6847066518 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 31 Mar 2017 15:11:55 -0700 Subject: mm, hugetlb: use pte_present() instead of pmd_present() in follow_huge_pmd() I found the race condition which triggers the following bug when move_pages() and soft offline are called on a single hugetlb page concurrently. Soft offlining page 0x119400 at 0x700000000000 BUG: unable to handle kernel paging request at ffffea0011943820 IP: follow_huge_pmd+0x143/0x190 PGD 7ffd2067 PUD 7ffd1067 PMD 0 [61163.582052] Oops: 0000 [#1] SMP Modules linked in: binfmt_misc ppdev virtio_balloon parport_pc pcspkr i2c_piix4 parport i2c_core acpi_cpufreq ip_tables xfs libcrc32c ata_generic pata_acpi virtio_blk 8139too crc32c_intel ata_piix serio_raw libata virtio_pci 8139cp virtio_ring virtio mii floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: cap_check] CPU: 0 PID: 22573 Comm: iterate_numa_mo Tainted: P OE 4.11.0-rc2-mm1+ #2 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:follow_huge_pmd+0x143/0x190 RSP: 0018:ffffc90004bdbcd0 EFLAGS: 00010202 RAX: 0000000465003e80 RBX: ffffea0004e34d30 RCX: 00003ffffffff000 RDX: 0000000011943800 RSI: 0000000000080001 RDI: 0000000465003e80 RBP: ffffc90004bdbd18 R08: 0000000000000000 R09: ffff880138d34000 R10: ffffea0004650000 R11: 0000000000c363b0 R12: ffffea0011943800 R13: ffff8801b8d34000 R14: ffffea0000000000 R15: 000077ff80000000 FS: 00007fc977710740(0000) GS:ffff88007dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffea0011943820 CR3: 000000007a746000 CR4: 00000000001406f0 Call Trace: follow_page_mask+0x270/0x550 SYSC_move_pages+0x4ea/0x8f0 SyS_move_pages+0xe/0x10 do_syscall_64+0x67/0x180 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7fc976e03949 RSP: 002b:00007ffe72221d88 EFLAGS: 00000246 ORIG_RAX: 0000000000000117 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc976e03949 RDX: 0000000000c22390 RSI: 0000000000001400 RDI: 0000000000005827 RBP: 00007ffe72221e00 R08: 0000000000c2c3a0 R09: 0000000000000004 R10: 0000000000c363b0 R11: 0000000000000246 R12: 0000000000400650 R13: 00007ffe72221ee0 R14: 0000000000000000 R15: 0000000000000000 Code: 81 e4 ff ff 1f 00 48 21 c2 49 c1 ec 0c 48 c1 ea 0c 4c 01 e2 49 bc 00 00 00 00 00 ea ff ff 48 c1 e2 06 49 01 d4 f6 45 bc 04 74 90 <49> 8b 7c 24 20 40 f6 c7 01 75 2b 4c 89 e7 8b 47 1c 85 c0 7e 2a RIP: follow_huge_pmd+0x143/0x190 RSP: ffffc90004bdbcd0 CR2: ffffea0011943820 ---[ end trace e4f81353a2d23232 ]--- Kernel panic - not syncing: Fatal exception Kernel Offset: disabled This bug is triggered when pmd_present() returns true for non-present hugetlb, so fixing the present check in follow_huge_pmd() prevents it. Using pmd_present() to determine present/non-present for hugetlb is not correct, because pmd_present() checks multiple bits (not only _PAGE_PRESENT) for historical reason and it can misjudge hugetlb state. Fixes: e66f17ff7177 ("mm/hugetlb: take page table lock in follow_huge_pmd()") Link: http://lkml.kernel.org/r/1490149898-20231-1-git-send-email-n-horiguchi@ah.jp.nec.com Signed-off-by: Naoya Horiguchi Acked-by: Hillf Danton Cc: Hugh Dickins Cc: Michal Hocko Cc: "Kirill A. Shutemov" Cc: Mike Kravetz Cc: Christian Borntraeger Cc: Gerald Schaefer Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3d0aab9ee80d..f501f14f14ce 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4651,6 +4651,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, { struct page *page = NULL; spinlock_t *ptl; + pte_t pte; retry: ptl = pmd_lockptr(mm, pmd); spin_lock(ptl); @@ -4660,12 +4661,13 @@ retry: */ if (!pmd_huge(*pmd)) goto out; - if (pmd_present(*pmd)) { + pte = huge_ptep_get((pte_t *)pmd); + if (pte_present(pte)) { page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); if (flags & FOLL_GET) get_page(page); } else { - if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) { + if (is_hugetlb_entry_migration(pte)) { spin_unlock(ptl); __migration_entry_wait(mm, (pte_t *)pmd, ptl); goto retry; -- cgit v1.2.3 From 906f2a51c941e251ca196d5128953d9899a608ef Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 31 Mar 2017 15:11:58 -0700 Subject: mm: fix section name for .data..ro_after_init A section name for .data..ro_after_init was added by both: commit d07a980c1b8d ("s390: add proper __ro_after_init support") and commit d7c19b066dcf ("mm: kmemleak: scan .data.ro_after_init") The latter adds incorrect wrapping around the existing s390 section, and came later. I'd prefer the s390 naming, so this moves the s390-specific name up to the asm-generic/sections.h and renames the section as used by kmemleak (and in the future, kernel/extable.c). Link: http://lkml.kernel.org/r/20170327192213.GA129375@beast Signed-off-by: Kees Cook Acked-by: Heiko Carstens [s390 parts] Acked-by: Jakub Kicinski Cc: Eddie Kovsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/sections.h | 1 - arch/s390/kernel/vmlinux.lds.S | 2 -- include/asm-generic/sections.h | 6 +++--- include/asm-generic/vmlinux.lds.h | 4 ++-- mm/kmemleak.c | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 5ce29fe100ba..fbd9116eb17b 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,6 +4,5 @@ #include extern char _eshared[], _ehead[]; -extern char __start_ro_after_init[], __end_ro_after_init[]; #endif diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 5ccf95396251..72307f108c40 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -63,11 +63,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); __start_ro_after_init = .; - __start_data_ro_after_init = .; .data..ro_after_init : { *(.data..ro_after_init) } - __end_data_ro_after_init = .; EXCEPTION_TABLE(16) . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 4df64a1fc09e..532372c6cf15 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -14,8 +14,8 @@ * [_sdata, _edata]: contains .data.* sections, may also contain .rodata.* * and/or .init.* sections. * [__start_rodata, __end_rodata]: contains .rodata.* sections - * [__start_data_ro_after_init, __end_data_ro_after_init]: - * contains data.ro_after_init section + * [__start_ro_after_init, __end_ro_after_init]: + * contains .data..ro_after_init section * [__init_begin, __init_end]: contains .init.* sections, but .init.text.* * may be out of this range on some architectures. * [_sinittext, _einittext]: contains .init.text.* sections @@ -33,7 +33,7 @@ extern char _data[], _sdata[], _edata[]; extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; -extern char __start_data_ro_after_init[], __end_data_ro_after_init[]; +extern char __start_ro_after_init[], __end_ro_after_init[]; extern char _end[]; extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0968d13b3885..f9f21e2c59f3 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -260,9 +260,9 @@ */ #ifndef RO_AFTER_INIT_DATA #define RO_AFTER_INIT_DATA \ - __start_data_ro_after_init = .; \ + __start_ro_after_init = .; \ *(.data..ro_after_init) \ - __end_data_ro_after_init = .; + __end_ro_after_init = .; #endif /* diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 26c874e90b12..20036d4f9f13 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1416,7 +1416,7 @@ static void kmemleak_scan(void) /* data/bss scanning */ scan_large_block(_sdata, _edata); scan_large_block(__bss_start, __bss_stop); - scan_large_block(__start_data_ro_after_init, __end_data_ro_after_init); + scan_large_block(__start_ro_after_init, __end_ro_after_init); #ifdef CONFIG_SMP /* per-cpu sections scanning */ -- cgit v1.2.3 From 4742a35d9de745e867405b4311e1aac412f0ace1 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 31 Mar 2017 15:12:01 -0700 Subject: hugetlbfs: initialize shared policy as part of inode allocation Any time after inode allocation, destroy_inode can be called. The hugetlbfs inode contains a shared_policy structure, and mpol_free_shared_policy is unconditionally called as part of hugetlbfs_destroy_inode. Initialize the policy as part of inode allocation so that any quick (error path) calls to destroy_inode will be handed an initialized policy. syzkaller fuzzer found this bug, that resulted in the following: BUG: KASAN: user-memory-access in atomic_inc include/asm-generic/atomic-instrumented.h:87 [inline] at addr 000000131730bd7a BUG: KASAN: user-memory-access in __lock_acquire+0x21a/0x3a80 kernel/locking/lockdep.c:3239 at addr 000000131730bd7a Write of size 4 by task syz-executor6/14086 CPU: 3 PID: 14086 Comm: syz-executor6 Not tainted 4.11.0-rc3+ #364 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: atomic_inc include/asm-generic/atomic-instrumented.h:87 [inline] __lock_acquire+0x21a/0x3a80 kernel/locking/lockdep.c:3239 lock_acquire+0x1ee/0x590 kernel/locking/lockdep.c:3762 __raw_write_lock include/linux/rwlock_api_smp.h:210 [inline] _raw_write_lock+0x33/0x50 kernel/locking/spinlock.c:295 mpol_free_shared_policy+0x43/0xb0 mm/mempolicy.c:2536 hugetlbfs_destroy_inode+0xca/0x120 fs/hugetlbfs/inode.c:952 alloc_inode+0x10d/0x180 fs/inode.c:216 new_inode_pseudo+0x69/0x190 fs/inode.c:889 new_inode+0x1c/0x40 fs/inode.c:918 hugetlbfs_get_inode+0x40/0x420 fs/hugetlbfs/inode.c:734 hugetlb_file_setup+0x329/0x9f0 fs/hugetlbfs/inode.c:1282 newseg+0x422/0xd30 ipc/shm.c:575 ipcget_new ipc/util.c:285 [inline] ipcget+0x21e/0x580 ipc/util.c:639 SYSC_shmget ipc/shm.c:673 [inline] SyS_shmget+0x158/0x230 ipc/shm.c:657 entry_SYSCALL_64_fastpath+0x1f/0xc2 Analysis provided by Tetsuo Handa Link: http://lkml.kernel.org/r/1490477850-7944-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reported-by: Dmitry Vyukov Acked-by: Hillf Danton Cc: Tetsuo Handa Cc: Michal Hocko Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8f96461236f6..7163fe014b57 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -695,14 +695,11 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, inode = new_inode(sb); if (inode) { - struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode->i_mode = S_IFDIR | config->mode; inode->i_uid = config->uid; inode->i_gid = config->gid; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); - info = HUGETLBFS_I(inode); - mpol_shared_policy_init(&info->policy, NULL); inode->i_op = &hugetlbfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ @@ -733,7 +730,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode = new_inode(sb); if (inode) { - struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, @@ -741,15 +737,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); inode->i_mapping->private_data = resv_map; - info = HUGETLBFS_I(inode); - /* - * The policy is initialized here even if we are creating a - * private inode because initialization simply creates an - * an empty rb tree and calls rwlock_init(), later when we - * call mpol_free_shared_policy() it will just return because - * the rb tree will still be empty. - */ - mpol_shared_policy_init(&info->policy, NULL); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); @@ -937,6 +924,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) hugetlbfs_inc_free_inodes(sbinfo); return NULL; } + + /* + * Any time after allocation, hugetlbfs_destroy_inode can be called + * for the inode. mpol_free_shared_policy is unconditionally called + * as part of hugetlbfs_destroy_inode. So, initialize policy here + * in case of a quick call to destroy. + * + * Note that the policy is initialized even if we are creating a + * private inode. This simplifies hugetlbfs_destroy_inode. + */ + mpol_shared_policy_init(&p->policy, NULL); + return &p->vfs_inode; } -- cgit v1.2.3 From b0845ce58379d11dcad4cdb6824a6410de260216 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Mar 2017 15:12:04 -0700 Subject: kasan: report only the first error by default Disable kasan after the first report. There are several reasons for this: - Single bug quite often has multiple invalid memory accesses causing storm in the dmesg. - Write OOB access might corrupt metadata so the next report will print bogus alloc/free stacktraces. - Reports after the first easily could be not bugs by itself but just side effects of the first one. Given that multiple reports usually only do harm, it makes sense to disable kasan after the first one. If user wants to see all the reports, the boot-time parameter kasan_multi_shot must be used. [aryabinin@virtuozzo.com: wrote changelog and doc, added missing include] Link: http://lkml.kernel.org/r/20170323154416.30257-1-aryabinin@virtuozzo.com Signed-off-by: Mark Rutland Signed-off-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 6 +++++ include/linux/kasan.h | 3 +++ lib/test_kasan.c | 10 +++++++ mm/kasan/kasan.h | 5 ---- mm/kasan/report.c | 36 +++++++++++++++++++++++++ 5 files changed, 55 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2ba45caabada..facc20a3f962 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1725,6 +1725,12 @@ kernel and module base offset ASLR (Address Space Layout Randomization). + kasan_multi_shot + [KNL] Enforce KASAN (Kernel Address Sanitizer) to print + report on every invalid memory access. Without this + parameter KASAN will print report only for the first + invalid access. + keepinitrd [HW,ARM] kernelcore= [KNL,X86,IA-64,PPC] diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5734480c9590..a5c7046f26b4 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -76,6 +76,9 @@ size_t ksize(const void *); static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } size_t kasan_metadata_size(struct kmem_cache *cache); +bool kasan_save_enable_multi_shot(void); +void kasan_restore_multi_shot(bool enabled); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 0b1d3140fbb8..a25c9763fce1 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -20,6 +20,7 @@ #include #include #include +#include /* * Note: test functions are marked noinline so that their names appear in @@ -474,6 +475,12 @@ static noinline void __init use_after_scope_test(void) static int __init kmalloc_tests_init(void) { + /* + * Temporarily enable multi-shot mode. Otherwise, we'd only get a + * report for the first case. + */ + bool multishot = kasan_save_enable_multi_shot(); + kmalloc_oob_right(); kmalloc_oob_left(); kmalloc_node_oob_right(); @@ -499,6 +506,9 @@ static int __init kmalloc_tests_init(void) ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); + + kasan_restore_multi_shot(multishot); + return -EAGAIN; } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 1c260e6b3b3c..dd2dea8eb077 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -96,11 +96,6 @@ static inline const void *kasan_shadow_to_mem(const void *shadow_addr) << KASAN_SHADOW_SCALE_SHIFT); } -static inline bool kasan_report_enabled(void) -{ - return !current->kasan_depth; -} - void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); void kasan_report_double_free(struct kmem_cache *cache, void *object, diff --git a/mm/kasan/report.c b/mm/kasan/report.c index f479365530b6..ab42a0803f16 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -13,7 +13,9 @@ * */ +#include #include +#include #include #include #include @@ -293,6 +295,40 @@ static void kasan_report_error(struct kasan_access_info *info) kasan_end_report(&flags); } +static unsigned long kasan_flags; + +#define KASAN_BIT_REPORTED 0 +#define KASAN_BIT_MULTI_SHOT 1 + +bool kasan_save_enable_multi_shot(void) +{ + return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_save_enable_multi_shot); + +void kasan_restore_multi_shot(bool enabled) +{ + if (!enabled) + clear_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); +} +EXPORT_SYMBOL_GPL(kasan_restore_multi_shot); + +static int __init kasan_set_multi_shot(char *str) +{ + set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); + return 1; +} +__setup("kasan_multi_shot", kasan_set_multi_shot); + +static inline bool kasan_report_enabled(void) +{ + if (current->kasan_depth) + return false; + if (test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) + return true; + return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags); +} + void kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip) { -- cgit v1.2.3 From ff8c0c53c47530ffea82c22a0a6df6332b56c957 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 31 Mar 2017 15:12:07 -0700 Subject: mm/hugetlb.c: don't call region_abort if region_chg fails Changes to hugetlbfs reservation maps is a two step process. The first step is a call to region_chg to determine what needs to be changed, and prepare that change. This should be followed by a call to call to region_add to commit the change, or region_abort to abort the change. The error path in hugetlb_reserve_pages called region_abort after a failed call to region_chg. As a result, the adds_in_progress counter in the reservation map is off by 1. This is caught by a VM_BUG_ON in resv_map_release when the reservation map is freed. syzkaller fuzzer (when using an injected kmalloc failure) found this bug, that resulted in the following: kernel BUG at mm/hugetlb.c:742! Call Trace: hugetlbfs_evict_inode+0x7b/0xa0 fs/hugetlbfs/inode.c:493 evict+0x481/0x920 fs/inode.c:553 iput_final fs/inode.c:1515 [inline] iput+0x62b/0xa20 fs/inode.c:1542 hugetlb_file_setup+0x593/0x9f0 fs/hugetlbfs/inode.c:1306 newseg+0x422/0xd30 ipc/shm.c:575 ipcget_new ipc/util.c:285 [inline] ipcget+0x21e/0x580 ipc/util.c:639 SYSC_shmget ipc/shm.c:673 [inline] SyS_shmget+0x158/0x230 ipc/shm.c:657 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: resv_map_release+0x265/0x330 mm/hugetlb.c:742 Link: http://lkml.kernel.org/r/1490821682-23228-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reported-by: Dmitry Vyukov Acked-by: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f501f14f14ce..e5828875f7bb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4403,7 +4403,9 @@ int hugetlb_reserve_pages(struct inode *inode, return 0; out_err: if (!vma || vma->vm_flags & VM_MAYSHARE) - region_abort(resv_map, from, to); + /* Don't call region_abort if region_chg failed */ + if (chg >= 0) + region_abort(resv_map, from, to); if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) kref_put(&resv_map->refs, resv_map_release); return ret; -- cgit v1.2.3 From 4785603bd05b0b029c647080937674d9991600f9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 31 Mar 2017 15:12:10 -0700 Subject: drivers/rapidio/devices/tsi721.c: make module parameter variable name unique MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kbuild test robot reported a non-static variable name collision between a staging driver and a RapidIO driver, with a generic variable name of 'dbg_level'. Both drivers should be changed so that they don't use this generic public variable name. This patch fixes the RapidIO driver but does not change the user interface (name) for the module parameter. drivers/staging/built-in.o:(.bss+0x109d0): multiple definition of `dbg_level' drivers/rapidio/built-in.o:(.bss+0x16c): first defined here Link: http://lkml.kernel.org/r/ab527fc5-aa3c-4b07-5d48-eef5de703192@infradead.org Signed-off-by: Randy Dunlap Reported-by: kbuild test robot Cc: Greg Kroah-Hartman Cc: Matt Porter Cc: Alexandre Bounine Cc: Jérémy Lefaure Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/devices/tsi721.c | 4 ++-- drivers/rapidio/devices/tsi721.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 9d19b9a62011..315a4be8dc1e 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -37,8 +37,8 @@ #include "tsi721.h" #ifdef DEBUG -u32 dbg_level; -module_param(dbg_level, uint, S_IWUSR | S_IRUGO); +u32 tsi_dbg_level; +module_param_named(dbg_level, tsi_dbg_level, uint, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(dbg_level, "Debugging output level (default 0 = none)"); #endif diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h index 5941437cbdd1..957eadc58150 100644 --- a/drivers/rapidio/devices/tsi721.h +++ b/drivers/rapidio/devices/tsi721.h @@ -40,11 +40,11 @@ enum { }; #ifdef DEBUG -extern u32 dbg_level; +extern u32 tsi_dbg_level; #define tsi_debug(level, dev, fmt, arg...) \ do { \ - if (DBG_##level & dbg_level) \ + if (DBG_##level & tsi_dbg_level) \ dev_dbg(dev, "%s: " fmt "\n", __func__, ##arg); \ } while (0) #else -- cgit v1.2.3 From 13a6798e4a03096b11bf402a063786a7be55d426 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 31 Mar 2017 15:12:12 -0700 Subject: kasan: do not sanitize kexec purgatory Fixes this: kexec: Undefined symbol: __asan_load8_noabort kexec-bzImage64: Loading purgatory failed Link: http://lkml.kernel.org/r/1489672155.4458.7.camel@gmx.de Signed-off-by: Mike Galbraith Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 555b9fa0ad43..7dbdb780264d 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -8,6 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib targets += purgatory.ro +KASAN_SANITIZE := n KCOV_INSTRUMENT := n # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That -- cgit v1.2.3 From aa4ce4493c88dc324911152d1ccd25469366dba3 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Sat, 1 Apr 2017 00:00:53 +0800 Subject: drm/i915/gvt: Fix firmware loading interface for GVT-g golden HW state Firmware loading interface for GVT-g golden HW state has been broken before. This patch fixes GVT-g firmware loading interface. A user should apply this patch if he wants to load GVT-g golden HW state from firmware interface. Fixes: 579cea5 ("drm/i915/gvt: golden virtual HW state management") Cc: Zhenyu Wang Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Zhi Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/firmware.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 933a7c211a1c..dce8d15f706f 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -75,11 +75,11 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) struct gvt_firmware_header *h; void *firmware; void *p; - unsigned long size; + unsigned long size, crc32_start; int i; int ret; - size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1; + size = sizeof(*h) + info->mmio_size + info->cfg_space_size; firmware = vzalloc(size); if (!firmware) return -ENOMEM; @@ -112,6 +112,9 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) memcpy(gvt->firmware.mmio, p, info->mmio_size); + crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4; + h->crc32 = crc32_le(0, firmware + crc32_start, size - crc32_start); + firmware_attr.size = size; firmware_attr.private = firmware; @@ -234,7 +237,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt) firmware->mmio = mem; - sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%04x.golden_hw_state", + sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%02x.golden_hw_state", GVT_FIRMWARE_PATH, pdev->vendor, pdev->device, pdev->revision); -- cgit v1.2.3 From e53e597fd4c4a0b6ae58e57d76a240927fd17eaa Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 31 Mar 2017 17:05:02 -0500 Subject: tty: pl011: fix earlycon work-around for QDF2400 erratum 44 The work-around for the Qualcomm Datacenter Technologies QDF2400 erratum 44 sets the "qdf2400_e44_present" global variable if the work-around is needed. However, this check does not happen until after earlycon is initialized, which means the work-around is not used, and the console hangs as soon as it displays one character. Fixes: d8a4995bcea1 ("tty: pl011: Work around QDF2400 E44 stuck BUSY bit") Signed-off-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 56f92d7348bf..b0a377725d63 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2452,18 +2452,37 @@ static void pl011_early_write(struct console *con, const char *s, unsigned n) uart_console_write(&dev->port, s, n, pl011_putc); } +/* + * On non-ACPI systems, earlycon is enabled by specifying + * "earlycon=pl011,
" on the kernel command line. + * + * On ACPI ARM64 systems, an "early" console is enabled via the SPCR table, + * by specifying only "earlycon" on the command line. Because it requires + * SPCR, the console starts after ACPI is parsed, which is later than a + * traditional early console. + * + * To get the traditional early console that starts before ACPI is parsed, + * specify the full "earlycon=pl011,
" option. + */ static int __init pl011_early_console_setup(struct earlycon_device *device, const char *opt) { if (!device->port.membase) return -ENODEV; - device->con->write = qdf2400_e44_present ? - qdf2400_e44_early_write : pl011_early_write; + /* On QDF2400 SOCs affected by Erratum 44, the "qdf2400_e44" must + * also be specified, e.g. "earlycon=pl011,
,qdf2400_e44". + */ + if (!strcmp(device->options, "qdf2400_e44")) + device->con->write = qdf2400_e44_early_write; + else + device->con->write = pl011_early_write; + return 0; } OF_EARLYCON_DECLARE(pl011, "arm,pl011", pl011_early_console_setup); OF_EARLYCON_DECLARE(pl011, "arm,sbsa-uart", pl011_early_console_setup); +EARLYCON_DECLARE(qdf2400_e44, pl011_early_console_setup); #else #define AMBA_CONSOLE NULL -- cgit v1.2.3 From b917078c1c107ce34264af893e436e6115eeb9f6 Mon Sep 17 00:00:00 2001 From: Daode Huang Date: Thu, 30 Mar 2017 16:37:41 +0100 Subject: net: hns: Add ACPI support to check SFP present The current code only supports DT to check SFP present. This patch adds ACPI support as well. Signed-off-by: Daode Huang Reviewed-by: Yisen Zhuang Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 11 +++++---- drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 28 +++++++++++++++++++++- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 3239d27143b9..bdd8cdd732fb 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -82,9 +82,12 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status) else *link_status = 0; - ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb, &sfp_prsnt); - if (!ret) - *link_status = *link_status && sfp_prsnt; + if (mac_cb->media_type == HNAE_MEDIA_TYPE_FIBER) { + ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb, + &sfp_prsnt); + if (!ret) + *link_status = *link_status && sfp_prsnt; + } mac_cb->link = *link_status; } @@ -855,7 +858,7 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) of_node_put(np); np = of_parse_phandle(to_of_node(mac_cb->fw_port), - "serdes-syscon", 0); + "serdes-syscon", 0); syscon = syscon_node_to_regmap(np); of_node_put(np); if (IS_ERR_OR_NULL(syscon)) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index a2c22d084ce9..e13aa064a8e9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -461,6 +461,32 @@ int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt) return 0; } +int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt) +{ + union acpi_object *obj; + union acpi_object obj_args, argv4; + + obj_args.integer.type = ACPI_TYPE_INTEGER; + obj_args.integer.value = mac_cb->mac_id; + + argv4.type = ACPI_TYPE_PACKAGE, + argv4.package.count = 1, + argv4.package.elements = &obj_args, + + obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev), + hns_dsaf_acpi_dsm_uuid, 0, + HNS_OP_GET_SFP_STAT_FUNC, &argv4); + + if (!obj || obj->type != ACPI_TYPE_INTEGER) + return -ENODEV; + + *sfp_prsnt = obj->integer.value; + + ACPI_FREE(obj); + + return 0; +} + /** * hns_mac_config_sds_loopback - set loop back for serdes * @mac_cb: mac control block @@ -592,7 +618,7 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->hns_dsaf_roce_srst = hns_dsaf_roce_srst_acpi; misc_op->get_phy_if = hns_mac_get_phy_if_acpi; - misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt; + misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt_acpi; misc_op->cfg_serdes_loopback = hns_mac_config_sds_loopback_acpi; } else { -- cgit v1.2.3 From 3af887c38f1ae0db66c00c4ee2e8a0b1e99ffc29 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Mar 2017 17:00:12 +0100 Subject: net/faraday: Explicitly include linux/of.h and linux/property.h This driver uses interfaces from linux/of.h and linux/property.h but relies on implict inclusion of those headers which means that changes in other headers could break the build, as happened in -next for arm today. Add a explicit includes. Signed-off-by: Mark Brown Acked-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 928b0df2b8e0..ade6b3e4ed13 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -28,8 +28,10 @@ #include #include #include +#include #include #include +#include #include #include -- cgit v1.2.3 From 6f56f6186c18e3fd54122b73da68e870687b8c59 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Thu, 30 Mar 2017 12:36:03 -0700 Subject: openvswitch: Fix ovs_flow_key_update() ovs_flow_key_update() is called when the flow key is invalid, and it is used to update and revalidate the flow key. Commit 329f45bc4f19 ("openvswitch: add mac_proto field to the flow key") introduces mac_proto field to flow key and use it to determine whether the flow key is valid. However, the commit does not update the code path in ovs_flow_key_update() to revalidate the flow key which may cause BUG_ON() on execute_recirc(). This patch addresses the aforementioned issue. Fixes: 329f45bc4f19 ("openvswitch: add mac_proto field to the flow key") Signed-off-by: Yi-Hung Wei Acked-by: Jiri Benc Signed-off-by: David S. Miller --- net/openvswitch/flow.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d4bb8eb63f2..3f76cb765e5b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) /* Link layer. */ clear_vlan(key); - if (key->mac_proto == MAC_PROTO_NONE) { + if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) { if (unlikely(eth_type_vlan(skb->protocol))) return -EINVAL; @@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) { - return key_extract(skb, key); + int res; + + res = key_extract(skb, key); + if (!res) + key->mac_proto &= ~SW_FLOW_KEY_INVALID; + + return res; } static int key_extract_mac_proto(struct sk_buff *skb) -- cgit v1.2.3 From d5b07ccc1bf5fd2ccc6bf9da5677fc448a972e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Rebe?= Date: Tue, 28 Mar 2017 07:56:51 +0200 Subject: r8152: The Microsoft Surface docks also use R8152 v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this the generic cdc_ether grabs the device, and does not really work. Signed-off-by: René Rebe Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ether.c | 15 +++++++++++++++ drivers/net/usb/r8152.c | 3 +++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index f5552aaaa77a..f3ae88fdf332 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -532,6 +532,7 @@ static const struct driver_info wwan_info = { #define LENOVO_VENDOR_ID 0x17ef #define NVIDIA_VENDOR_ID 0x0955 #define HP_VENDOR_ID 0x03f0 +#define MICROSOFT_VENDOR_ID 0x045e static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -761,6 +762,20 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Microsoft Surface 2 dock (based on Realtek RTL8152) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07ab, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + +/* Microsoft Surface 3 dock (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07c6, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c34df33c6d72..07f788c49d57 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -517,6 +517,7 @@ enum rtl8152_flags { /* Define these values to match your device */ #define VENDOR_ID_REALTEK 0x0bda +#define VENDOR_ID_MICROSOFT 0x045e #define VENDOR_ID_SAMSUNG 0x04e8 #define VENDOR_ID_LENOVO 0x17ef #define VENDOR_ID_NVIDIA 0x0955 @@ -4521,6 +4522,8 @@ static void rtl8152_disconnect(struct usb_interface *intf) static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)}, {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)}, + {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)}, + {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)}, {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062)}, -- cgit v1.2.3 From fce366a9dd0ddc47e7ce05611c266e8574a45116 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:02 +0200 Subject: bpf, verifier: fix alu ops against map_value{, _adj} register types While looking into map_value_adj, I noticed that alu operations directly on the map_value() resp. map_value_adj() register (any alu operation on a map_value() register will turn it into a map_value_adj() typed register) are not sufficiently protected against some of the operations. Two non-exhaustive examples are provided that the verifier needs to reject: i) BPF_AND on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0xbf842a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+2 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (57) r0 &= 8 8: (7a) *(u64 *)(r0 +0) = 22 R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=8 R10=fp 9: (95) exit from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp 9: (95) exit processed 10 insns ii) BPF_ADD in 32 bit mode on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0xc24eee00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+2 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (04) (u32) r0 += (u32) 0 8: (7a) *(u64 *)(r0 +0) = 22 R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 9: (95) exit from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp 9: (95) exit processed 10 insns Issue is, while min_value / max_value boundaries for the access are adjusted appropriately, we change the pointer value in a way that cannot be sufficiently tracked anymore from its origin. Operations like BPF_{AND,OR,DIV,MUL,etc} on a destination register that is PTR_TO_MAP_VALUE{,_ADJ} was probably unintended, in fact, all the test cases coming with 484611357c19 ("bpf: allow access into map value arrays") perform BPF_ADD only on the destination register that is PTR_TO_MAP_VALUE_ADJ. Only for UNKNOWN_VALUE register types such operations make sense, f.e. with unknown memory content fetched initially from a constant offset from the map value memory into a register. That register is then later tested against lower / upper bounds, so that the verifier can then do the tracking of min_value / max_value, and properly check once that UNKNOWN_VALUE register is added to the destination register with type PTR_TO_MAP_VALUE{,_ADJ}. This is also what the original use-case is solving. Note, tracking on what is being added is done through adjust_reg_min_max_vals() and later access to the map value enforced with these boundaries and the given offset from the insn through check_map_access_adj(). Tests will fail for non-root environment due to prohibited pointer arithmetic, in particular in check_alu_op(), we bail out on the is_pointer_value() check on the dst_reg (which is false in root case as we allow for pointer arithmetic via env->allow_ptr_leaks). Similarly to PTR_TO_PACKET, one way to fix it is to restrict the allowed operations on PTR_TO_MAP_VALUE{,_ADJ} registers to 64 bit mode BPF_ADD. The test_verifier suite runs fine after the patch and it also rejects mentioned test cases. Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5e6202e62265..86deddecff25 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1925,6 +1925,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * register as unknown. */ if (env->allow_ptr_leaks && + BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD && (dst_reg->type == PTR_TO_MAP_VALUE || dst_reg->type == PTR_TO_MAP_VALUE_ADJ)) dst_reg->type = PTR_TO_MAP_VALUE_ADJ; -- cgit v1.2.3 From 79adffcd6489ef43bda2dfded3d637d7fb4fac80 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:03 +0200 Subject: bpf, verifier: fix rejection of unaligned access checks for map_value_adj Currently, the verifier doesn't reject unaligned access for map_value_adj register types. Commit 484611357c19 ("bpf: allow access into map value arrays") added logic to check_ptr_alignment() extending it from PTR_TO_PACKET to also PTR_TO_MAP_VALUE_ADJ, but for PTR_TO_MAP_VALUE_ADJ no enforcement is in place, because reg->id for PTR_TO_MAP_VALUE_ADJ reg types is never non-zero, meaning, we can cause BPF_H/_W/_DW-based unaligned access for architectures not supporting efficient unaligned access, and thus worst case could raise exceptions on some archs that are unable to correct the unaligned access or perform a different memory access to the actual requested one and such. i) Unaligned load with !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0x42533a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+11 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (61) r1 = *(u32 *)(r0 +0) 8: (35) if r1 >= 0xb goto pc+9 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=10 R10=fp 9: (07) r0 += 3 10: (79) r7 = *(u64 *)(r0 +0) R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R1=inv,min_value=0,max_value=10 R10=fp 11: (79) r7 = *(u64 *)(r0 +2) R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R1=inv,min_value=0,max_value=10 R7=inv R10=fp [...] ii) Unaligned store with !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on r0 (map_value_adj): 0: (bf) r2 = r10 1: (07) r2 += -8 2: (7a) *(u64 *)(r2 +0) = 0 3: (18) r1 = 0x4df16a00 5: (85) call bpf_map_lookup_elem#1 6: (15) if r0 == 0x0 goto pc+19 R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp 7: (07) r0 += 3 8: (7a) *(u64 *)(r0 +0) = 42 R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp 9: (7a) *(u64 *)(r0 +2) = 43 R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp 10: (7a) *(u64 *)(r0 -2) = 44 R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp [...] For the PTR_TO_PACKET type, reg->id is initially zero when skb->data was fetched, it later receives a reg->id from env->id_gen generator once another register with UNKNOWN_VALUE type was added to it via check_packet_ptr_add(). The purpose of this reg->id is twofold: i) it is used in find_good_pkt_pointers() for setting the allowed access range for regs with PTR_TO_PACKET of same id once verifier matched on data/data_end tests, and ii) for check_ptr_alignment() to determine that when not having efficient unaligned access and register with UNKNOWN_VALUE was added to PTR_TO_PACKET, that we're only allowed to access the content bytewise due to unknown unalignment. reg->id was never intended for PTR_TO_MAP_VALUE{,_ADJ} types and thus is always zero, the only marking is in PTR_TO_MAP_VALUE_OR_NULL that was added after 484611357c19 via 57a09bf0a416 ("bpf: Detect identical PTR_TO_MAP_VALUE_OR_NULL registers"). Above tests will fail for non-root environment due to prohibited pointer arithmetic. The fix splits register-type specific checks into their own helper instead of keeping them combined, so we don't run into a similar issue in future once we extend check_ptr_alignment() further and forget to add reg->type checks for some of the checks. Fixes: 484611357c19 ("bpf: allow access into map value arrays") Signed-off-by: Daniel Borkmann Reviewed-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 58 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 86deddecff25..a834068a400e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -765,38 +765,56 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } } -static int check_ptr_alignment(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, int off, int size) +static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, + int off, int size) { - if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) { - if (off % size != 0) { - verbose("misaligned access off %d size %d\n", - off, size); - return -EACCES; - } else { - return 0; - } - } - - if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) - /* misaligned access to packet is ok on x86,arm,arm64 */ - return 0; - if (reg->id && size != 1) { - verbose("Unknown packet alignment. Only byte-sized access allowed\n"); + verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n"); return -EACCES; } /* skb->data is NET_IP_ALIGN-ed */ - if (reg->type == PTR_TO_PACKET && - (NET_IP_ALIGN + reg->off + off) % size != 0) { + if ((NET_IP_ALIGN + reg->off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", NET_IP_ALIGN, reg->off, off, size); return -EACCES; } + return 0; } +static int check_val_ptr_alignment(const struct bpf_reg_state *reg, + int size) +{ + if (size != 1) { + verbose("Unknown alignment. Only byte-sized access allowed in value access.\n"); + return -EACCES; + } + + return 0; +} + +static int check_ptr_alignment(const struct bpf_reg_state *reg, + int off, int size) +{ + switch (reg->type) { + case PTR_TO_PACKET: + return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : + check_pkt_ptr_alignment(reg, off, size); + case PTR_TO_MAP_VALUE_ADJ: + return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : + check_val_ptr_alignment(reg, size); + default: + if (off % size != 0) { + verbose("misaligned access off %d size %d\n", + off, size); + return -EACCES; + } + + return 0; + } +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -818,7 +836,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, if (size < 0) return size; - err = check_ptr_alignment(env, reg, off, size); + err = check_ptr_alignment(reg, off, size); if (err) return err; -- cgit v1.2.3 From 02ea80b1850e48abbce77878896229d7cc5cb230 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 31 Mar 2017 02:24:04 +0200 Subject: bpf: add various verifier test cases for self-tests Add a couple of test cases, for example, probing for xadd on a spilled pointer to packet and map_value_adj register, various other map_value_adj tests including the unaligned load/store, and trying out pointer arithmetic on map_value_adj register itself. For the unaligned load/store, we need to figure out whether the architecture has efficient unaligned access and need to mark affected tests accordingly. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- tools/include/linux/filter.h | 10 ++ tools/testing/selftests/bpf/Makefile | 9 +- tools/testing/selftests/bpf/test_verifier.c | 270 +++++++++++++++++++++++++++- 3 files changed, 283 insertions(+), 6 deletions(-) diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 122153b16ea4..390d7c9685fd 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -168,6 +168,16 @@ .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6a1ad58cb66f..9af09e8099c0 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,7 +1,14 @@ LIBDIR := ../../../lib BPFDIR := $(LIBDIR)/bpf +APIDIR := ../../../include/uapi +GENDIR := ../../../../include/generated +GENHDR := $(GENDIR)/autoconf.h -CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR) +ifneq ($(wildcard $(GENHDR)),) + GENFLAGS := -DHAVE_GENHDR +endif + +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) LDLIBS += -lcap TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 7d761d4cc759..c848e90b6421 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -30,6 +30,14 @@ #include +#ifdef HAVE_GENHDR +# include "autoconf.h" +#else +# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__) +# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 +# endif +#endif + #include "../../../include/linux/filter.h" #ifndef ARRAY_SIZE @@ -39,6 +47,8 @@ #define MAX_INSNS 512 #define MAX_FIXUPS 8 +#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) + struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; @@ -53,6 +63,7 @@ struct bpf_test { REJECT } result, result_unpriv; enum bpf_prog_type prog_type; + uint8_t flags; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -2431,6 +2442,30 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test15 (spill with xadd)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_5, 4096), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R2 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "helper access to packet: test1, valid packet_ptr range", .insns = { @@ -2934,6 +2969,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "valid map access into an array with a variable", @@ -2957,6 +2993,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "valid map access into an array with a signed variable", @@ -2984,6 +3021,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a constant", @@ -3025,6 +3063,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is outside of the array range", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a variable", @@ -3048,6 +3087,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with no floor check", @@ -3074,6 +3114,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a invalid max check", @@ -3100,6 +3141,7 @@ static struct bpf_test tests[] = { .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid map access into an array with a invalid max check", @@ -3129,6 +3171,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result_unpriv = REJECT, .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "multiple registers share map_lookup_elem result", @@ -3252,6 +3295,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr_unpriv = "R0 pointer arithmetic prohibited", .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "constant register |= constant should keep constant type", @@ -3981,7 +4025,208 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, }, { - "map element value (adjusted) is preserved across register spilling", + "map element value or null is marked on register spilling", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result = ACCEPT, + .result_unpriv = REJECT, + }, + { + "map element value store of cleared call register", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R1 !read_ok", + .errstr = "R1 !read_ok", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value with unaligned store", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43), + BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22), + BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23), + BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "map element value with unaligned load", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .result = ACCEPT, + .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "map element value illegal alu op, 1", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 3", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 4", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value illegal alu op, 5", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_3, 4096), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .errstr = "R0 invalid mem access 'inv'", + .result = REJECT, + .result_unpriv = REJECT, + }, + { + "map element value is preserved across register spilling", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), @@ -4003,6 +4248,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R0 pointer arithmetic prohibited", .result = ACCEPT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "helper access to variable memory: stack, bitwise AND + JMP, correct bounds", @@ -4441,6 +4687,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid range check", @@ -4472,6 +4719,7 @@ static struct bpf_test tests[] = { .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, } }; @@ -4550,11 +4798,11 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { + int fd_prog, expected_ret, reject_from_alignment; struct bpf_insn *prog = test->insns; int prog_len = probe_filter_length(prog); int prog_type = test->prog_type; int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1; - int fd_prog, expected_ret; const char *expected_err; do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3); @@ -4567,8 +4815,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? test->errstr_unpriv : test->errstr; + + reject_from_alignment = fd_prog < 0 && + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && + strstr(bpf_vlog, "Unknown alignment."); +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (reject_from_alignment) { + printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", + strerror(errno)); + goto fail_log; + } +#endif if (expected_ret == ACCEPT) { - if (fd_prog < 0) { + if (fd_prog < 0 && !reject_from_alignment) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); goto fail_log; @@ -4578,14 +4837,15 @@ static void do_test_single(struct bpf_test *test, bool unpriv, printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!strstr(bpf_vlog, expected_err)) { + if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { printf("FAIL\nUnexpected error message!\n"); goto fail_log; } } (*passes)++; - printf("OK\n"); + printf("OK%s\n", reject_from_alignment ? + " (NOTE: reject due to unknown alignment)" : ""); close_fds: close(fd_prog); close(fd_f1); -- cgit v1.2.3 From afe89962ee0799955b606cc7637ac86a296923a6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 31 Mar 2017 17:57:28 +0800 Subject: sctp: use right in and out stream cnt Since sctp reconf was added in sctp, the real cnt of in/out stream have not been c.sinit_max_instreams and c.sinit_num_ostreams any more. This patch is to replace them with stream->in/outcnt. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 +-- net/sctp/proc.c | 4 ++-- net/sctp/sm_statefuns.c | 6 +++--- net/sctp/socket.c | 10 +++++----- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 025ccff67072..8081476ed313 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1026,8 +1026,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) /* RFC 2960 6.5 Every DATA chunk MUST carry a valid * stream identifier. */ - if (chunk->sinfo.sinfo_stream >= - asoc->c.sinit_num_ostreams) { + if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) { /* Mark as failed send. */ sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 206377fe91ec..a0b29d43627f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_seq_dump_remote_addrs(seq, assoc); seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " "%8d %8d %8d %8d", - assoc->hbinterval, assoc->c.sinit_max_instreams, - assoc->c.sinit_num_ostreams, assoc->max_retrans, + assoc->hbinterval, assoc->stream->incnt, + assoc->stream->outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, atomic_read(&sk->sk_wmem_alloc), diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e03bb1aab4d0..24c6ccce7539 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3946,7 +3946,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto discard_noforce; } @@ -4017,7 +4017,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( /* Silently discard the chunk if stream-id is not valid */ sctp_walk_fwdtsn(skip, chunk) { - if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams) + if (ntohs(skip->stream) >= asoc->stream->incnt) goto gen_shutdown; } @@ -6353,7 +6353,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ sid = ntohs(data_hdr->stream); - if (sid >= asoc->c.sinit_max_instreams) { + if (sid >= asoc->stream->incnt) { /* Mark tsn as received even though we drop it */ sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index baa269a0d52e..12fbae2c1002 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } /* Check for invalid stream. */ - if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) { + if (sinfo->sinfo_stream >= asoc->stream->outcnt) { err = -EINVAL; goto out_free; } @@ -4461,8 +4461,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_rwnd = asoc->a_rwnd; info->sctpi_unackdata = asoc->unack_data; info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - info->sctpi_instrms = asoc->c.sinit_max_instreams; - info->sctpi_outstrms = asoc->c.sinit_num_ostreams; + info->sctpi_instrms = asoc->stream->incnt; + info->sctpi_outstrms = asoc->stream->outcnt; list_for_each(pos, &asoc->base.inqueue.in_chunk_list) info->sctpi_inqueue++; list_for_each(pos, &asoc->outqueue.out_chunk_list) @@ -4691,8 +4691,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, status.sstat_unackdata = asoc->unack_data; status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map); - status.sstat_instrms = asoc->c.sinit_max_instreams; - status.sstat_outstrms = asoc->c.sinit_num_ostreams; + status.sstat_instrms = asoc->stream->incnt; + status.sstat_outstrms = asoc->stream->outcnt; status.sstat_fragmentation_point = asoc->frag_point; status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc); memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr, -- cgit v1.2.3 From 61b9a047729bb230978178bca6729689d0c50ca2 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:25 +0200 Subject: l2tp: fix race in l2tp_recv_common() Taking a reference on sessions in l2tp_recv_common() is racy; this has to be done by the callers. To this end, a new function is required (l2tp_session_get()) to atomically lookup a session and take a reference on it. Callers then have to manually drop this reference. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 73 ++++++++++++++++++++++++++++++++++++++++++---------- net/l2tp/l2tp_core.h | 3 +++ net/l2tp/l2tp_ip.c | 17 ++++++++---- net/l2tp/l2tp_ip6.c | 18 +++++++++---- 4 files changed, 88 insertions(+), 23 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8adab6335ced..8a067536d15c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -278,6 +278,55 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn } EXPORT_SYMBOL_GPL(l2tp_session_find); +/* Like l2tp_session_find() but takes a reference on the returned session. + * Optionally calls session->ref() too if do_ref is true. + */ +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref) +{ + struct hlist_head *session_list; + struct l2tp_session *session; + + if (!tunnel) { + struct l2tp_net *pn = l2tp_pernet(net); + + session_list = l2tp_session_id_hash_2(pn, session_id); + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu(session, session_list, global_hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + rcu_read_unlock_bh(); + + return session; + } + } + rcu_read_unlock_bh(); + + return NULL; + } + + session_list = l2tp_session_id_hash(tunnel, session_id); + read_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session, session_list, hlist) { + if (session->session_id == session_id) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); + read_unlock_bh(&tunnel->hlist_lock); + + return session; + } + } + read_unlock_bh(&tunnel->hlist_lock); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_session_get); + struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; @@ -633,6 +682,9 @@ discard: * a data (not control) frame before coming here. Fields up to the * session-id have already been parsed and ptr points to the data * after the session-id. + * + * session->ref() must have been called prior to l2tp_recv_common(). + * session->deref() will be called automatically after skb is processed. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, @@ -642,14 +694,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int offset; u32 ns, nr; - /* The ref count is increased since we now hold a pointer to - * the session. Take care to decrement the refcnt when exiting - * this function from now on... - */ - l2tp_session_inc_refcount(session); - if (session->ref) - (*session->ref)(session); - /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { @@ -802,8 +846,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Try to dequeue as many skbs from reorder_q as we can. */ l2tp_recv_dequeue(session); - l2tp_session_dec_refcount(session); - return; discard: @@ -812,8 +854,6 @@ discard: if (session->deref) (*session->deref)(session); - - l2tp_session_dec_refcount(session); } EXPORT_SYMBOL(l2tp_recv_common); @@ -920,8 +960,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } /* Find the session context */ - session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id); + session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true); if (!session || !session->recv_skb) { + if (session) { + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } + /* Not found? Pass to userspace to deal with */ l2tp_info(tunnel, L2TP_MSG_DATA, "%s: no session found (%u/%u). Passing up.\n", @@ -930,6 +976,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook); + l2tp_session_dec_refcount(session); return 0; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index aebf281d09ee..4544e81a3d27 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -230,6 +230,9 @@ out: return tunnel; } +struct l2tp_session *l2tp_session_get(struct net *net, + struct l2tp_tunnel *tunnel, + u32 session_id, bool do_ref); struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 7208fbe5856b..4d322c1b7233 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); return 0; @@ -203,6 +204,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 516d7ce24ba7..88b397c30d86 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_find(net, NULL, session_id); - if (session == NULL) + session = l2tp_session_get(net, NULL, session_id, true); + if (!session) goto discard; tunnel = session->tunnel; - if (tunnel == NULL) - goto discard; + if (!tunnel) + goto discard_sess; /* Trace packet contents, if enabled */ if (tunnel->debug & L2TP_MSG_DATA) { length = min(32u, skb->len); if (!pskb_may_pull(skb, length)) - goto discard; + goto discard_sess; /* Point to L2TP header */ optr = ptr = skb->data; @@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb) l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook); + l2tp_session_dec_refcount(session); + return 0; pass_up: @@ -216,6 +218,12 @@ pass_up: return sk_receive_skb(sk, skb, 1); +discard_sess: + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + goto discard; + discard_put: sock_put(sk); -- cgit v1.2.3 From 57377d63547861919ee634b845c7caa38de4a452 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:26 +0200 Subject: l2tp: ensure session can't get removed during pppol2tp_session_ioctl() Holding a reference on session is required before calling pppol2tp_session_ioctl(). The session could get freed while processing the ioctl otherwise. Since pppol2tp_session_ioctl() uses the session's socket, we also need to take a reference on it in l2tp_session_get(). Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 123b6a2411a0..827e55c41ba2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1141,11 +1141,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, if (stats.session_id != 0) { /* resend to session ioctl handler */ struct l2tp_session *session = - l2tp_session_find(sock_net(sk), tunnel, stats.session_id); - if (session != NULL) - err = pppol2tp_session_ioctl(session, cmd, arg); - else + l2tp_session_get(sock_net(sk), tunnel, + stats.session_id, true); + + if (session) { + err = pppol2tp_session_ioctl(session, cmd, + arg); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + } else { err = -EBADR; + } break; } #ifdef CONFIG_XFRM -- cgit v1.2.3 From dbdbc73b44782e22b3b4b6e8b51e7a3d245f3086 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:27 +0200 Subject: l2tp: fix duplicate session creation l2tp_session_create() relies on its caller for checking for duplicate sessions. This is racy since a session can be concurrently inserted after the caller's verification. Fix this by letting l2tp_session_create() verify sessions uniqueness upon insertion. Callers need to be adapted to check for l2tp_session_create()'s return code instead of calling l2tp_session_find(). pppol2tp_connect() is a bit special because it has to work on existing sessions (if they're not connected) or to create a new session if none is found. When acting on a preexisting session, a reference must be held or it could go away on us. So we have to use l2tp_session_get() instead of l2tp_session_find() and drop the reference before exiting. Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support") Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 70 +++++++++++++++++++++++++++++++++++++++------------- net/l2tp/l2tp_eth.c | 10 ++------ net/l2tp/l2tp_ppp.c | 60 ++++++++++++++++++++++---------------------- 3 files changed, 84 insertions(+), 56 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8a067536d15c..46b450a1bc21 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -374,6 +374,48 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) } EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, + struct l2tp_session *session) +{ + struct l2tp_session *session_walk; + struct hlist_head *g_head; + struct hlist_head *head; + struct l2tp_net *pn; + + head = l2tp_session_id_hash(tunnel, session->session_id); + + write_lock_bh(&tunnel->hlist_lock); + hlist_for_each_entry(session_walk, head, hlist) + if (session_walk->session_id == session->session_id) + goto exist; + + if (tunnel->version == L2TP_HDR_VER_3) { + pn = l2tp_pernet(tunnel->l2tp_net); + g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net), + session->session_id); + + spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_for_each_entry(session_walk, g_head, global_hlist) + if (session_walk->session_id == session->session_id) + goto exist_glob; + + hlist_add_head_rcu(&session->global_hlist, g_head); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + } + + hlist_add_head(&session->hlist, head); + write_unlock_bh(&tunnel->hlist_lock); + + return 0; + +exist_glob: + spin_unlock_bh(&pn->l2tp_session_hlist_lock); +exist: + write_unlock_bh(&tunnel->hlist_lock); + + return -EEXIST; +} + /* Lookup a tunnel by id */ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id) @@ -1785,6 +1827,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; + int err; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); if (session != NULL) { @@ -1840,6 +1883,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn l2tp_session_set_header_len(session, tunnel->version); + err = l2tp_session_add_to_tunnel(tunnel, session); + if (err) { + kfree(session); + + return ERR_PTR(err); + } + /* Bump the reference count. The session context is deleted * only when this drops to zero. */ @@ -1849,28 +1899,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn /* Ensure tunnel socket isn't deleted */ sock_hold(tunnel->sock); - /* Add session to the tunnel's hash list */ - write_lock_bh(&tunnel->hlist_lock); - hlist_add_head(&session->hlist, - l2tp_session_id_hash(tunnel, session_id)); - write_unlock_bh(&tunnel->hlist_lock); - - /* And to the global session list if L2TPv3 */ - if (tunnel->version != L2TP_HDR_VER_2) { - struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); - - spin_lock_bh(&pn->l2tp_session_hlist_lock); - hlist_add_head_rcu(&session->global_hlist, - l2tp_session_id_hash_2(pn, session_id)); - spin_unlock_bh(&pn->l2tp_session_hlist_lock); - } - /* Ignore management session in session count value */ if (session->session_id != 0) atomic_inc(&l2tp_session_count); + + return session; } - return session; + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(l2tp_session_create); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 8bf18a5f66e0..6fd41d7afe1e 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -221,12 +221,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p goto out; } - session = l2tp_session_find(net, tunnel, session_id); - if (session) { - rc = -EEXIST; - goto out; - } - if (cfg->ifname) { dev = dev_get_by_name(net, cfg->ifname); if (dev) { @@ -240,8 +234,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p session = l2tp_session_create(sizeof(*spriv), tunnel, session_id, peer_session_id, cfg); - if (!session) { - rc = -ENOMEM; + if (IS_ERR(session)) { + rc = PTR_ERR(session); goto out; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 827e55c41ba2..26501902d1a7 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -583,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, int error = 0; u32 tunnel_id, peer_tunnel_id; u32 session_id, peer_session_id; + bool drop_refcnt = false; int ver = 2; int fd; @@ -684,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = peer_tunnel_id; - /* Create session if it doesn't already exist. We handle the - * case where a session was previously created by the netlink - * interface by checking that the session doesn't already have - * a socket and its tunnel socket are what we expect. If any - * of those checks fail, return EEXIST to the caller. - */ - session = l2tp_session_find(sock_net(sk), tunnel, session_id); - if (session == NULL) { - /* Default MTU must allow space for UDP/L2TP/PPP - * headers. + session = l2tp_session_get(sock_net(sk), tunnel, session_id, false); + if (session) { + drop_refcnt = true; + ps = l2tp_session_priv(session); + + /* Using a pre-existing session is fine as long as it hasn't + * been connected yet. */ - cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD; + if (ps->sock) { + error = -EEXIST; + goto end; + } - /* Allocate and initialize a new session context. */ - session = l2tp_session_create(sizeof(struct pppol2tp_session), - tunnel, session_id, - peer_session_id, &cfg); - if (session == NULL) { - error = -ENOMEM; + /* consistency checks */ + if (ps->tunnel_sock != tunnel->sock) { + error = -EEXIST; goto end; } } else { - ps = l2tp_session_priv(session); - error = -EEXIST; - if (ps->sock != NULL) - goto end; + /* Default MTU must allow space for UDP/L2TP/PPP headers */ + cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; + cfg.mru = cfg.mtu; - /* consistency checks */ - if (ps->tunnel_sock != tunnel->sock) + session = l2tp_session_create(sizeof(struct pppol2tp_session), + tunnel, session_id, + peer_session_id, &cfg); + if (IS_ERR(session)) { + error = PTR_ERR(session); goto end; + } } /* Associate session with its PPPoL2TP socket */ @@ -778,6 +779,8 @@ out_no_ppp: session->name); end: + if (drop_refcnt) + l2tp_session_dec_refcount(session); release_sock(sk); return error; @@ -805,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i if (tunnel->sock == NULL) goto out; - /* Check that this session doesn't already exist */ - error = -EEXIST; - session = l2tp_session_find(net, tunnel, session_id); - if (session != NULL) - goto out; - /* Default MTU values. */ if (cfg->mtu == 0) cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD; @@ -818,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i cfg->mru = cfg->mtu; /* Allocate and initialize a new session context. */ - error = -ENOMEM; session = l2tp_session_create(sizeof(struct pppol2tp_session), tunnel, session_id, peer_session_id, cfg); - if (session == NULL) + if (IS_ERR(session)) { + error = PTR_ERR(session); goto out; + } ps = l2tp_session_priv(session); ps->tunnel_sock = tunnel->sock; -- cgit v1.2.3 From 5e6a9e5a3554a5b3db09cdc22253af1849c65dff Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:29 +0200 Subject: l2tp: hold session while sending creation notifications l2tp_session_find() doesn't take any reference on the returned session. Therefore, the session may disappear while sending the notification. Use l2tp_session_get() instead and decrement session's refcount once the notification is sent. Fixes: 33f72e6f0c67 ("l2tp : multicast notification to the registered listeners") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 3620fba31786..f1b68effb077 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -642,10 +642,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf session_id, peer_session_id, &cfg); if (ret >= 0) { - session = l2tp_session_find(net, tunnel, session_id); - if (session) + session = l2tp_session_get(net, tunnel, session_id, false); + if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); + l2tp_session_dec_refcount(session); + } } out: -- cgit v1.2.3 From 2777e2ab5a9cf2b4524486c6db1517a6ded25261 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 31 Mar 2017 13:02:30 +0200 Subject: l2tp: take a reference on sessions used in genetlink handlers Callers of l2tp_nl_session_find() need to hold a reference on the returned session since there's no guarantee that it isn't going to disappear from under them. Relying on the fact that no l2tp netlink message may be processed concurrently isn't enough: sessions can be deleted by other means (e.g. by closing the PPPOL2TP socket of a ppp pseudowire). l2tp_nl_cmd_session_delete() is a bit special: it runs a callback function that may require a previous call to session->ref(). In particular, for ppp pseudowires, the callback is l2tp_session_delete(), which then calls pppol2tp_session_close() and dereferences the PPPOL2TP socket. The socket might already be gone at the moment l2tp_session_delete() calls session->ref(), so we need to take a reference during the session lookup. So we need to pass the do_ref variable down to l2tp_session_get() and l2tp_session_get_by_ifname(). Since all callers have to be updated, l2tp_session_find_by_ifname() and l2tp_nl_session_find() are renamed to reflect their new behaviour. Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 9 +++++++-- net/l2tp/l2tp_core.h | 3 ++- net/l2tp/l2tp_netlink.c | 39 ++++++++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 46b450a1bc21..e927422d8c58 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -352,7 +352,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_find_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. */ -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref) { struct l2tp_net *pn = l2tp_pernet(net); int hash; @@ -362,7 +363,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) { hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); rcu_read_unlock_bh(); + return session; } } @@ -372,7 +377,7 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname); +EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, struct l2tp_session *session) diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 4544e81a3d27..3b9b704a84e4 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -237,7 +237,8 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); -struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname); +struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, + bool do_ref); struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index f1b68effb077..93e317377c66 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -48,7 +48,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; -static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) +static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, + bool do_ref) { u32 tunnel_id; u32 session_id; @@ -59,14 +60,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info) if (info->attrs[L2TP_ATTR_IFNAME]) { ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); - session = l2tp_session_find_by_ifname(net, ifname); + session = l2tp_session_get_by_ifname(net, ifname, do_ref); } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); tunnel = l2tp_tunnel_find(net, tunnel_id); if (tunnel) - session = l2tp_session_find(net, tunnel, session_id); + session = l2tp_session_get(net, tunnel, session_id, + do_ref); } return session; @@ -660,7 +662,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf struct l2tp_session *session; u16 pw_type; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, true); if (session == NULL) { ret = -ENODEV; goto out; @@ -674,6 +676,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); + if (session->deref) + session->deref(session); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -683,7 +689,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf int ret = 0; struct l2tp_session *session; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; goto out; @@ -718,6 +724,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_MODIFY); + l2tp_session_dec_refcount(session); + out: return ret; } @@ -813,29 +821,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; int ret; - session = l2tp_nl_session_find(info); + session = l2tp_nl_session_get(info, false); if (session == NULL) { ret = -ENODEV; - goto out; + goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto out; + goto err_ref; } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) - goto err_out; + goto err_ref_msg; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); + ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); -err_out: - nlmsg_free(msg); + l2tp_session_dec_refcount(session); -out: + return ret; + +err_ref_msg: + nlmsg_free(msg); +err_ref: + l2tp_session_dec_refcount(session); +err: return ret; } -- cgit v1.2.3 From bf17aa36c0f199f5b254262e77eaefda7da0f50b Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 1 Mar 2017 18:22:01 -0800 Subject: nvme: Correct NVMF enum values to match NVMe-oF rev 1.0 The enum values for QPTYPE, PRTYPE and CMS are off by 1 from the values defined in figure 42 of the NVM Express over Fabrics 1.0: http://www.nvmexpress.org/wp-content/uploads/NVMe_over_Fabrics_1_0_Gold_20160605-1.pdf Fix our enums to match the final spec. Signed-off-by: Roland Dreier Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- include/linux/nvme.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c43d435d4225..9061780b141f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -64,26 +64,26 @@ enum { * RDMA_QPTYPE field */ enum { - NVMF_RDMA_QPTYPE_CONNECTED = 0, /* Reliable Connected */ - NVMF_RDMA_QPTYPE_DATAGRAM = 1, /* Reliable Datagram */ + NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */ + NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */ }; /* RDMA QP Service Type codes for Discovery Log Page entry TSAS * RDMA_QPTYPE field */ enum { - NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 0, /* No Provider Specified */ - NVMF_RDMA_PRTYPE_IB = 1, /* InfiniBand */ - NVMF_RDMA_PRTYPE_ROCE = 2, /* InfiniBand RoCE */ - NVMF_RDMA_PRTYPE_ROCEV2 = 3, /* InfiniBand RoCEV2 */ - NVMF_RDMA_PRTYPE_IWARP = 4, /* IWARP */ + NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */ + NVMF_RDMA_PRTYPE_IB = 2, /* InfiniBand */ + NVMF_RDMA_PRTYPE_ROCE = 3, /* InfiniBand RoCE */ + NVMF_RDMA_PRTYPE_ROCEV2 = 4, /* InfiniBand RoCEV2 */ + NVMF_RDMA_PRTYPE_IWARP = 5, /* IWARP */ }; /* RDMA Connection Management Service Type codes for Discovery Log Page * entry TSAS RDMA_CMS field */ enum { - NVMF_RDMA_CMS_RDMA_CM = 0, /* Sockets based enpoint addressing */ + NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ }; #define NVMF_AQ_DEPTH 32 -- cgit v1.2.3 From f1dd03a84dbf3e5ca91295a3d04c882b8bd86251 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 Mar 2017 17:00:05 +0200 Subject: nvme: add missing byte swap in nvme_setup_discard Fixes: b35ba01e ("nvme: support ranged discard requests") Signed-off-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9b3b57fef446..9583a5f58a1d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -270,7 +270,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, memset(cmnd, 0, sizeof(*cmnd)); cmnd->dsm.opcode = nvme_cmd_dsm; cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); - cmnd->dsm.nr = segments - 1; + cmnd->dsm.nr = cpu_to_le32(segments - 1); cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); req->special_vec.bv_page = virt_to_page(range); -- cgit v1.2.3 From 5ac5fcc6c7a2339a34c876a9b6926a7f17225493 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 Mar 2017 17:00:06 +0200 Subject: nvmet: add missing byte swap in nvmet_get_smart_log In this case entirely harmless as it's all-ones, but still nice to shut up sparse. Signed-off-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/admin-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index a7bcff45f437..76450b0c55f1 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req, u16 status; WARN_ON(req == NULL || slog == NULL); - if (req->cmd->get_log_page.nsid == 0xFFFFFFFF) + if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF)) status = nvmet_get_smart_log_all(req, slog); else status = nvmet_get_smart_log_nsid(req, slog); -- cgit v1.2.3 From 78ce3daa7d703028c00eff2e03ad22efd116e549 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 Mar 2017 17:00:07 +0200 Subject: nvmet: fix byte swap in nvmet_execute_write_zeroes The length field in the Write Zeroes command is a 16-bit field. Signed-off-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/io-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 4195115c7e54..e37acd77b5fe 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -180,7 +180,7 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req) sector = le64_to_cpu(write_zeroes->slba) << (req->ns->blksize_shift - 9); - nr_sector = (((sector_t)le32_to_cpu(write_zeroes->length)) << + nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length)) << (req->ns->blksize_shift - 9)) + 1; if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, -- cgit v1.2.3 From 793c7ed9d785411a5cd6fe7e998cd7ee2870b38b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 Mar 2017 17:00:08 +0200 Subject: nvmet: fix byte swap in nvmet_parse_io_cmd We need to do arithmetics after byte swapping, not before. Signed-off-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/io-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index e37acd77b5fe..6b0baa9caab9 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -230,7 +230,7 @@ int nvmet_parse_io_cmd(struct nvmet_req *req) return 0; case nvme_cmd_dsm: req->execute = nvmet_execute_dsm; - req->data_len = le32_to_cpu(cmd->dsm.nr + 1) * + req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * sizeof(struct nvme_dsm_range); return 0; case nvme_cmd_write_zeroes: -- cgit v1.2.3 From 51f528a1636f352ad776a912ac86026ac7a89a2a Mon Sep 17 00:00:00 2001 From: Shrirang Bagul Date: Thu, 30 Mar 2017 23:47:21 +0800 Subject: iio: st_pressure: initialize lps22hb bootime This patch initializes the bootime in struct st_sensor_settings for lps22hb sensor. Without this, sensor channels read from sysfs always report stale values. Signed-off-by: Shrirang Bagul Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/st_pressure_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 5f2680855552..fd0edca0e656 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -457,6 +457,7 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, .multi_read_bit = true, + .bootime = 2, }, }; -- cgit v1.2.3 From 7fd6592d1287046f61bfd3cda3c03cd35be490f7 Mon Sep 17 00:00:00 2001 From: Nikolaus Schulz Date: Fri, 24 Mar 2017 13:41:51 +0100 Subject: iio: core: Fix IIO_VAL_FRACTIONAL_LOG2 for negative values Fix formatting of negative values of type IIO_VAL_FRACTIONAL_LOG2 by switching from do_div(), which can't handle negative numbers, to div_s64_rem(). Also use shift_right for shifting, which is safe with negative values. Signed-off-by: Nikolaus Schulz Reviewed-by: Lars-Peter Clausen Cc: stable@vger.kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index d18ded45bedd..3ff91e02fee3 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -610,10 +610,9 @@ static ssize_t __iio_format_value(char *buf, size_t len, unsigned int type, tmp0 = (int)div_s64_rem(tmp, 1000000000, &tmp1); return snprintf(buf, len, "%d.%09u", tmp0, abs(tmp1)); case IIO_VAL_FRACTIONAL_LOG2: - tmp = (s64)vals[0] * 1000000000LL >> vals[1]; - tmp1 = do_div(tmp, 1000000000LL); - tmp0 = tmp; - return snprintf(buf, len, "%d.%09u", tmp0, tmp1); + tmp = shift_right((s64)vals[0] * 1000000000LL, vals[1]); + tmp0 = (int)div_s64_rem(tmp, 1000000000LL, &tmp1); + return snprintf(buf, len, "%d.%09u", tmp0, abs(tmp1)); case IIO_VAL_INT_MULTIPLE: { int i; -- cgit v1.2.3 From 862d1d89ad9e5b117f1fb2a472cef6fc92c0007a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 29 Mar 2017 16:23:35 -0700 Subject: iio: accel: hid-sensor-accel-3d: Fix duplicate scan index error When both accel_3d and gravity sensor are present, iio_device_register() fails with "Duplicate scan index" error. The reason for this is setting of indio_dev->num_channels based on accel_3d channel for both gravity and accel-3d sensor. But number of channels are not same, so for gravity it is pointing to some invalid memory and getting scan_index to compare which may match. To fix this issue, set the indio_dev->num_channels correctly based on the sensor type. Fixes: 0e377f3b9ae9 ('iio: Add gravity sensor support') Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- drivers/iio/accel/hid-sensor-accel-3d.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c index ca5759c0c318..43a6cb078193 100644 --- a/drivers/iio/accel/hid-sensor-accel-3d.c +++ b/drivers/iio/accel/hid-sensor-accel-3d.c @@ -370,10 +370,12 @@ static int hid_accel_3d_probe(struct platform_device *pdev) name = "accel_3d"; channel_spec = accel_3d_channels; channel_size = sizeof(accel_3d_channels); + indio_dev->num_channels = ARRAY_SIZE(accel_3d_channels); } else { name = "gravity"; channel_spec = gravity_channels; channel_size = sizeof(gravity_channels); + indio_dev->num_channels = ARRAY_SIZE(gravity_channels); } ret = hid_sensor_parse_common_attributes(hsdev, hsdev->usage, &accel_state->common_attributes); @@ -395,7 +397,6 @@ static int hid_accel_3d_probe(struct platform_device *pdev) goto error_free_dev_mem; } - indio_dev->num_channels = ARRAY_SIZE(accel_3d_channels); indio_dev->dev.parent = &pdev->dev; indio_dev->info = &accel_3d_info; indio_dev->name = name; -- cgit v1.2.3 From bba6d9e47f3ea894e501f94b086a59ffe28241ac Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Tue, 28 Mar 2017 22:23:55 +0800 Subject: iio: hid-sensor-attributes: Fix sensor property setting failure. When system bootup without get sensor property, set sensor property will be fail. If no get_feature operation done before set_feature, the sensor properties will all be the initialized value, which is not the same with sensor real properties. When set sensor property it will write back to sensor the changed perperty data combines with other sensor properties data, it is not right and may be dangerous. In order to get all sensor properties, choose to read one of the sensor properties(no matter read any sensor peroperty, driver will get all the peroperties and return the requested one). Fixes: 73c6768b710a ("iio: hid-sensors: Common attribute and trigger") Signed-off-by: Song Hongyan Acked-by: Srinivas Pandruvada Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/hid-sensors/hid-sensor-attributes.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c index 7afdac42ed42..01e02b9926d4 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c @@ -379,6 +379,8 @@ int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev, { struct hid_sensor_hub_attribute_info timestamp; + s32 value; + int ret; hid_sensor_get_reporting_interval(hsdev, usage_id, st); @@ -417,6 +419,14 @@ int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev, st->sensitivity.index, st->sensitivity.report_id, timestamp.index, timestamp.report_id); + ret = sensor_hub_get_feature(hsdev, + st->power_state.report_id, + st->power_state.index, sizeof(value), &value); + if (ret < 0) + return ret; + if (value < 0) + return -EINVAL; + return 0; } EXPORT_SYMBOL(hid_sensor_parse_common_attributes); -- cgit v1.2.3 From 1c99de981f30b3e7868b8d20ce5479fa1c0fea46 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 2 Apr 2017 13:36:44 -0700 Subject: iscsi-target: Drop work-around for legacy GlobalSAN initiator Once upon a time back in 2009, a work-around was added to support the GlobalSAN iSCSI initiator v3.3 for MacOSX, which during login did not propose nor respond to MaxBurstLength, FirstBurstLength, DefaultTime2Wait and DefaultTime2Retain keys. The work-around in iscsi_check_proposer_for_optional_reply() allowed the missing keys to be proposed, but did not require waiting for a response before moving to full feature phase operation. This allowed GlobalSAN v3.3 to work out-of-the box, and for many years we didn't run into login interopt issues with any other initiators.. Until recently, when Martin tried a QLogic 57840S iSCSI Offload HBA on Windows 2016 which completed login, but subsequently failed with: Got unknown iSCSI OpCode: 0x43 The issue was QLogic MSFT side did not propose DefaultTime2Wait + DefaultTime2Retain, so LIO proposes them itself, and immediately transitions to full feature phase because of the GlobalSAN hack. However, the QLogic MSFT side still attempts to respond to DefaultTime2Retain + DefaultTime2Wait, even though LIO has set ISCSI_FLAG_LOGIN_NEXT_STAGE3 + ISCSI_FLAG_LOGIN_TRANSIT in last login response. So while the QLogic MSFT side should have been proposing these two keys to start, it was doing the correct thing per RFC-3720 attempting to respond to proposed keys before transitioning to full feature phase. All that said, recent versions of GlobalSAN iSCSI (v5.3.0.541) does correctly propose the four keys during login, making the original work-around moot. So in order to allow QLogic MSFT to run unmodified as-is, go ahead and drop this long standing work-around. Reported-by: Martin Svec Cc: Martin Svec Cc: Himanshu Madhani Cc: Arun Easi Cc: stable@vger.kernel.org # 3.1+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_parameters.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index e65bf78ceef3..fce627628200 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -781,22 +781,6 @@ static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param) } else if (IS_TYPE_NUMBER(param)) { if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) SET_PSTATE_REPLY_OPTIONAL(param); - /* - * The GlobalSAN iSCSI Initiator for MacOSX does - * not respond to MaxBurstLength, FirstBurstLength, - * DefaultTime2Wait or DefaultTime2Retain parameter keys. - * So, we set them to 'reply optional' here, and assume the - * the defaults from iscsi_parameters.h if the initiator - * is not RFC compliant and the keys are not negotiated. - */ - if (!strcmp(param->name, MAXBURSTLENGTH)) - SET_PSTATE_REPLY_OPTIONAL(param); - if (!strcmp(param->name, FIRSTBURSTLENGTH)) - SET_PSTATE_REPLY_OPTIONAL(param); - if (!strcmp(param->name, DEFAULTTIME2WAIT)) - SET_PSTATE_REPLY_OPTIONAL(param); - if (!strcmp(param->name, DEFAULTTIME2RETAIN)) - SET_PSTATE_REPLY_OPTIONAL(param); /* * Required for gPXE iSCSI boot client */ -- cgit v1.2.3 From a5d68ba85801a78c892a0eb8efb711e293ed314b Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 31 Mar 2017 10:35:25 +0800 Subject: tcmu: Skip Data-Out blocks before gathering Data-In buffer for BIDI case For the bidirectional case, the Data-Out buffer blocks will always at the head of the tcmu_cmd's bitmap, and before gathering the Data-In buffer, first of all it should skip the Data-Out ones, or the device supporting BIDI commands won't work. Fixed: 26418649eead ("target/user: Introduce data_bitmap, replace data_length/data_head/data_tail") Reported-by: Ilias Tsitsimpis Tested-by: Ilias Tsitsimpis Signed-off-by: Xiubo Li Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 48 +++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 9885d1b521fe..f615c3bbb73e 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -311,24 +311,50 @@ static void free_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd) DATA_BLOCK_BITS); } -static void gather_data_area(struct tcmu_dev *udev, unsigned long *cmd_bitmap, - struct scatterlist *data_sg, unsigned int data_nents) +static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, + bool bidi) { + struct se_cmd *se_cmd = cmd->se_cmd; int i, block; int block_remaining = 0; void *from, *to; size_t copy_bytes, from_offset; - struct scatterlist *sg; + struct scatterlist *sg, *data_sg; + unsigned int data_nents; + DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS); + + bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS); + + if (!bidi) { + data_sg = se_cmd->t_data_sg; + data_nents = se_cmd->t_data_nents; + } else { + uint32_t count; + + /* + * For bidi case, the first count blocks are for Data-Out + * buffer blocks, and before gathering the Data-In buffer + * the Data-Out buffer blocks should be discarded. + */ + count = DIV_ROUND_UP(se_cmd->data_length, DATA_BLOCK_SIZE); + while (count--) { + block = find_first_bit(bitmap, DATA_BLOCK_BITS); + clear_bit(block, bitmap); + } + + data_sg = se_cmd->t_bidi_data_sg; + data_nents = se_cmd->t_bidi_data_nents; + } for_each_sg(data_sg, sg, data_nents, i) { int sg_remaining = sg->length; to = kmap_atomic(sg_page(sg)) + sg->offset; while (sg_remaining > 0) { if (block_remaining == 0) { - block = find_first_bit(cmd_bitmap, + block = find_first_bit(bitmap, DATA_BLOCK_BITS); block_remaining = DATA_BLOCK_SIZE; - clear_bit(block, cmd_bitmap); + clear_bit(block, bitmap); } copy_bytes = min_t(size_t, sg_remaining, block_remaining); @@ -610,19 +636,11 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * se_cmd->scsi_sense_length); free_data_area(udev, cmd); } else if (se_cmd->se_cmd_flags & SCF_BIDI) { - DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS); - /* Get Data-In buffer before clean up */ - bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS); - gather_data_area(udev, bitmap, - se_cmd->t_bidi_data_sg, se_cmd->t_bidi_data_nents); + gather_data_area(udev, cmd, true); free_data_area(udev, cmd); } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { - DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS); - - bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS); - gather_data_area(udev, bitmap, - se_cmd->t_data_sg, se_cmd->t_data_nents); + gather_data_area(udev, cmd, false); free_data_area(udev, cmd); } else if (se_cmd->data_direction == DMA_TO_DEVICE) { free_data_area(udev, cmd); -- cgit v1.2.3 From a71c9a1c779f2499fb2afc0553e543f18aff6edf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Apr 2017 17:23:54 -0700 Subject: Linux 4.11-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 231e6a7749bd..e11989d36c87 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 75514b6654859e0130b512396dc964d2a9e84967 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 31 Mar 2017 18:41:23 -0500 Subject: net: ethernet: ti: cpsw: wake tx queues on ndo_tx_timeout In case, if TX watchdog is fired some or all netdev TX queues will be stopped and as part of recovery it is required not only to drain and reinitailize CPSW TX channeles, but also wake up stoppted TX queues what doesn't happen now and netdevice will stop transmiting data until reopenned. Hence, add netif_tx_wake_all_queues() call in .ndo_tx_timeout() to complete recovery and restore TX path. Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 9f3d9c67e3fe..58cdc066ef2c 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1817,6 +1817,8 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev) } cpsw_intr_enable(cpsw); + netif_trans_update(ndev); + netif_tx_wake_all_queues(ndev); } static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) -- cgit v1.2.3 From 921d701e6f31e1ffaca3560416af1aa04edb4c4f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Sun, 2 Apr 2017 20:08:04 -0700 Subject: nios2: reserve boot memory for device tree Make sure to reserve the boot memory for the flattened device tree. Otherwise it might get overwritten, e.g. when initial_boot_params is copied, leading to a corrupted FDT and a boot hang/crash: bootconsole [early0] enabled Early console on uart16650 initialized at 0xf8001600 OF: fdt: Error -11 processing FDT Kernel panic - not syncing: setup_cpuinfo: No CPU found in devicetree! ---[ end Kernel panic - not syncing: setup_cpuinfo: No CPU found in devicetree! Guenter Roeck says: > I think I found the problem. In unflatten_and_copy_device_tree(), with added > debug information: > > OF: fdt: initial_boot_params=c861e400, dt=c861f000 size=28874 (0x70ca) > > ... and then initial_boot_params is copied to dt, which results in corrupted > fdt since the memory overlaps. Looks like the initial_boot_params memory > is not reserved and (re-)allocated by early_init_dt_alloc_memory_arch(). Cc: stable@vger.kernel.org Reported-by: Guenter Roeck Reference: http://lkml.kernel.org/r/20170226210338.GA19476@roeck-us.net Tested-by: Guenter Roeck Signed-off-by: Tobias Klauser Acked-by: Ley Foon Tan --- arch/nios2/kernel/prom.c | 7 +++++++ arch/nios2/kernel/setup.c | 3 +++ 2 files changed, 10 insertions(+) diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index 367c5426157b..3901b80d4420 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -48,6 +48,13 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) return alloc_bootmem_align(size, align); } +int __init early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, + bool nomap) +{ + reserve_bootmem(base, size, BOOTMEM_DEFAULT); + return 0; +} + void __init early_init_devtree(void *params) { __be32 *dtb = (u32 *)__dtb_start; diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index 6e57ffa5db27..6044d9be28b4 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -201,6 +201,9 @@ void __init setup_arch(char **cmdline_p) } #endif /* CONFIG_BLK_DEV_INITRD */ + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + unflatten_and_copy_device_tree(); setup_cpuinfo(); -- cgit v1.2.3 From 75dd7e4bb663c7047c7d1bd4dad26f8c048851be Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 31 Mar 2017 18:31:25 +0100 Subject: Documentation/filesystems: fix documentation for ->getattr() Following the recent merge of statx, correct the documented prototype for the ->getattr() inode operation, and add an entry to the porting file. Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 3 +-- Documentation/filesystems/porting | 6 ++++++ Documentation/filesystems/vfs.txt | 3 +-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index fdcfdd79682a..fe25787ff6d4 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -58,8 +58,7 @@ prototypes: int (*permission) (struct inode *, int, unsigned int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (const struct path *, struct dentry *, struct kstat *, - u32, unsigned int); + int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 95280079c0b3..5fb17f49f7a2 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -600,3 +600,9 @@ in your dentry operations instead. [recommended] ->readlink is optional for symlinks. Don't set, unless filesystem needs to fake something for readlink(2). +-- +[mandatory] + ->getattr() is now passed a struct path rather than a vfsmount and + dentry separately, and it now has request_mask and query_flags arguments + to specify the fields and sync type requested by statx. Filesystems not + supporting any statx-specific features may ignore the new arguments. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 569211703721..94dd27ef4a76 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -382,8 +382,7 @@ struct inode_operations { int (*permission) (struct inode *, int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (const struct path *, struct dentry *, struct kstat *, - u32, unsigned int); + int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, -- cgit v1.2.3 From 8c7493aa3e9ae90f90196f4d4c1398ad143cba7b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 31 Mar 2017 18:31:32 +0100 Subject: statx: reject unknown flags when using NULL path The statx() system call currently accepts unknown flags when called with a NULL path to operate on a file descriptor. Left unchanged, this could make it hard to introduce new query flags in the future, since applications may not be able to tell whether a given flag is supported. Fix this by failing the system call with EINVAL if any flags other than KSTAT_QUERY_FLAGS are specified in combination with a NULL path. Arguably, we could still permit known lookup-related flags such as AT_SYMLINK_NOFOLLOW. However, that would be inconsistent with how sys_utimensat() behaves when passed a NULL path, which seems to be the closest precedent. And given that the NULL path case is (I believe) mainly intended to be used to implement a wrapper function like fstatx() that doesn't have a path argument, I think rejecting lookup-related flags too is probably the best choice. Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/stat.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/stat.c b/fs/stat.c index fa0be59340cc..df484a60846d 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -130,9 +130,13 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_statx_fd(unsigned int fd, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - struct fd f = fdget_raw(fd); + struct fd f; int error = -EBADF; + if (query_flags & ~KSTAT_QUERY_FLAGS) + return -EINVAL; + + f = fdget_raw(fd); if (f.file) { error = vfs_getattr(&f.file->f_path, stat, request_mask, query_flags); -- cgit v1.2.3 From b15fb70b82299f92bb8d591c9d1731cb23fa8290 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 31 Mar 2017 18:31:40 +0100 Subject: statx: remove incorrect part of vfs_statx() comment request_mask and query_flags are function arguments, not passed in struct kstat. So remove the part of the comment which claims otherwise. This was apparently left over from an earlier version of the statx patch. Signed-off-by: Eric Biggers Signed-off-by: David Howells Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/stat.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/stat.c b/fs/stat.c index df484a60846d..b792dd201c31 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -159,9 +159,6 @@ EXPORT_SYMBOL(vfs_statx_fd); * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink * at the given name from being referenced. * - * The caller must have preset stat->request_mask as for vfs_getattr(). The - * flags are also used to load up stat->query_flags. - * * 0 will be returned on success, and a -ve error code if unsuccessful. */ int vfs_statx(int dfd, const char __user *filename, int flags, -- cgit v1.2.3 From 64bd72048a2ac07efed70debe606a1c6e5e03554 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 31 Mar 2017 18:31:48 +0100 Subject: statx: optimize copy of struct statx to userspace I found that statx() was significantly slower than stat(). As a microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs file to the same with statx() passed a NULL path: $ time ./stat_benchmark real 0m1.464s user 0m0.275s sys 0m1.187s $ time ./statx_benchmark real 0m5.530s user 0m0.281s sys 0m5.247s statx is expected to be a little slower than stat because struct statx is larger than struct stat, but not by *that* much. It turns out that most of the overhead was in copying struct statx to userspace, mostly in all the stac/clac instructions that got generated for each __put_user() call. (This was on x86_64, but some other architectures, e.g. arm64, have something similar now too.) stat() instead initializes its struct on the stack and copies it to userspace with a single call to copy_to_user(). This turns out to be much faster, and changing statx to do this makes it almost as fast as stat: $ time ./statx_benchmark real 0m1.624s user 0m0.270s sys 0m1.354s For zeroing the reserved fields, start by zeroing the full struct with memset. This makes it clear that every byte copied to userspace is initialized, even implicit padding bytes (though there are none currently). In the scenarios I tested, it also performed the same as a designated initializer. Manually initializing each field was still slightly faster, but would have been more error-prone and less verifiable. Also rename statx_set_result() to cp_statx() for consistency with cp_old_stat() et al., and make it noinline so that struct statx doesn't add to the stack usage during the main portion of the syscall execution. Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/stat.c | 74 +++++++++++++++++++++++++++------------------------------------ 1 file changed, 32 insertions(+), 42 deletions(-) diff --git a/fs/stat.c b/fs/stat.c index b792dd201c31..ab27f2868588 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -510,46 +510,37 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ -static inline int __put_timestamp(struct timespec *kts, - struct statx_timestamp __user *uts) +static noinline_for_stack int +cp_statx(const struct kstat *stat, struct statx __user *buffer) { - return (__put_user(kts->tv_sec, &uts->tv_sec ) || - __put_user(kts->tv_nsec, &uts->tv_nsec ) || - __put_user(0, &uts->__reserved )); -} - -/* - * Set the statx results. - */ -static long statx_set_result(struct kstat *stat, struct statx __user *buffer) -{ - uid_t uid = from_kuid_munged(current_user_ns(), stat->uid); - gid_t gid = from_kgid_munged(current_user_ns(), stat->gid); - - if (__put_user(stat->result_mask, &buffer->stx_mask ) || - __put_user(stat->mode, &buffer->stx_mode ) || - __clear_user(&buffer->__spare0, sizeof(buffer->__spare0)) || - __put_user(stat->nlink, &buffer->stx_nlink ) || - __put_user(uid, &buffer->stx_uid ) || - __put_user(gid, &buffer->stx_gid ) || - __put_user(stat->attributes, &buffer->stx_attributes ) || - __put_user(stat->blksize, &buffer->stx_blksize ) || - __put_user(MAJOR(stat->rdev), &buffer->stx_rdev_major ) || - __put_user(MINOR(stat->rdev), &buffer->stx_rdev_minor ) || - __put_user(MAJOR(stat->dev), &buffer->stx_dev_major ) || - __put_user(MINOR(stat->dev), &buffer->stx_dev_minor ) || - __put_timestamp(&stat->atime, &buffer->stx_atime ) || - __put_timestamp(&stat->btime, &buffer->stx_btime ) || - __put_timestamp(&stat->ctime, &buffer->stx_ctime ) || - __put_timestamp(&stat->mtime, &buffer->stx_mtime ) || - __put_user(stat->ino, &buffer->stx_ino ) || - __put_user(stat->size, &buffer->stx_size ) || - __put_user(stat->blocks, &buffer->stx_blocks ) || - __clear_user(&buffer->__spare1, sizeof(buffer->__spare1)) || - __clear_user(&buffer->__spare2, sizeof(buffer->__spare2))) - return -EFAULT; - - return 0; + struct statx tmp; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.stx_mask = stat->result_mask; + tmp.stx_blksize = stat->blksize; + tmp.stx_attributes = stat->attributes; + tmp.stx_nlink = stat->nlink; + tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid); + tmp.stx_mode = stat->mode; + tmp.stx_ino = stat->ino; + tmp.stx_size = stat->size; + tmp.stx_blocks = stat->blocks; + tmp.stx_atime.tv_sec = stat->atime.tv_sec; + tmp.stx_atime.tv_nsec = stat->atime.tv_nsec; + tmp.stx_btime.tv_sec = stat->btime.tv_sec; + tmp.stx_btime.tv_nsec = stat->btime.tv_nsec; + tmp.stx_ctime.tv_sec = stat->ctime.tv_sec; + tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec; + tmp.stx_mtime.tv_sec = stat->mtime.tv_sec; + tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec; + tmp.stx_rdev_major = MAJOR(stat->rdev); + tmp.stx_rdev_minor = MINOR(stat->rdev); + tmp.stx_dev_major = MAJOR(stat->dev); + tmp.stx_dev_minor = MINOR(stat->dev); + + return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } /** @@ -573,8 +564,6 @@ SYSCALL_DEFINE5(statx, if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; - if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer))) - return -EFAULT; if (filename) error = vfs_statx(dfd, filename, flags, &stat, mask); @@ -582,7 +571,8 @@ SYSCALL_DEFINE5(statx, error = vfs_statx_fd(dfd, &stat, mask, flags); if (error) return error; - return statx_set_result(&stat, buffer); + + return cp_statx(&stat, buffer); } /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ -- cgit v1.2.3 From 99652ea56a4186bc5bf8a3721c5353f41b35ebcb Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 31 Mar 2017 18:31:56 +0100 Subject: ext4: Add statx support Return enhanced file attributes from the Ext4 filesystem. This includes the following: (1) The inode creation time (i_crtime) as stx_btime, setting STATX_BTIME. (2) Certain FS_xxx_FL flags are mapped to stx_attribute flags. This requires that all ext4 inodes have a getattr call, not just some of them, so to this end, split the ext4_getattr() function and only call part of it where appropriate. Example output: [root@andromeda ~]# touch foo [root@andromeda ~]# chattr +ai foo [root@andromeda ~]# /tmp/test-statx foo statx(foo) = 0 results=fff Size: 0 Blocks: 0 IO Block: 4096 regular file Device: 08:12 Inode: 2101950 Links: 1 Access: (0644/-rw-r--r--) Uid: 0 Gid: 0 Access: 2016-02-11 17:08:29.031795451+0000 Modify: 2016-02-11 17:08:29.031795451+0000 Change: 2016-02-11 17:11:11.987790114+0000 Birth: 2016-02-11 17:08:29.031795451+0000 Attributes: 0000000000000030 (-------- -------- -------- -------- -------- -------- -------- --ai----) Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/ext4/ext4.h | 1 + fs/ext4/file.c | 2 +- fs/ext4/inode.c | 35 ++++++++++++++++++++++++++++++++--- fs/ext4/namei.c | 2 ++ fs/ext4/symlink.c | 3 +++ 5 files changed, 39 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f493af666591..fb69ee2388db 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2466,6 +2466,7 @@ extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); +extern int ext4_file_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int ext4_sync_inode(handle_t *, struct inode *); extern void ext4_dirty_inode(struct inode *, int); extern int ext4_change_inode_journal_flag(struct inode *, int); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8210c1f43556..cefa9835f275 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -744,7 +744,7 @@ const struct file_operations ext4_file_operations = { const struct inode_operations ext4_file_inode_operations = { .setattr = ext4_setattr, - .getattr = ext4_getattr, + .getattr = ext4_file_getattr, .listxattr = ext4_listxattr, .get_acl = ext4_get_acl, .set_acl = ext4_set_acl, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4247d8d25687..5d02b922afa3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5390,11 +5390,40 @@ err_out: int ext4_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { - struct inode *inode; - unsigned long long delalloc_blocks; + struct inode *inode = d_inode(path->dentry); + struct ext4_inode *raw_inode; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int flags; + + if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = ei->i_crtime.tv_sec; + stat->btime.tv_nsec = ei->i_crtime.tv_nsec; + } + + flags = ei->i_flags & EXT4_FL_USER_VISIBLE; + if (flags & EXT4_APPEND_FL) + stat->attributes |= STATX_ATTR_APPEND; + if (flags & EXT4_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; + if (flags & EXT4_ENCRYPT_FL) + stat->attributes |= STATX_ATTR_ENCRYPTED; + if (flags & EXT4_IMMUTABLE_FL) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (flags & EXT4_NODUMP_FL) + stat->attributes |= STATX_ATTR_NODUMP; - inode = d_inode(path->dentry); generic_fillattr(inode, stat); + return 0; +} + +int ext4_file_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + u64 delalloc_blocks; + + ext4_getattr(path, stat, request_mask, query_flags); /* * If there is inline data in the inode, the inode will normally not diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6ad612c576fc..07e5e1405771 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3912,6 +3912,7 @@ const struct inode_operations ext4_dir_inode_operations = { .tmpfile = ext4_tmpfile, .rename = ext4_rename2, .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, .get_acl = ext4_get_acl, .set_acl = ext4_set_acl, @@ -3920,6 +3921,7 @@ const struct inode_operations ext4_dir_inode_operations = { const struct inode_operations ext4_special_inode_operations = { .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, .get_acl = ext4_get_acl, .set_acl = ext4_set_acl, diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 73b184d161fc..5c8fc53cb0e5 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -85,17 +85,20 @@ errout: const struct inode_operations ext4_encrypted_symlink_inode_operations = { .get_link = ext4_encrypted_get_link, .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, }; const struct inode_operations ext4_symlink_inode_operations = { .get_link = page_get_link, .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, }; const struct inode_operations ext4_fast_symlink_inode_operations = { .get_link = simple_get_link, .setattr = ext4_setattr, + .getattr = ext4_getattr, .listxattr = ext4_listxattr, }; -- cgit v1.2.3 From 5f955f26f3d42d04aba65590a32eb70eedb7f37d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 31 Mar 2017 18:32:03 +0100 Subject: xfs: report crtime and attribute flags to statx statx has the ability to report inode creation times and inode flags, so hook up di_crtime and di_flags to that functionality. Signed-off-by: Darrick J. Wong Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/xfs/xfs_iops.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 229cc6a6d8ef..ebfc13350f9a 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -516,6 +516,20 @@ xfs_vn_getattr( stat->blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + if (ip->i_d.di_version == 3) { + if (request_mask & STATX_BTIME) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = ip->i_d.di_crtime.t_sec; + stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec; + } + } + + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP) + stat->attributes |= STATX_ATTR_NODUMP; switch (inode->i_mode & S_IFMT) { case S_IFBLK: -- cgit v1.2.3 From 47071aee6a1956524b9929b3b821f6d2f8cae23c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 31 Mar 2017 18:32:10 +0100 Subject: statx: Reserve the top bit of the mask for future struct expansion Reserve the top bit of the mask for future expansion of the statx struct and give an error if statx() sees it set. All the other bits are ignored if we see them set but don't support the bit; we just clear the bit in the returned mask. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/stat.c | 2 ++ include/uapi/linux/stat.h | 1 + 2 files changed, 3 insertions(+) diff --git a/fs/stat.c b/fs/stat.c index ab27f2868588..0c7e6cdc435c 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -562,6 +562,8 @@ SYSCALL_DEFINE5(statx, struct kstat stat; int error; + if (mask & STATX__RESERVED) + return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 51a6b86e3700..0869b9eaa8ce 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -152,6 +152,7 @@ struct statx { #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ #define STATX_ALL 0x00000fffU /* All currently supported flags */ +#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ /* * Attributes to be found in stx_attributes -- cgit v1.2.3 From 3209f68b3ca4667069923a325c88b21131bfdf9f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 31 Mar 2017 18:32:17 +0100 Subject: statx: Include a mask for stx_attributes in struct statx Include a mask in struct stat to indicate which bits of stx_attributes the filesystem actually supports. This would also be useful if we add another system call that allows you to do a 'bulk attribute set' and pass in a statx struct with the masks appropriately set to say what you want to set. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/ext4/inode.c | 6 ++++++ fs/stat.c | 1 + include/linux/stat.h | 1 + include/uapi/linux/stat.h | 4 ++-- samples/statx/test-statx.c | 12 ++++++++---- 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5d02b922afa3..b9ffa9f4191f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5413,6 +5413,12 @@ int ext4_getattr(const struct path *path, struct kstat *stat, if (flags & EXT4_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; + stat->attributes_mask |= (STATX_ATTR_APPEND | + STATX_ATTR_COMPRESSED | + STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP); + generic_fillattr(inode, stat); return 0; } diff --git a/fs/stat.c b/fs/stat.c index 0c7e6cdc435c..c6c963b2546b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -527,6 +527,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_ino = stat->ino; tmp.stx_size = stat->size; tmp.stx_blocks = stat->blocks; + tmp.stx_attributes_mask = stat->attributes_mask; tmp.stx_atime.tv_sec = stat->atime.tv_sec; tmp.stx_atime.tv_nsec = stat->atime.tv_nsec; tmp.stx_btime.tv_sec = stat->btime.tv_sec; diff --git a/include/linux/stat.h b/include/linux/stat.h index c76e524fb34b..64b6b3aece21 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -26,6 +26,7 @@ struct kstat { unsigned int nlink; uint32_t blksize; /* Preferred I/O size */ u64 attributes; + u64 attributes_mask; #define KSTAT_ATTR_FS_IOC_FLAGS \ (STATX_ATTR_COMPRESSED | \ STATX_ATTR_IMMUTABLE | \ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 0869b9eaa8ce..d538897b8e08 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -114,7 +114,7 @@ struct statx { __u64 stx_ino; /* Inode number */ __u64 stx_size; /* File size */ __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 __spare1[1]; + __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ /* 0x40 */ struct statx_timestamp stx_atime; /* Last access time */ struct statx_timestamp stx_btime; /* File creation time */ @@ -155,7 +155,7 @@ struct statx { #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ /* - * Attributes to be found in stx_attributes + * Attributes to be found in stx_attributes and masked in stx_attributes_mask. * * These give information about the features or the state of a file that might * be of use to ordinary userspace programs such as GUIs or ls rather than diff --git a/samples/statx/test-statx.c b/samples/statx/test-statx.c index 8571d766331d..d4d77b09412c 100644 --- a/samples/statx/test-statx.c +++ b/samples/statx/test-statx.c @@ -141,8 +141,8 @@ static void dump_statx(struct statx *stx) if (stx->stx_mask & STATX_BTIME) print_time(" Birth: ", &stx->stx_btime); - if (stx->stx_attributes) { - unsigned char bits; + if (stx->stx_attributes_mask) { + unsigned char bits, mbits; int loop, byte; static char attr_representation[64 + 1] = @@ -160,14 +160,18 @@ static void dump_statx(struct statx *stx) printf("Attributes: %016llx (", stx->stx_attributes); for (byte = 64 - 8; byte >= 0; byte -= 8) { bits = stx->stx_attributes >> byte; + mbits = stx->stx_attributes_mask >> byte; for (loop = 7; loop >= 0; loop--) { int bit = byte + loop; - if (bits & 0x80) + if (!(mbits & 0x80)) + putchar('.'); /* Not supported */ + else if (bits & 0x80) putchar(attr_representation[63 - bit]); else - putchar('-'); + putchar('-'); /* Not set */ bits <<= 1; + mbits <<= 1; } if (byte) putchar(' '); -- cgit v1.2.3 From 2b5efc089769cd2aa583880d29416d00e7441f39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 25 Mar 2017 00:43:43 -0400 Subject: alpha: fix stack smashing in old_adjtimex(2) Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 0b961093ca5c..6d76e528ab8f 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1290,7 +1290,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) /* copy relevant bits of struct timex. */ if (copy_from_user(&txc, txc_p, offsetof(struct timex32, time)) || copy_from_user(&txc.tick, &txc_p->tick, sizeof(struct timex32) - - offsetof(struct timex32, time))) + offsetof(struct timex32, tick))) return -EFAULT; ret = do_adjtimex(&txc); -- cgit v1.2.3 From 232b8e3b1d4946a45e3b9dd4c282b12a085dd39d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 31 Mar 2017 12:50:48 +0200 Subject: KVM: s390: remove change-recording override support Change-recording override (CO) was never implemented in any machine. According to the architecture it is unpredictable if a translation-specification exception will be recognized if the bit is set and EDAT1 does not apply. Therefore the easiest solution is to simply ignore the bit. This also fixes commit cd1836f583d7 ("KVM: s390: instruction-execution-protection support"). A guest may enable instruction-execution-protection (IEP) but not EDAT1. In such a case the guest_translate() function (arch/s390/kvm/gaccess.c) will report a specification exception on pages that have the IEP bit set while it should not. It might make sense to add full IEP support to guest_translate() and the GACC_IFETCH case. However, as far as I can tell the GACC_IFETCH case is currently only used after an instruction was executed in order to fetch the failing instruction. So there is no additional problem *currently*. Fixes: cd1836f583d7 ("KVM: s390: instruction-execution-protection support") Signed-off-by: Heiko Carstens Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index d55c829a5944..ddbffb715b40 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -168,8 +168,7 @@ union page_table_entry { unsigned long z : 1; /* Zero Bit */ unsigned long i : 1; /* Page-Invalid Bit */ unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long co : 1; /* Change-Recording Override */ - unsigned long : 8; + unsigned long : 9; }; }; @@ -745,8 +744,6 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, return PGM_PAGE_TRANSLATION; if (pte.z) return PGM_TRANSLATION_SPEC; - if (pte.co && !edat1) - return PGM_TRANSLATION_SPEC; dat_protection |= pte.p; raddr.pfra = pte.pfra; real_address: @@ -1182,7 +1179,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); if (!rc && pte.i) rc = PGM_PAGE_TRANSLATION; - if (!rc && (pte.z || (pte.co && sg->edat_level < 1))) + if (!rc && pte.z) rc = PGM_TRANSLATION_SPEC; shadow_page: pte.p |= dat_protection; -- cgit v1.2.3 From 78420281a9d74014af7616958806c3aba056319e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Apr 2017 12:22:20 -0700 Subject: xfs: rework the inline directory verifiers The inline directory verifiers should be called on the inode fork data, which means after iformat_local on the read side, and prior to ifork_flush on the write side. This makes the fork verifier more consistent with the way buffer verifiers work -- i.e. they will operate on the memory buffer that the code will be reading and writing directly. Furthermore, revise the verifier function to return -EFSCORRUPTED so that we don't flood the logs with corruption messages and assert notices. This has been a particular problem with xfs/348, which triggers the XFS_WANT_CORRUPTED_RETURN assertions, which halts the kernel when CONFIG_XFS_DEBUG=y. Disk corruption isn't supposed to do that, at least not in a verifier. Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_dir2_priv.h | 3 +- fs/xfs/libxfs/xfs_dir2_sf.c | 63 +++++++++++++++++++++++++++--------------- fs/xfs/libxfs/xfs_inode_fork.c | 35 +++++++++-------------- fs/xfs/libxfs/xfs_inode_fork.h | 2 +- fs/xfs/xfs_inode.c | 19 +++++++------ 5 files changed, 66 insertions(+), 56 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index eb00bc133bca..39f8604f764e 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -125,8 +125,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); -extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp, - int size); +extern int xfs_dir2_sf_verify(struct xfs_inode *ip); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 96b45cd6c63f..e84af093b2ab 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -632,36 +632,49 @@ xfs_dir2_sf_check( /* Verify the consistency of an inline directory. */ int xfs_dir2_sf_verify( - struct xfs_mount *mp, - struct xfs_dir2_sf_hdr *sfp, - int size) + struct xfs_inode *ip) { + struct xfs_mount *mp = ip->i_mount; + struct xfs_dir2_sf_hdr *sfp; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next_sfep; char *endp; const struct xfs_dir_ops *dops; + struct xfs_ifork *ifp; xfs_ino_t ino; int i; int i8count; int offset; + int size; + int error; __uint8_t filetype; + ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL); + /* + * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops, + * so we can only trust the mountpoint to have the right pointer. + */ dops = xfs_dir_get_ops(mp, NULL); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; + size = ifp->if_bytes; + /* * Give up if the directory is way too short. */ - XFS_WANT_CORRUPTED_RETURN(mp, size > - offsetof(struct xfs_dir2_sf_hdr, parent)); - XFS_WANT_CORRUPTED_RETURN(mp, size >= - xfs_dir2_sf_hdr_size(sfp->i8count)); + if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) || + size < xfs_dir2_sf_hdr_size(sfp->i8count)) + return -EFSCORRUPTED; endp = (char *)sfp + size; /* Check .. entry */ ino = dops->sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; - XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; offset = dops->data_first_offset; /* Check all reported entries */ @@ -672,12 +685,12 @@ xfs_dir2_sf_verify( * Check the fixed-offset parts of the structure are * within the data buffer. */ - XFS_WANT_CORRUPTED_RETURN(mp, - ((char *)sfep + sizeof(*sfep)) < endp); + if (((char *)sfep + sizeof(*sfep)) >= endp) + return -EFSCORRUPTED; /* Don't allow names with known bad length. */ - XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0); - XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN); + if (sfep->namelen == 0) + return -EFSCORRUPTED; /* * Check that the variable-length part of the structure is @@ -685,33 +698,39 @@ xfs_dir2_sf_verify( * name component, so nextentry is an acceptable test. */ next_sfep = dops->sf_nextentry(sfp, sfep); - XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep); + if (endp < (char *)next_sfep) + return -EFSCORRUPTED; /* Check that the offsets always increase. */ - XFS_WANT_CORRUPTED_RETURN(mp, - xfs_dir2_sf_get_offset(sfep) >= offset); + if (xfs_dir2_sf_get_offset(sfep) < offset) + return -EFSCORRUPTED; /* Check the inode number. */ ino = dops->sf_get_ino(sfp, sfep); i8count += ino > XFS_DIR2_MAX_SHORT_INUM; - XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + error = xfs_dir_ino_validate(mp, ino); + if (error) + return error; /* Check the file type. */ filetype = dops->sf_get_ftype(sfep); - XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX); + if (filetype >= XFS_DIR3_FT_MAX) + return -EFSCORRUPTED; offset = xfs_dir2_sf_get_offset(sfep) + dops->data_entsize(sfep->namelen); sfep = next_sfep; } - XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count); - XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp); + if (i8count != sfp->i8count) + return -EFSCORRUPTED; + if ((void *)sfep != (void *)endp) + return -EFSCORRUPTED; /* Make sure this whole thing ought to be in local format. */ - XFS_WANT_CORRUPTED_RETURN(mp, offset + - (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + - (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize); + if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize) + return -EFSCORRUPTED; return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 9653e964eda4..8a37efe04de3 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -212,6 +212,16 @@ xfs_iformat_fork( if (error) return error; + /* Check inline dir contents. */ + if (S_ISDIR(VFS_I(ip)->i_mode) && + dip->di_format == XFS_DINODE_FMT_LOCAL) { + error = xfs_dir2_sf_verify(ip); + if (error) { + xfs_idestroy_fork(ip, XFS_DATA_FORK); + return error; + } + } + if (xfs_is_reflink_inode(ip)) { ASSERT(ip->i_cowfp == NULL); xfs_ifork_init_cow(ip); @@ -322,8 +332,6 @@ xfs_iformat_local( int whichfork, int size) { - int error; - /* * If the size is unreasonable, then something * is wrong and we just bail out rather than crash in @@ -339,14 +347,6 @@ xfs_iformat_local( return -EFSCORRUPTED; } - if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { - error = xfs_dir2_sf_verify(ip->i_mount, - (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip), - size); - if (error) - return error; - } - xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size); return 0; } @@ -867,7 +867,7 @@ xfs_iextents_copy( * In these cases, the format always takes precedence, because the * format indicates the current state of the fork. */ -int +void xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, @@ -877,7 +877,6 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; - int error; static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = @@ -886,7 +885,7 @@ xfs_iflush_fork( { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; if (!iip) - return 0; + return; ifp = XFS_IFORK_PTR(ip, whichfork); /* * This can happen if we gave up in iformat in an error path, @@ -894,19 +893,12 @@ xfs_iflush_fork( */ if (!ifp) { ASSERT(whichfork == XFS_ATTR_FORK); - return 0; + return; } cp = XFS_DFORK_PTR(dip, whichfork); mp = ip->i_mount; switch (XFS_IFORK_FORMAT(ip, whichfork)) { case XFS_DINODE_FMT_LOCAL: - if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { - error = xfs_dir2_sf_verify(mp, - (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data, - ifp->if_bytes); - if (error) - return error; - } if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_u1.if_data != NULL); @@ -959,7 +951,6 @@ xfs_iflush_fork( ASSERT(0); break; } - return 0; } /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 132dc59fdde6..7fb8365326d1 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -140,7 +140,7 @@ typedef struct xfs_ifork { struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); -int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, +void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c7fe2c2123ab..7605d8396596 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -50,6 +50,7 @@ #include "xfs_log.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_inode_zone; @@ -3475,7 +3476,6 @@ xfs_iflush_int( struct xfs_inode_log_item *iip = ip->i_itemp; struct xfs_dinode *dip; struct xfs_mount *mp = ip->i_mount; - int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3547,6 +3547,12 @@ xfs_iflush_int( if (ip->i_d.di_version < 3) ip->i_d.di_flushiter++; + /* Check the inline directory data. */ + if (S_ISDIR(VFS_I(ip)->i_mode) && + ip->i_d.di_format == XFS_DINODE_FMT_LOCAL && + xfs_dir2_sf_verify(ip)) + goto corrupt_out; + /* * Copy the dirty parts of the inode into the on-disk inode. We always * copy out the core of the inode, because if the inode is dirty at all @@ -3558,14 +3564,9 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); - if (error) - return error; - if (XFS_IFORK_Q(ip)) { - error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); - if (error) - return error; - } + xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); + if (XFS_IFORK_Q(ip)) + xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); xfs_inobp_check(mp, bp); /* -- cgit v1.2.3 From 3dd09d5a8589c640abb49cfcf92b4ed669eafad1 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Mon, 3 Apr 2017 12:22:29 -0700 Subject: xfs: Honor FALLOC_FL_KEEP_SIZE when punching ends of files When punching past EOF on XFS, fallocate(mode=PUNCH_HOLE|KEEP_SIZE) will round the file size up to the nearest multiple of PAGE_SIZE: calvinow@vm-disks/generic-xfs-1 ~$ dd if=/dev/urandom of=test bs=2048 count=1 calvinow@vm-disks/generic-xfs-1 ~$ stat test Size: 2048 Blocks: 8 IO Block: 4096 regular file calvinow@vm-disks/generic-xfs-1 ~$ fallocate -n -l 2048 -o 2048 -p test calvinow@vm-disks/generic-xfs-1 ~$ stat test Size: 4096 Blocks: 8 IO Block: 4096 regular file Commit 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes") replaced xfs_zero_remaining_bytes() with calls to iomap helpers. The new helpers don't enforce that [pos,offset) lies strictly on [0,i_size) when being called from xfs_free_file_space(), so by "leaking" these ranges into xfs_zero_range() we get this buggy behavior. Fix this by reintroducing the checks xfs_zero_remaining_bytes() did against i_size at the bottom of xfs_free_file_space(). Reported-by: Aaron Gao Fixes: 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes") Cc: Christoph Hellwig Cc: Brian Foster Cc: # 4.8+ Signed-off-by: Calvin Owens Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 8b75dcea5966..828532ce0adc 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1311,8 +1311,16 @@ xfs_free_file_space( /* * Now that we've unmap all full blocks we'll have to zero out any * partial block at the beginning and/or end. xfs_zero_range is - * smart enough to skip any holes, including those we just created. + * smart enough to skip any holes, including those we just created, + * but we must take care not to zero beyond EOF and enlarge i_size. */ + + if (offset >= XFS_ISIZE(ip)) + return 0; + + if (offset + len > XFS_ISIZE(ip)) + len = XFS_ISIZE(ip) - offset; + return xfs_zero_range(ip, offset, len, NULL); } -- cgit v1.2.3 From bf9216f922612d2db7666aae01e65064da2ffb3a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 3 Apr 2017 12:22:39 -0700 Subject: xfs: fix kernel memory exposure problems Fix a memory exposure problems in inumbers where we allocate an array of structures with holes, fail to zero the holes, then blindly copy the kernel memory contents (junk and all) into userspace. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_itable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2a6d9b1558e0..26d67ce3c18d 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -583,7 +583,7 @@ xfs_inumbers( return error; bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer))); - buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP); + buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP); do { struct xfs_inobt_rec_incore r; int stat; -- cgit v1.2.3 From 280489daa68bd20364b322c11e3f429a0212c611 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Mar 2017 17:43:48 +0100 Subject: drm/msm: adreno: fix build error without debugfs The newly added a5xx support fails to build when debugfs is diabled: drivers/gpu/drm/msm/adreno/a5xx_gpu.c:849:4: error: 'struct msm_gpu_funcs' has no member named 'show' drivers/gpu/drm/msm/adreno/a5xx_gpu.c:849:11: error: 'a5xx_show' undeclared here (not in a function); did you mean 'a5xx_irq'? This adds a missing #ifdef. Fixes: b5f103ab98c7 ("drm/msm: gpu: Add A5XX target support") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 4414cf73735d..f0c8bd74ca91 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -860,7 +860,9 @@ static const struct adreno_gpu_funcs funcs = { .idle = a5xx_idle, .irq = a5xx_irq, .destroy = a5xx_destroy, +#ifdef CONFIG_DEBUG_FS .show = a5xx_show, +#endif }, .get_timestamp = a5xx_get_timestamp, }; -- cgit v1.2.3 From f456d348b6320a2597a3f1fff3ad0011c5678603 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 7 Mar 2017 09:50:27 -0700 Subject: drm/msm: Fix wrong pointer check in a5xx_destroy Instead of checking for a5xx_gpu->gpmu_iova during destroy we accidently check a5xx_gpu->gpmu_bo. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index f0c8bd74ca91..36602ac7e248 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -534,7 +534,7 @@ static void a5xx_destroy(struct msm_gpu *gpu) } if (a5xx_gpu->gpmu_bo) { - if (a5xx_gpu->gpmu_bo) + if (a5xx_gpu->gpmu_iova) msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id); drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo); } -- cgit v1.2.3 From 1a5dff5d74e55608a9632a1d030bb79196e0755c Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Tue, 7 Mar 2017 10:02:51 -0700 Subject: drm/msm: Don't allow zero sized buffer objects Zero sized buffer objects tend to make various bits of the GEM infrastructure complain: WARNING: CPU: 1 PID: 2323 at drivers/gpu/drm/drm_mm.c:389 drm_mm_insert_node_generic+0x258/0x2f0 Modules linked in: CPU: 1 PID: 2323 Comm: drm-api-test Tainted: G W 4.9.0-rc4-00906-g693af44 #213 Hardware name: Qualcomm Technologies, Inc. DB820c (DT) task: ffff8000d7353400 task.stack: ffff8000d7720000 PC is at drm_mm_insert_node_generic+0x258/0x2f0 LR is at drm_vma_offset_add+0x4c/0x70 Zero sized buffers serve no appreciable value to the user so disallow them at create time. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 59811f29607d..68e509b3b9e4 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -812,6 +812,12 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev, size = PAGE_ALIGN(size); + /* Disallow zero sized objects as they make the underlying + * infrastructure grumpy + */ + if (size == 0) + return ERR_PTR(-EINVAL); + ret = msm_gem_new_impl(dev, size, flags, NULL, &obj); if (ret) goto fail; -- cgit v1.2.3 From a5fef535c529b64b622f9d7eafb3ab86850f5f8f Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Thu, 16 Feb 2017 16:29:04 +0530 Subject: drm/msm/dsi: Fix bug in dsi_mgr_phy_enable A recent commit introduces a bug in dsi_mgr_phy_enable. In the non dual DSI mode, we reset the mdsi (master DSI) PHY. This isn't right since master and slave DSI exist only in dual DSI mode. For the normal mode of operation, we should simply reset the PHY of the DSI device (i.e. msm_dsi) corresponding to the current bridge. Usage of the wrong DSI pointer also resulted in a static checker warning. That too is resolved with this fix. Fixes: b62aa70a98c5 (drm/msm/dsi: Move PHY operations out of host) Reported-by: Dan Carpenter Signed-off-by: Archit Taneja Reviewed-by: Rob Clark Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 921270ea6059..a879ffa534b4 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -171,7 +171,7 @@ dsi_mgr_phy_enable(int id, } } } else { - msm_dsi_host_reset_phy(mdsi->host); + msm_dsi_host_reset_phy(msm_dsi->host); ret = enable_phy(msm_dsi, src_pll_id, &shared_timings[id]); if (ret) return ret; -- cgit v1.2.3 From 30512040ed8e1982e5ba16bb3e3a7f000ff65427 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 17 Mar 2017 09:09:48 +0530 Subject: drm/msm/mdp5: Update SSPP_MAX value 'SSPP_MAX + 1' is the max number of hwpipes that can be present on a MDP5 platform. Recently, 2 new cursor hwpipes were added, which caused overflows in arrays that used SSPP_MAX to represent the number of elements. Update the SSPP_MAX value to incorporate the extra hwpipes. Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h index 611da7a660c9..238901987e00 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h @@ -18,7 +18,8 @@ #ifndef __MDP5_PIPE_H__ #define __MDP5_PIPE_H__ -#define SSPP_MAX (SSPP_RGB3 + 1) /* TODO: Add SSPP_MAX in mdp5.xml.h */ +/* TODO: Add SSPP_MAX in mdp5.xml.h */ +#define SSPP_MAX (SSPP_CURSOR1 + 1) /* represents a hw pipe, which is dynamically assigned to a plane */ struct mdp5_hw_pipe { -- cgit v1.2.3 From d322a693f585832130c3d09f2175b8f2b3ae99e1 Mon Sep 17 00:00:00 2001 From: Vinay Simha BN Date: Tue, 14 Mar 2017 10:55:56 +0530 Subject: drm/msm/hdmi: redefinitions of macros not required 4 macros already defined in hdmi.h, which is not required to redefine in hdmi_audio.c Signed-off-by: Vinay Simha BN Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/hdmi/hdmi_audio.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c index a54d3bb5baad..8177e8511afd 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c @@ -18,13 +18,6 @@ #include #include "hdmi.h" - -/* Supported HDMI Audio channels */ -#define MSM_HDMI_AUDIO_CHANNEL_2 0 -#define MSM_HDMI_AUDIO_CHANNEL_4 1 -#define MSM_HDMI_AUDIO_CHANNEL_6 2 -#define MSM_HDMI_AUDIO_CHANNEL_8 3 - /* maps MSM_HDMI_AUDIO_CHANNEL_n consts used by audio driver to # of channels: */ static int nchannels[] = { 2, 4, 6, 8 }; -- cgit v1.2.3 From 028402d4bcfd3e99421504674cc41b0cd32768c8 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 6 Feb 2017 10:39:29 -0700 Subject: drm/msm: Make sure to detach the MMU during GPU cleanup We should be detaching the MMU before destroying the address space. To do this cleanly, the detach has to happen in adreno_gpu_cleanup() because it needs access to structs in adreno_gpu.c. Plus it is better symmetry to have the attach and detach at the same code level. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 29 +++++++++++++++++++---------- drivers/gpu/drm/msm/msm_gpu.c | 3 --- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index c9bd1e6225f4..5ae65426b4e5 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -418,18 +418,27 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, return 0; } -void adreno_gpu_cleanup(struct adreno_gpu *gpu) +void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) { - if (gpu->memptrs_bo) { - if (gpu->memptrs) - msm_gem_put_vaddr(gpu->memptrs_bo); + struct msm_gpu *gpu = &adreno_gpu->base; + + if (adreno_gpu->memptrs_bo) { + if (adreno_gpu->memptrs) + msm_gem_put_vaddr(adreno_gpu->memptrs_bo); + + if (adreno_gpu->memptrs_iova) + msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->id); + + drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo); + } + release_firmware(adreno_gpu->pm4); + release_firmware(adreno_gpu->pfp); - if (gpu->memptrs_iova) - msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id); + msm_gpu_cleanup(gpu); - drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + if (gpu->aspace) { + gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, + iommu_ports, ARRAY_SIZE(iommu_ports)); + msm_gem_address_space_destroy(gpu->aspace); } - release_firmware(gpu->pm4); - release_firmware(gpu->pfp); - msm_gpu_cleanup(&gpu->base); } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 99e05aacbee1..af5b6ba4095b 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -706,9 +706,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) msm_ringbuffer_destroy(gpu->rb); } - if (gpu->aspace) - msm_gem_address_space_destroy(gpu->aspace); - if (gpu->fctx) msm_fence_context_free(gpu->fctx); } -- cgit v1.2.3 From feb199ebef488a9f2c3550fb10524f3dac9d8abe Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Fri, 31 Mar 2017 17:06:44 +0200 Subject: PCI: thunder-pem: Fix legacy firmware PEM-specific resources SZ_16M PEM resource size includes PEM-specific register and its children resources. Reservation of the whole SZ_16M range leads to child device driver failure when pcieport driver is requesting resources: pcieport 0004:1f:00.0: can't enable device: BAR 0 [mem 0x87e0c0f00000-0x87e0c0ffffff 64bit] not claimed So we cannot reserve full 16M here and instead we want to reserve PEM-specific register only which is SZ_64K. At the end increase PEM resource to SZ_16M since this is what thunder_pem_init() call expects for proper initialization. Fixes: 9abb27c7594a ("PCI: thunder-pem: Add legacy firmware support for Cavium ThunderX host controller") Signed-off-by: Tomasz Nowicki Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v4.10+ --- drivers/pci/host/pci-thunder-pem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c index b89c373555c5..6e031b522529 100644 --- a/drivers/pci/host/pci-thunder-pem.c +++ b/drivers/pci/host/pci-thunder-pem.c @@ -375,7 +375,6 @@ static void thunder_pem_legacy_fw(struct acpi_pci_root *root, index -= node * PEM_MAX_DOM_IN_NODE; res_pem->start = PEM_RES_BASE | FIELD_PREP(PEM_NODE_MASK, node) | FIELD_PREP(PEM_INDX_MASK, index); - res_pem->end = res_pem->start + SZ_16M - 1; res_pem->flags = IORESOURCE_MEM; } @@ -399,8 +398,15 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg) */ if (ret) { thunder_pem_legacy_fw(root, res_pem); - /* Reserve PEM-specific resources and PCI configuration space */ + /* + * Reserve 64K size PEM specific resources. The full 16M range + * size is required for thunder_pem_init() call. + */ + res_pem->end = res_pem->start + SZ_64K - 1; thunder_pem_reserve_range(dev, root->segment, res_pem); + res_pem->end = res_pem->start + SZ_16M - 1; + + /* Reserve PCI configuration space as well. */ thunder_pem_reserve_range(dev, root->segment, &cfg->res); } -- cgit v1.2.3 From 6665f8a307696a0edd4c1233b4cc0f5ed6083525 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 3 Apr 2017 16:17:11 -0500 Subject: PCI: dwc: Select PCI_HOST_COMMON for hisi Without PCI_HOST_COMMON support enabled, we get a link error: drivers/pci/dwc/built-in.o: In function `hisi_pcie_map_bus': pcie-hisi.c:(.text+0x8860): undefined reference to `pci_ecam_map_bus' drivers/pci/dwc/built-in.o: In function `hisi_pcie_almost_ecam_probe': pcie-hisi.c:(.text+0x88b4): undefined reference to `pci_host_common_probe' Add an explicit 'select', as the other users have. Signed-off-by: Arnd Bergmann Signed-off-by: Bjorn Helgaas Acked-by: Jingoo Han --- drivers/pci/dwc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/dwc/Kconfig b/drivers/pci/dwc/Kconfig index dfb8a69afc28..d2d2ba5b8a68 100644 --- a/drivers/pci/dwc/Kconfig +++ b/drivers/pci/dwc/Kconfig @@ -89,6 +89,7 @@ config PCI_HISI depends on PCI_MSI_IRQ_DOMAIN select PCIEPORTBUS select PCIE_DW_HOST + select PCI_HOST_COMMON help Say Y here if you want PCIe controller support on HiSilicon Hip05 and Hip06 SoCs -- cgit v1.2.3 From ac6a3722fed67c658a435187d0254ae119d845d3 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 3 Apr 2017 15:42:58 -0400 Subject: flow dissector: correct size of storage for ARP The last argument to __skb_header_pointer() should be a buffer large enough to store struct arphdr. This can be a pointer to a struct arphdr structure. The code was previously using a pointer to a pointer to struct arphdr. By my counting the storage available both before and after is 8 bytes on x86_64. Fixes: 55733350e5e8 ("flow disector: ARP support") Reported-by: Nicolas Iooss Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index c35aae13c8d2..d98d4998213d 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -390,7 +390,7 @@ mpls: unsigned char ar_tip[4]; } *arp_eth, _arp_eth; const struct arphdr *arp; - struct arphdr *_arp; + struct arphdr _arp; arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data, hlen, &_arp); -- cgit v1.2.3 From df2729c3238ed89fb8ccf850d38c732858a5bade Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 1 Apr 2017 17:15:59 +0800 Subject: sctp: check for dst and pathmtu update in sctp_packet_config This patch is to move sctp_transport_dst_check into sctp_packet_config from sctp_packet_transmit and add pathmtu check in sctp_packet_config. With this fix, sctp can update dst or pathmtu before appending chunks, which can void dropping packets in sctp_packet_transmit when dst is obsolete or dst's mtu is changed. This patch is also to improve some other codes in sctp_packet_config. It updates packet max_size with gso_max_size, checks for dst and pathmtu, and appends ecne chunk only when packet is empty and asoc is not NULL. It makes sctp flush work better, as we only need to set up them once for one flush schedule. It's also safe, since asoc is NULL only when the packet is created by sctp_ootb_pkt_new in which it just gets the new dst, no need to do more things for it other than set packet with transport's pathmtu. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 17 ++++++++++--- net/sctp/output.c | 67 ++++++++++++++++++++++++++----------------------- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1f71ee5ab518..d75caa7a629b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -596,12 +596,23 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr) */ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) { - if (t->dst && (!dst_check(t->dst, t->dst_cookie) || - t->pathmtu != max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)), - SCTP_DEFAULT_MINSEGMENT))) + if (t->dst && !dst_check(t->dst, t->dst_cookie)) sctp_transport_dst_release(t); return t->dst; } +static inline bool sctp_transport_pmtu_check(struct sctp_transport *t) +{ + __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)), + SCTP_DEFAULT_MINSEGMENT); + + if (t->pathmtu == pmtu) + return true; + + t->pathmtu = pmtu; + + return false; +} + #endif /* __net_sctp_h__ */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 73fd178007a3..ec4d50a38713 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -86,43 +86,53 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, { struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; + struct sock *sk; pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); - packet->vtag = vtag; - if (asoc && tp->dst) { - struct sock *sk = asoc->base.sk; - - rcu_read_lock(); - if (__sk_dst_get(sk) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - - if (sk_can_gso(sk)) { - struct net_device *dev = tp->dst->dev; + /* do the following jobs only once for a flush schedule */ + if (!sctp_packet_empty(packet)) + return; - packet->max_size = dev->gso_max_size; - } else { - packet->max_size = asoc->pathmtu; - } - rcu_read_unlock(); + /* set packet max_size with pathmtu */ + packet->max_size = tp->pathmtu; + if (!asoc) + return; - } else { - packet->max_size = tp->pathmtu; + /* update dst or transport pathmtu if in need */ + sk = asoc->base.sk; + if (!sctp_transport_dst_check(tp)) { + sctp_transport_route(tp, NULL, sctp_sk(sk)); + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(sk, asoc); + } else if (!sctp_transport_pmtu_check(tp)) { + if (asoc->param_flags & SPP_PMTUD_ENABLE) + sctp_assoc_sync_pmtu(sk, asoc); } - if (ecn_capable && sctp_packet_empty(packet)) { - struct sctp_chunk *chunk; + /* If there a is a prepend chunk stick it on the list before + * any other chunks get appended. + */ + if (ecn_capable) { + struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc); - /* If there a is a prepend chunk stick it on the list before - * any other chunks get appended. - */ - chunk = sctp_get_ecne_prepend(asoc); if (chunk) sctp_packet_append_chunk(packet, chunk); } + + if (!tp->dst) + return; + + /* set packet max_size with gso_max_size if gso is enabled*/ + rcu_read_lock(); + if (__sk_dst_get(sk) != tp->dst) { + dst_hold(tp->dst); + sk_setup_caps(sk, tp->dst); + } + packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size + : asoc->pathmtu; + rcu_read_unlock(); } /* Initialize the packet structure. */ @@ -582,12 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) sh->vtag = htonl(packet->vtag); sh->checksum = 0; - /* update dst if in need */ - if (!sctp_transport_dst_check(tp)) { - sctp_transport_route(tp, NULL, sctp_sk(sk)); - if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); - } + /* drop packet if no dst */ dst = dst_clone(tp->dst); if (!dst) { IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); -- cgit v1.2.3 From 0b9aefea860063bb39e36bd7fe6c7087fed0ba87 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Sat, 1 Apr 2017 11:00:21 -0300 Subject: tcp: minimize false-positives on TCP/GRO check Markus Trippelsdorf reported that after commit dcb17d22e1c2 ("tcp: warn on bogus MSS and try to amend it") the kernel started logging the warning for a NIC driver that doesn't even support GRO. It was diagnosed that it was possibly caused on connections that were using TCP Timestamps but some packets lacked the Timestamps option. As we reduce rcv_mss when timestamps are used, the lack of them would cause the packets to be bigger than expected, although this is a valid case. As this warning is more as a hint, getting a clean-cut on the threshold is probably not worth the execution time spent on it. This patch thus alleviates the false-positives with 2 quick checks: by accounting for the entire TCP option space and also checking against the interface MTU if it's available. These changes, specially the MTU one, might mask some real positives, though if they are really happening, it's possible that sooner or later it will be triggered anyway. Reported-by: Markus Trippelsdorf Cc: Eric Dumazet Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c43119726a62..97ac6776e47d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -126,7 +126,8 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define REXMIT_LOST 1 /* retransmit packets marked lost */ #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ -static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) +static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb, + unsigned int len) { static bool __once __read_mostly; @@ -137,8 +138,9 @@ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb) rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); - pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", - dev ? dev->name : "Unknown driver"); + if (!dev || len >= dev->mtu) + pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", + dev ? dev->name : "Unknown driver"); rcu_read_unlock(); } } @@ -161,8 +163,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); - if (unlikely(icsk->icsk_ack.rcv_mss != len)) - tcp_gro_dev_warn(sk, skb); + /* Account for possibly-removed options */ + if (unlikely(len > icsk->icsk_ack.rcv_mss + + MAX_TCP_OPTION_SPACE)) + tcp_gro_dev_warn(sk, skb, len); } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. -- cgit v1.2.3 From 09a6adf53d42ca3088fa3fb41f40b768efc711ed Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Mon, 3 Apr 2017 22:51:01 -0700 Subject: arm64: mm: unaligned access by user-land should be received as SIGBUS After 52d7523 (arm64: mm: allow the kernel to handle alignment faults on user accesses) commit user-land accesses that produce unaligned exceptions like in case of aarch32 ldm/stm/ldrd/strd instructions operating on unaligned memory received by user-land as SIGSEGV. It is wrong, it should be reported as SIGBUS as it was before 52d7523 commit. Changed do_bad_area function to take signal and code parameters out of esr value using fault_info table, so in case of do_alignment_fault fault user-land will receive SIGBUS. Wrapped access to fault_info table into esr_to_fault_info function. Cc: Fixes: 52d7523 (arm64: mm: allow the kernel to handle alignment faults on user accesses) Signed-off-by: Victor Kamensky Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4bf899fb451b..1b35b8bddbfb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -42,7 +42,20 @@ #include #include -static const char *fault_name(unsigned int esr); +struct fault_info { + int (*fn)(unsigned long addr, unsigned int esr, + struct pt_regs *regs); + int sig; + int code; + const char *name; +}; + +static const struct fault_info fault_info[]; + +static inline const struct fault_info *esr_to_fault_info(unsigned int esr) +{ + return fault_info + (esr & 63); +} #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) @@ -197,10 +210,12 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, struct pt_regs *regs) { struct siginfo si; + const struct fault_info *inf; if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { + inf = esr_to_fault_info(esr); pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", - tsk->comm, task_pid_nr(tsk), fault_name(esr), sig, + tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); show_pte(tsk->mm, addr); show_regs(regs); @@ -219,14 +234,16 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re { struct task_struct *tsk = current; struct mm_struct *mm = tsk->active_mm; + const struct fault_info *inf; /* * If we are in kernel mode at this point, we have no context to * handle this fault with. */ - if (user_mode(regs)) - __do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs); - else + if (user_mode(regs)) { + inf = esr_to_fault_info(esr); + __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs); + } else __do_kernel_fault(mm, addr, esr, regs); } @@ -488,12 +505,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } -static const struct fault_info { - int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); - int sig; - int code; - const char *name; -} fault_info[] = { +static const struct fault_info fault_info[] = { { do_bad, SIGBUS, 0, "ttbr address size fault" }, { do_bad, SIGBUS, 0, "level 1 address size fault" }, { do_bad, SIGBUS, 0, "level 2 address size fault" }, @@ -560,19 +572,13 @@ static const struct fault_info { { do_bad, SIGBUS, 0, "unknown 63" }, }; -static const char *fault_name(unsigned int esr) -{ - const struct fault_info *inf = fault_info + (esr & 63); - return inf->name; -} - /* * Dispatch a data abort to the relevant handler. */ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - const struct fault_info *inf = fault_info + (esr & 63); + const struct fault_info *inf = esr_to_fault_info(esr); struct siginfo info; if (!inf->fn(addr, esr, regs)) -- cgit v1.2.3 From 8b3405e345b5a098101b0c31b264c812bba045d9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 3 Apr 2017 15:12:43 +0100 Subject: kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd In kvm_free_stage2_pgd() we don't hold the kvm->mmu_lock while calling unmap_stage2_range() on the entire memory range for the guest. This could cause problems with other callers (e.g, munmap on a memslot) trying to unmap a range. And since we have to unmap the entire Guest memory range holding a spinlock, make sure we yield the lock if necessary, after we unmap each PUD range. Fixes: commit d5d8184d35c9 ("KVM: ARM: Memory virtualization setup") Cc: stable@vger.kernel.org # v3.10+ Cc: Paolo Bonzini Cc: Marc Zyngier Cc: Christoffer Dall Cc: Mark Rutland Signed-off-by: Suzuki K Poulose [ Avoid vCPU starvation and lockup detector warnings ] Signed-off-by: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 13b9c1fa8961..582a972371cf 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -292,11 +292,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) phys_addr_t addr = start, end = start + size; phys_addr_t next; + assert_spin_locked(&kvm->mmu_lock); pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { next = stage2_pgd_addr_end(addr, end); if (!stage2_pgd_none(*pgd)) unmap_stage2_puds(kvm, pgd, addr, next); + /* + * If the range is too large, release the kvm->mmu_lock + * to prevent starvation and lockup detector warnings. + */ + if (next != end) + cond_resched_lock(&kvm->mmu_lock); } while (pgd++, addr = next, addr != end); } @@ -831,7 +838,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm) if (kvm->arch.pgd == NULL) return; + spin_lock(&kvm->mmu_lock); unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + spin_unlock(&kvm->mmu_lock); + /* Free the HW pgd, one page at a time */ free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE); kvm->arch.pgd = NULL; -- cgit v1.2.3 From 5b0d2cc2805897c14257f6dbb949639c499c3c25 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 18 Mar 2017 13:56:56 +0100 Subject: KVM: arm64: Ensure LRs are clear when they should be We currently have some code to clear the list registers on GICv3, but we never call this code, because the caller got nuked when removing the old vgic. We also used to have a similar GICv2 part, but that got lost in the process too. Let's reintroduce the logic for GICv2 and call the logic when we initialize the use of hypervisors on the CPU, for example when first loading KVM or when exiting a low power state. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/kvm/arm.c | 3 +++ include/kvm/arm_vgic.h | 1 + virt/kvm/arm/vgic/vgic-init.c | 19 +++++++++++++++++++ virt/kvm/arm/vgic/vgic-v2.c | 15 +++++++++++++++ virt/kvm/arm/vgic/vgic.h | 2 ++ 5 files changed, 40 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 96dba7cd8be7..314eb6abe1ff 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1124,6 +1124,9 @@ static void cpu_hyp_reinit(void) if (__hyp_get_vectors() == hyp_default_vectors) cpu_init_hyp_mode(NULL); } + + if (vgic_present) + kvm_vgic_init_cpu_hardware(); } static void cpu_hyp_reset(void) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b72dd2ad5f44..c0b3d999c266 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -295,6 +295,7 @@ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_hyp_init(void); +void kvm_vgic_init_cpu_hardware(void); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, bool level); diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 276139a24e6f..702f8108608d 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -391,6 +391,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +/** + * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware + * + * For a specific CPU, initialize the GIC VE hardware. + */ +void kvm_vgic_init_cpu_hardware(void) +{ + BUG_ON(preemptible()); + + /* + * We want to make sure the list registers start out clear so that we + * only have the program the used registers. + */ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_init_lrs(); + else + kvm_call_hyp(__vgic_v3_init_lrs); +} + /** * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable * according to the host GIC model. Accordingly calls either diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index b834ecdf3225..94cf4b9b6471 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -36,6 +36,21 @@ static unsigned long *u64_to_bitmask(u64 *val) return (unsigned long *)val; } +static inline void vgic_v2_write_lr(int lr, u32 val) +{ + void __iomem *base = kvm_vgic_global_state.vctrl_base; + + writel_relaxed(val, base + GICH_LR0 + (lr * 4)); +} + +void vgic_v2_init_lrs(void) +{ + int i; + + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) + vgic_v2_write_lr(i, 0); +} + void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index db28f7cadab2..91566f5aac9b 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -130,6 +130,8 @@ int vgic_v2_map_resources(struct kvm *kvm); int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, enum vgic_type); +void vgic_v2_init_lrs(void); + static inline void vgic_get_irq_kref(struct vgic_irq *irq) { if (irq->intid < VGIC_MIN_LPI) -- cgit v1.2.3 From 6d56111c92d247bb64301029fe88365aa4caf16e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 21 Mar 2017 22:05:22 +0100 Subject: KVM: arm/arm64: vgic: Fix GICC_PMR uaccess on GICv3 and clarify ABI As an oversight, for GICv2, we accidentally export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask field in the lower 5 bits of a word, meaning that userspace must always use the lower 5 bits to communicate with the KVM device and must shift the value left by 3 places to obtain the actual priority mask level. Since GICv3 supports the full 8 bits of priority masking in the ICH_VMCR, we have to fix the value we export when emulating a GICv2 on top of a hardware GICv3 and exporting the emulated GICv2 state to userspace. Take the chance to clarify this aspect of the ABI. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/virtual/kvm/devices/arm-vgic.txt | 6 ++++++ include/linux/irqchip/arm-gic.h | 3 +++ virt/kvm/arm/vgic/vgic-mmio-v2.c | 20 ++++++++++++++++++-- virt/kvm/arm/vgic/vgic-v2.c | 8 ++++---- virt/kvm/arm/vgic/vgic.h | 9 ++++++++- 5 files changed, 39 insertions(+), 7 deletions(-) diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 76e61c883347..b2f60ca8b60c 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -83,6 +83,12 @@ Groups: Bits for undefined preemption levels are RAZ/WI. + For historical reasons and to provide ABI compatibility with userspace we + export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask + field in the lower 5 bits of a word, meaning that userspace must always + use the lower 5 bits to communicate with the KVM device and must shift the + value left by 3 places to obtain the actual priority mask level. + Limitations: - Priorities are not implemented, and registers are RAZ/WI - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2. diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index eafc965b3eb8..dc30f3d057eb 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -96,6 +96,9 @@ #define GICH_MISR_EOI (1 << 0) #define GICH_MISR_U (1 << 1) +#define GICV_PMR_PRIORITY_SHIFT 3 +#define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT) + #ifndef __ASSEMBLY__ #include diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index a3ad7ff95c9b..0a4283ed9aa7 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, val = vmcr.ctlr; break; case GIC_CPU_PRIMASK: - val = vmcr.pmr; + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * the PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >> + GICV_PMR_PRIORITY_SHIFT; break; case GIC_CPU_BINPOINT: val = vmcr.bpr; @@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, vmcr.ctlr = val; break; case GIC_CPU_PRIMASK: - vmcr.pmr = val; + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * the PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) & + GICV_PMR_PRIORITY_MASK; break; case GIC_CPU_BINPOINT: vmcr.bpr = val; diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 94cf4b9b6471..b637d9c7afe3 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -206,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) GICH_VMCR_ALIAS_BINPOINT_MASK; vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & - GICH_VMCR_PRIMASK_MASK; + vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) << + GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; } @@ -222,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) GICH_VMCR_ALIAS_BINPOINT_SHIFT; vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> - GICH_VMCR_PRIMASK_SHIFT; + vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >> + GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT; } void vgic_v2_enable(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 91566f5aac9b..6cf557e9f718 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq) return irq->pending_latch || irq->line_level; } +/* + * This struct provides an intermediate representation of the fields contained + * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC + * state to userspace can generate either GICv2 or GICv3 CPU interface + * registers regardless of the hardware backed GIC used. + */ struct vgic_vmcr { u32 ctlr; u32 abpr; u32 bpr; - u32 pmr; + u32 pmr; /* Priority mask field in the GICC_PMR and + * ICC_PMR_EL1 priority field format */ /* Below member variable are valid only for GICv3 */ u32 grpen0; u32 grpen1; -- cgit v1.2.3 From 48fe9e9488743eec9b7c1addd3c93f12f2123d54 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 4 Apr 2017 14:56:05 +1000 Subject: powerpc: Don't try to fix up misaligned load-with-reservation instructions In the past, there was only one load-with-reservation instruction, lwarx, and if a program attempted a lwarx on a misaligned address, it would take an alignment interrupt and the kernel handler would emulate it as though it was lwzx, which was not really correct, but benign since it is loading the right amount of data, and the lwarx should be paired with a stwcx. to the same address, which would also cause an alignment interrupt which would result in a SIGBUS being delivered to the process. We now have 5 different sizes of load-with-reservation instruction. Of those, lharx and ldarx cause an immediate SIGBUS by luck since their entries in aligninfo[] overlap instructions which were not fixed up, but lqarx overlaps with lhz and will be emulated as such. lbarx can never generate an alignment interrupt since it only operates on 1 byte. To straighten this out and fix the lqarx case, this adds code to detect the l[hwdq]arx instructions and return without fixing them up, resulting in a SIGBUS being delivered to the process. Cc: stable@vger.kernel.org Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/align.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index cbc7c42cdb74..ec7a8b099dd9 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -807,14 +807,25 @@ int fix_alignment(struct pt_regs *regs) nb = aligninfo[instr].len; flags = aligninfo[instr].flags; - /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */ - if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) { - nb = 8; - flags = LD+SW; - } else if (IS_XFORM(instruction) && - ((instruction >> 1) & 0x3ff) == 660) { - nb = 8; - flags = ST+SW; + /* + * Handle some cases which give overlaps in the DSISR values. + */ + if (IS_XFORM(instruction)) { + switch (get_xop(instruction)) { + case 532: /* ldbrx */ + nb = 8; + flags = LD+SW; + break; + case 660: /* stdbrx */ + nb = 8; + flags = ST+SW; + break; + case 20: /* lwarx */ + case 84: /* ldarx */ + case 116: /* lharx */ + case 276: /* lqarx */ + return 0; /* not emulated ever */ + } } /* Byteswap little endian loads and stores */ -- cgit v1.2.3 From 794a8604fe6e4a311373cde57a86ad4aab9d32b8 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 3 Apr 2017 17:35:12 -0500 Subject: PCI: dwc: Fix dw_pcie_ops NULL pointer dereference Fix a crash from dereferencing a NULL dw_pcie_ops pointer. For example, on ARTPEC-6: Unable to handle kernel NULL pointer dereference at virtual address 00000004 pgd = c0204000 [00000004] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc3-next-20170321 #1 Hardware name: Axis ARTPEC-6 Platform task: db098000 task.stack: db096000 PC is at dw_pcie_writel_dbi+0x2c/0xd0 Prior to 442ec4c04d12 ("PCI: dwc: all: Split struct pcie_port into host-only and core structures"), every driver had a struct pcie_host_ops with function pointers, typically used as: if (pp->ops->readl_rc) return pp->ops->readl_rc(...); 442ec4c04d12 split struct pcie_host_ops into two pieces: struct dw_pcie_host_ops and struct dw_pcie_ops, so the above became: if (pci->ops->readl_dbi) return pci->ops->readl_dbi(...); But pcie-artpec6.c and pcie-designware-plat.c don't need the dw_pcie_ops pointers and didn't supply a pci->ops struct, which leads to NULL pointer dereferences. Supply an empty struct dw_pcie_ops to avoid the NULL pointer dereferences. [bhelgaas: changelog] Fixes: 442ec4c04d12 ("PCI: dwc: all: Split struct pcie_port into host-only and core structures") Signed-off-by: Niklas Cassel Signed-off-by: Bjorn Helgaas Acked-by: Kishon Vijay Abraham I Acked-by: Joao Pinto --- drivers/pci/dwc/pcie-artpec6.c | 4 ++++ drivers/pci/dwc/pcie-designware-plat.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/pci/dwc/pcie-artpec6.c b/drivers/pci/dwc/pcie-artpec6.c index fcd3ef845883..6d23683c0892 100644 --- a/drivers/pci/dwc/pcie-artpec6.c +++ b/drivers/pci/dwc/pcie-artpec6.c @@ -234,6 +234,9 @@ static int artpec6_add_pcie_port(struct artpec6_pcie *artpec6_pcie, return 0; } +static const struct dw_pcie_ops dw_pcie_ops = { +}; + static int artpec6_pcie_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -252,6 +255,7 @@ static int artpec6_pcie_probe(struct platform_device *pdev) return -ENOMEM; pci->dev = dev; + pci->ops = &dw_pcie_ops; artpec6_pcie->pci = pci; diff --git a/drivers/pci/dwc/pcie-designware-plat.c b/drivers/pci/dwc/pcie-designware-plat.c index b6c832ba39dd..f20d494922ab 100644 --- a/drivers/pci/dwc/pcie-designware-plat.c +++ b/drivers/pci/dwc/pcie-designware-plat.c @@ -86,6 +86,9 @@ static int dw_plat_add_pcie_port(struct pcie_port *pp, return 0; } +static const struct dw_pcie_ops dw_pcie_ops = { +}; + static int dw_plat_pcie_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -103,6 +106,7 @@ static int dw_plat_pcie_probe(struct platform_device *pdev) return -ENOMEM; pci->dev = dev; + pci->ops = &dw_pcie_ops; dw_plat_pcie->pci = pci; -- cgit v1.2.3 From ab007cc94ff9d82f5a8db8363b3becbd946e58cf Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Fri, 31 Mar 2017 10:19:26 +0200 Subject: KVM: nVMX: do not leak PML full vmexit to L1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PML feature is not exposed to guests so we should not be forwarding the vmexit either. This commit fixes BSOD 0x20001 (HYPERVISOR_ERROR) when running Hyper-V enabled Windows Server 2016 in L1 on hardware that supports PML. Fixes: 843e4330573c ("KVM: VMX: Add PML support in VMX") Signed-off-by: Ladi Prosek Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2ee00dbbbd51..605183291069 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8198,6 +8198,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); case EXIT_REASON_PREEMPTION_TIMER: return false; + case EXIT_REASON_PML_FULL: + /* We don't expose PML support to L1. */ + return false; default: return true; } -- cgit v1.2.3 From ac4cde398a96c1d28b1c28a0f69b6efd892a1c8a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 4 Apr 2017 06:27:22 -0600 Subject: xenbus: remove transaction holder from list before freeing After allocation the item is being placed on the list right away. Consequently it needs to be taken off the list before freeing in the case xenbus_dev_request_and_reply() failed, as in that case the callback (xenbus_dev_queue_reply()) is not being called (and if it was called, it should do both). Fixes: 5584ea250ae44f929feb4c7bd3877d1c5edbf813 Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_dev_frontend.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 1f4733b80c87..f3b089b7c0b6 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -442,8 +442,10 @@ static int xenbus_write_transaction(unsigned msg_type, return xenbus_command_reply(u, XS_ERROR, "ENOENT"); rc = xenbus_dev_request_and_reply(&u->u.msg, u); - if (rc) + if (rc && trans) { + list_del(&trans->list); kfree(trans); + } out: return rc; -- cgit v1.2.3 From 1fb883bb827ee8efc1cc9ea0154f953f8a219d38 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Tue, 4 Apr 2017 14:18:53 +0200 Subject: KVM: nVMX: initialize PML fields in vmcs02 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L2 was running with uninitialized PML fields which led to incomplete dirty bitmap logging. This manifested as all kinds of subtle erratic behavior of the nested guest. Fixes: 843e4330573c ("KVM: VMX: Add PML support in VMX") Signed-off-by: Ladi Prosek Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 605183291069..259e9b28ccf8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10270,6 +10270,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } + if (enable_pml) { + /* + * Conceptually we want to copy the PML address and index from + * vmcs01 here, and then back to vmcs01 on nested vmexit. But, + * since we always flush the log on each vmexit, this happens + * to be equivalent to simply resetting the fields in vmcs02. + */ + ASSERT(vmx->pml_pg); + vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); + vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + } + if (nested_cpu_has_ept(vmcs12)) { kvm_mmu_unload(vcpu); nested_ept_init_mmu_context(vcpu); -- cgit v1.2.3 From e08293a4ccbcc993ded0fdc46f1e57926b833d63 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 3 Apr 2017 12:03:13 +0200 Subject: l2tp: take reference on sessions being dumped Take a reference on the sessions returned by l2tp_session_find_nth() (and rename it l2tp_session_get_nth() to reflect this change), so that caller is assured that the session isn't going to disappear while processing it. For procfs and debugfs handlers, the session is held in the .start() callback and dropped in .show(). Given that pppol2tp_seq_session_show() dereferences the associated PPPoL2TP socket and that l2tp_dfs_seq_session_show() might call pppol2tp_show(), we also need to call the session's .ref() callback to prevent the socket from going away from under us. Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") Fixes: 0ad6614048cf ("l2tp: Add debugfs files for dumping l2tp debug info") Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 8 ++++++-- net/l2tp/l2tp_core.h | 3 ++- net/l2tp/l2tp_debugfs.c | 10 +++++++--- net/l2tp/l2tp_netlink.c | 7 +++++-- net/l2tp/l2tp_ppp.c | 10 +++++++--- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e927422d8c58..e37d9554da7b 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -327,7 +327,8 @@ struct l2tp_session *l2tp_session_get(struct net *net, } EXPORT_SYMBOL_GPL(l2tp_session_get); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref) { int hash; struct l2tp_session *session; @@ -337,6 +338,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { + l2tp_session_inc_refcount(session); + if (do_ref && session->ref) + session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; } @@ -347,7 +351,7 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth) return NULL; } -EXPORT_SYMBOL_GPL(l2tp_session_find_nth); +EXPORT_SYMBOL_GPL(l2tp_session_get_nth); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 3b9b704a84e4..8ce7818c7a9d 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -236,7 +236,8 @@ struct l2tp_session *l2tp_session_get(struct net *net, struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id); -struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth); +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, + bool do_ref); struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname, bool do_ref); struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id); diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 2d6760a2ae34..d100aed3d06f 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) } /* Show the tunnel or session context */ - if (pd->session == NULL) + if (!pd->session) { l2tp_dfs_seq_tunnel_show(m, pd->tunnel); - else + } else { l2tp_dfs_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 93e317377c66..7e3e669baac4 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -867,7 +867,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback goto out; } - session = l2tp_session_find_nth(tunnel, si); + session = l2tp_session_get_nth(tunnel, si, false); if (session == NULL) { ti++; tunnel = NULL; @@ -877,8 +877,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session, L2TP_CMD_SESSION_GET) < 0) + session, L2TP_CMD_SESSION_GET) < 0) { + l2tp_session_dec_refcount(session); break; + } + l2tp_session_dec_refcount(session); si++; } diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 26501902d1a7..7bf73091baa2 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1560,7 +1560,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { - pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); pd->session_idx++; if (pd->session == NULL) { @@ -1687,10 +1687,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) /* Show the tunnel or session context. */ - if (pd->session == NULL) + if (!pd->session) { pppol2tp_seq_tunnel_show(m, pd->tunnel); - else + } else { pppol2tp_seq_session_show(m, pd->session); + if (pd->session->deref) + pd->session->deref(pd->session); + l2tp_session_dec_refcount(pd->session); + } out: return 0; -- cgit v1.2.3 From a8919661d78b90d747b3514197e49ecf8727d08c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 3 Apr 2017 11:19:10 +0100 Subject: bnx2x: fix spelling mistake in macros HW_INTERRUT_ASSERT_SET_* Trival fix, rename HW_INTERRUT_ASSERT_SET_* to HW_INTERRUPT_ASSERT_SET_* Signed-off-by: Colin Ian King Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 6 +++--- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 0a23034bbe3f..352beff796ae 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2277,7 +2277,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCP) | \ GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RSVD_GRC)) -#define HW_INTERRUT_ASSERT_SET_0 \ +#define HW_INTERRUPT_ASSERT_SET_0 \ (AEU_INPUTS_ATTN_BITS_TSDM_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_TCM_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_TSEMI_HW_INTERRUPT | \ @@ -2290,7 +2290,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR |\ AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR |\ AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR) -#define HW_INTERRUT_ASSERT_SET_1 \ +#define HW_INTERRUPT_ASSERT_SET_1 \ (AEU_INPUTS_ATTN_BITS_QM_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_TIMERS_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_XSDM_HW_INTERRUPT | \ @@ -2318,7 +2318,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, AEU_INPUTS_ATTN_BITS_UPB_PARITY_ERROR | \ AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR |\ AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR) -#define HW_INTERRUT_ASSERT_SET_2 \ +#define HW_INTERRUPT_ASSERT_SET_2 \ (AEU_INPUTS_ATTN_BITS_CSEMI_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_CDU_HW_INTERRUPT | \ AEU_INPUTS_ATTN_BITS_DMAE_HW_INTERRUPT | \ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index ac76fc251d26..a851f95c307a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -4166,14 +4166,14 @@ static void bnx2x_attn_int_deasserted0(struct bnx2x *bp, u32 attn) bnx2x_release_phy_lock(bp); } - if (attn & HW_INTERRUT_ASSERT_SET_0) { + if (attn & HW_INTERRUPT_ASSERT_SET_0) { val = REG_RD(bp, reg_offset); - val &= ~(attn & HW_INTERRUT_ASSERT_SET_0); + val &= ~(attn & HW_INTERRUPT_ASSERT_SET_0); REG_WR(bp, reg_offset, val); BNX2X_ERR("FATAL HW block attention set0 0x%x\n", - (u32)(attn & HW_INTERRUT_ASSERT_SET_0)); + (u32)(attn & HW_INTERRUPT_ASSERT_SET_0)); bnx2x_panic(); } } @@ -4191,7 +4191,7 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn) BNX2X_ERR("FATAL error from DORQ\n"); } - if (attn & HW_INTERRUT_ASSERT_SET_1) { + if (attn & HW_INTERRUPT_ASSERT_SET_1) { int port = BP_PORT(bp); int reg_offset; @@ -4200,11 +4200,11 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn) MISC_REG_AEU_ENABLE1_FUNC_0_OUT_1); val = REG_RD(bp, reg_offset); - val &= ~(attn & HW_INTERRUT_ASSERT_SET_1); + val &= ~(attn & HW_INTERRUPT_ASSERT_SET_1); REG_WR(bp, reg_offset, val); BNX2X_ERR("FATAL HW block attention set1 0x%x\n", - (u32)(attn & HW_INTERRUT_ASSERT_SET_1)); + (u32)(attn & HW_INTERRUPT_ASSERT_SET_1)); bnx2x_panic(); } } @@ -4235,7 +4235,7 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn) } } - if (attn & HW_INTERRUT_ASSERT_SET_2) { + if (attn & HW_INTERRUPT_ASSERT_SET_2) { int port = BP_PORT(bp); int reg_offset; @@ -4244,11 +4244,11 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn) MISC_REG_AEU_ENABLE1_FUNC_0_OUT_2); val = REG_RD(bp, reg_offset); - val &= ~(attn & HW_INTERRUT_ASSERT_SET_2); + val &= ~(attn & HW_INTERRUPT_ASSERT_SET_2); REG_WR(bp, reg_offset, val); BNX2X_ERR("FATAL HW block attention set2 0x%x\n", - (u32)(attn & HW_INTERRUT_ASSERT_SET_2)); + (u32)(attn & HW_INTERRUPT_ASSERT_SET_2)); bnx2x_panic(); } } -- cgit v1.2.3 From 249ee819e24c180909f43c1173c8ef6724d21faf Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 3 Apr 2017 13:23:15 +0200 Subject: l2tp: fix PPP pseudo-wire auto-loading PPP pseudo-wire type is 7 (11 is L2TP_PWTYPE_IP). Fixes: f1f39f911027 ("l2tp: auto load type modules") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ppp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 7bf73091baa2..861b255a2d51 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1853,4 +1853,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP); -MODULE_ALIAS_L2TP_PWTYPE(11); +MODULE_ALIAS_L2TP_PWTYPE(7); -- cgit v1.2.3 From 30c57f073449e09ae42f908bfff56c08c8751a6f Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Mon, 3 Apr 2017 17:34:28 +0530 Subject: net: ethernet: ti: cpsw: fix race condition during open() TI's cpsw driver handles both OF and non-OF case for phy connect. Unfortunately of_phy_connect() returns NULL on error while phy_connect() returns ERR_PTR(). To handle this, cpsw_slave_open() overrides the return value from phy_connect() to make it NULL or error. This leaves a small window, where cpsw_adjust_link() may be invoked for a slave while slave->phy pointer is temporarily set to -ENODEV (or some other error) before it is finally set to NULL. _cpsw_adjust_link() only handles the NULL case, and an oops results when ERR_PTR() is seen by it. Note that cpsw_adjust_link() checks PHY status for each slave whenever it is invoked. It can so happen that even though phy_connect() for a given slave returns error, _cpsw_adjust_link() is still called for that slave because the link status of another slave changed. Fix this by using a temporary pointer to store return value of {of_}phy_connect() and do a one-time write to slave->phy. Reviewed-by: Grygorii Strashko Reported-by: Yan Liu Signed-off-by: Sekhar Nori Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 58cdc066ef2c..fa674a8bda0c 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1267,6 +1267,7 @@ static void soft_reset_slave(struct cpsw_slave *slave) static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) { u32 slave_port; + struct phy_device *phy; struct cpsw_common *cpsw = priv->cpsw; soft_reset_slave(slave); @@ -1300,27 +1301,28 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); if (slave->data->phy_node) { - slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node, + phy = of_phy_connect(priv->ndev, slave->data->phy_node, &cpsw_adjust_link, 0, slave->data->phy_if); - if (!slave->phy) { + if (!phy) { dev_err(priv->dev, "phy \"%s\" not found on slave %d\n", slave->data->phy_node->full_name, slave->slave_num); return; } } else { - slave->phy = phy_connect(priv->ndev, slave->data->phy_id, + phy = phy_connect(priv->ndev, slave->data->phy_id, &cpsw_adjust_link, slave->data->phy_if); - if (IS_ERR(slave->phy)) { + if (IS_ERR(phy)) { dev_err(priv->dev, "phy \"%s\" not found on slave %d, err %ld\n", slave->data->phy_id, slave->slave_num, - PTR_ERR(slave->phy)); - slave->phy = NULL; + PTR_ERR(phy)); return; } } + slave->phy = phy; + phy_attached_info(slave->phy); phy_start(slave->phy); -- cgit v1.2.3 From 8f5f525d5b83f7d76a6baf9c4e94d4bf312ea7f6 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Mon, 3 Apr 2017 13:25:12 +1000 Subject: powerpc/64: Fix flush_(d|i)cache_range() called from modules When the kernel is compiled to use 64bit ABIv2 the _GLOBAL() macro does not include a global entry point. A function's global entry point is used when the function is called from a different TOC context and in the kernel this typically means a call from a module into the vmlinux (or vice-versa). There are a few exported asm functions declared with _GLOBAL() and calling them from a module will likely crash the kernel since any TOC relative load will yield garbage. flush_icache_range() and flush_dcache_range() are both exported to modules, and use the TOC, so must use _GLOBAL_TOC(). Fixes: 721aeaa9fdf3 ("powerpc: Build little endian ppc64 kernel with ABIv2") Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/misc_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index ae179cb1bb3c..c119044cad0d 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -67,7 +67,7 @@ PPC64_CACHES: * flush all bytes from start through stop-1 inclusive */ -_GLOBAL(flush_icache_range) +_GLOBAL_TOC(flush_icache_range) BEGIN_FTR_SECTION PURGE_PREFETCHED_INS blr @@ -120,7 +120,7 @@ EXPORT_SYMBOL(flush_icache_range) * * flush all bytes from start to stop-1 inclusive */ -_GLOBAL(flush_dcache_range) +_GLOBAL_TOC(flush_dcache_range) /* * Flush the data cache to memory -- cgit v1.2.3 From 88b1bf7268f56887ca88eb09c6fb0f4fc970121a Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 29 Mar 2017 19:19:42 +0200 Subject: powerpc/mm: Add missing global TLB invalidate if cxl is active Commit 4c6d9acce1f4 ("powerpc/mm: Add hooks for cxl") converted local TLB invalidates to global if the cxl driver is active. This is necessary because the CAPP snoops invalidations to forward them to the PSL on the cxl adapter. However one path was forgotten. native_flush_hash_range() still does local TLB invalidates, as found out the hard way recently. This patch fixes it by following the same logic as previously: if the cxl driver is active, the local TLB invalidates are 'upgraded' to global. Fixes: 4c6d9acce1f4 ("powerpc/mm: Add hooks for cxl") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Frederic Barrat Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_native_64.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index cc332608e656..65bb8f33b399 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -638,6 +638,10 @@ static void native_flush_hash_range(unsigned long number, int local) unsigned long psize = batch->psize; int ssize = batch->ssize; int i; + unsigned int use_local; + + use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && + mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use(); local_irq_save(flags); @@ -667,8 +671,7 @@ static void native_flush_hash_range(unsigned long number, int local) } pte_iterate_hashed_end(); } - if (mmu_has_feature(MMU_FTR_TLBIEL) && - mmu_psize_defs[psize].tlbiel && local) { + if (use_local) { asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { vpn = batch->vpn[i]; -- cgit v1.2.3 From b2376407f98920c9b0c411948675f58a9640be35 Mon Sep 17 00:00:00 2001 From: Vic Yang Date: Fri, 24 Mar 2017 18:44:01 +0100 Subject: mfd: cros-ec: Fix host command buffer size For SPI, we can get up to 32 additional bytes for response preamble. The current overhead (2 bytes) may cause problems when we try to receive a big response. Update it to 32 bytes. Without this fix we could see a kernel BUG when we receive a big response from the Chrome EC when is connected via SPI. Signed-off-by: Vic Yang Tested-by: Enric Balletbo i Serra Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 7a01c94496f1..3eef9fb9968a 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -35,10 +35,11 @@ * Max bus-specific overhead incurred by request/responses. * I2C requires 1 additional byte for requests. * I2C requires 2 additional bytes for responses. + * SPI requires up to 32 additional bytes for responses. * */ #define EC_PROTO_VERSION_UNKNOWN 0 #define EC_MAX_REQUEST_OVERHEAD 1 -#define EC_MAX_RESPONSE_OVERHEAD 2 +#define EC_MAX_RESPONSE_OVERHEAD 32 /* * Command interface between EC and AP, for LPC, I2C and SPI interfaces. -- cgit v1.2.3 From ec360607a25fae97c81eef2f02268ae8ed3649b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 3 Apr 2017 18:12:04 +0300 Subject: crypto: caam - fix JR platform device subsequent (re)creations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The way Job Ring platform devices are created and released does not allow for multiple create-release cycles. JR0 Platform device creation error JR0 Platform device creation error caam 2100000.caam: no queues configured, terminating caam: probe of 2100000.caam failed with error -12 The reason is that platform devices are created for each job ring: for_each_available_child_of_node(nprop, np) if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || of_device_is_compatible(np, "fsl,sec4.0-job-ring")) { ctrlpriv->jrpdev[ring] = of_platform_device_create(np, NULL, dev); which sets OF_POPULATED on the device node, but then it cleans these up: /* Remove platform devices for JobRs */ for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) { if (ctrlpriv->jrpdev[ring]) of_device_unregister(ctrlpriv->jrpdev[ring]); } which leaves OF_POPULATED set. Use of_platform_populate / of_platform_depopulate instead. This allows for a bit of driver clean-up, jrpdev is no longer needed. Logic changes a bit too: -exit in case of_platform_populate fails, since currently even QI backend depends on JR; true, we no longer support the case when "some" of the JR DT nodes are incorrect -when cleaning up, caam_remove() would also depopulate RTIC in case it would have been populated somewhere else - not the case for now Cc: Fixes: 313ea293e9c4d ("crypto: caam - Add Platform driver for Job Ring") Reported-by: Russell King Suggested-by: Rob Herring Signed-off-by: Horia Geantă Acked-by: Rob Herring Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 63 +++++++++++++------------------------------- drivers/crypto/caam/intern.h | 1 - 2 files changed, 19 insertions(+), 45 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index fef39f9f41ee..220f94bd1635 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -301,15 +301,13 @@ static int caam_remove(struct platform_device *pdev) struct device *ctrldev; struct caam_drv_private *ctrlpriv; struct caam_ctrl __iomem *ctrl; - int ring; ctrldev = &pdev->dev; ctrlpriv = dev_get_drvdata(ctrldev); ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl; - /* Remove platform devices for JobRs */ - for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) - of_device_unregister(ctrlpriv->jrpdev[ring]); + /* Remove platform devices under the crypto node */ + of_platform_depopulate(ctrldev); /* De-initialize RNG state handles initialized by this driver. */ if (ctrlpriv->rng4_sh_init) @@ -418,10 +416,21 @@ DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); #endif +static const struct of_device_id caam_match[] = { + { + .compatible = "fsl,sec-v4.0", + }, + { + .compatible = "fsl,sec4.0", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, caam_match); + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { - int ret, ring, ridx, rspec, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; + int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; u64 caam_id; struct device *dev; struct device_node *nprop, *np; @@ -597,47 +606,24 @@ static int caam_probe(struct platform_device *pdev) goto iounmap_ctrl; } - /* - * Detect and enable JobRs - * First, find out how many ring spec'ed, allocate references - * for all, then go probe each one. - */ - rspec = 0; - for_each_available_child_of_node(nprop, np) - if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || - of_device_is_compatible(np, "fsl,sec4.0-job-ring")) - rspec++; - - ctrlpriv->jrpdev = devm_kcalloc(&pdev->dev, rspec, - sizeof(*ctrlpriv->jrpdev), GFP_KERNEL); - if (ctrlpriv->jrpdev == NULL) { - ret = -ENOMEM; + ret = of_platform_populate(nprop, caam_match, NULL, dev); + if (ret) { + dev_err(dev, "JR platform devices creation error\n"); goto iounmap_ctrl; } ring = 0; - ridx = 0; - ctrlpriv->total_jobrs = 0; for_each_available_child_of_node(nprop, np) if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || of_device_is_compatible(np, "fsl,sec4.0-job-ring")) { - ctrlpriv->jrpdev[ring] = - of_platform_device_create(np, NULL, dev); - if (!ctrlpriv->jrpdev[ring]) { - pr_warn("JR physical index %d: Platform device creation error\n", - ridx); - ridx++; - continue; - } ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *) ((__force uint8_t *)ctrl + - (ridx + JR_BLOCK_NUMBER) * + (ring + JR_BLOCK_NUMBER) * BLOCK_OFFSET ); ctrlpriv->total_jobrs++; ring++; - ridx++; - } + } /* Check to see if QI present. If so, enable */ ctrlpriv->qi_present = @@ -847,17 +833,6 @@ disable_caam_ipg: return ret; } -static struct of_device_id caam_match[] = { - { - .compatible = "fsl,sec-v4.0", - }, - { - .compatible = "fsl,sec4.0", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, caam_match); - static struct platform_driver caam_driver = { .driver = { .name = "caam", diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index e2bcacc1a921..dbed8baeebe5 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -66,7 +66,6 @@ struct caam_drv_private_jr { struct caam_drv_private { struct device *dev; - struct platform_device **jrpdev; /* Alloc'ed array per sub-device */ struct platform_device *pdev; /* Physical-presence section */ -- cgit v1.2.3 From 33fa46d7b310e06d2cb2ab5417c100af120bfb65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Mon, 3 Apr 2017 18:30:07 +0300 Subject: crypto: caam - fix invalid dereference in caam_rsa_init_tfm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case caam_jr_alloc() fails, ctx->dev carries the error code, thus accessing it with dev_err() is incorrect. Cc: # 4.8+ Fixes: 8c419778ab57e ("crypto: caam - add support for RSA algorithm") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 32100c4851dd..49cbdcba7883 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -506,7 +506,7 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) ctx->dev = caam_jr_alloc(); if (IS_ERR(ctx->dev)) { - dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n"); + pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(ctx->dev); } -- cgit v1.2.3 From 40c98cb57cdbc377456116ad4582c89e329721b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Wed, 5 Apr 2017 11:41:03 +0300 Subject: crypto: caam - fix RNG deinstantiation error checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RNG instantiation was previously fixed by commit 62743a4145bb9 ("crypto: caam - fix RNG init descriptor ret. code checking") while deinstantiation was not addressed. Since the descriptors used are similar, in the sense that they both end with a JUMP HALT command, checking for errors should be similar too, i.e. status code 7000_0000h should be considered successful. Cc: # 3.13+ Fixes: 1005bccd7a4a6 ("crypto: caam - enable instantiation of all RNG4 state handles") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 220f94bd1635..5d7f73d60515 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -281,7 +281,8 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask) /* Try to run it through DECO0 */ ret = run_descriptor_deco0(ctrldev, desc, &status); - if (ret || status) { + if (ret || + (status && status != JRSTA_SSRC_JUMP_HALT_CC)) { dev_err(ctrldev, "Failed to deinstantiate RNG4 SH%d\n", sh_idx); -- cgit v1.2.3 From 62277de758b155dc04b78f195a1cb5208c37b2df Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 17 Jun 2016 17:33:59 +0000 Subject: ring-buffer: Fix return value check in test_ringbuffer() In case of error, the function kthread_run() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Link: http://lkml.kernel.org/r/1466184839-14927-1-git-send-email-weiyj_lk@163.com Cc: stable@vger.kernel.org Fixes: 6c43e554a ("ring-buffer: Add ring buffer startup selftest") Signed-off-by: Wei Yongjun Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ring_buffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 96fc3c043ad6..54e7a90db848 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4826,9 +4826,9 @@ static __init int test_ringbuffer(void) rb_data[cpu].cnt = cpu; rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu], "rbtester/%d", cpu); - if (WARN_ON(!rb_threads[cpu])) { + if (WARN_ON(IS_ERR(rb_threads[cpu]))) { pr_cont("FAILED\n"); - ret = -1; + ret = PTR_ERR(rb_threads[cpu]); goto out_free; } @@ -4838,9 +4838,9 @@ static __init int test_ringbuffer(void) /* Now create the rb hammer! */ rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer"); - if (WARN_ON(!rb_hammer)) { + if (WARN_ON(IS_ERR(rb_hammer))) { pr_cont("FAILED\n"); - ret = -1; + ret = PTR_ERR(rb_hammer); goto out_free; } -- cgit v1.2.3 From ef62a2d81f73d9cddef14bc3d9097a57010d551c Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 10:37:44 +0100 Subject: metag/usercopy: Drop unused macros Metag's lib/usercopy.c has a bunch of copy_from_user macros for larger copies between 5 and 16 bytes which are completely unused. Before fixing zeroing lets drop these macros so there is less to fix. Signed-off-by: James Hogan Cc: Al Viro Cc: linux-metag@vger.kernel.org Cc: stable@vger.kernel.org --- arch/metag/lib/usercopy.c | 113 ---------------------------------------------- 1 file changed, 113 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index b3ebfe9c8e88..b4eb1f17069f 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -651,119 +651,6 @@ EXPORT_SYMBOL(__copy_user); #define __asm_copy_from_user_4(to, from, ret) \ __asm_copy_from_user_4x_cont(to, from, ret, "", "", "") -#define __asm_copy_from_user_5(to, from, ret) \ - __asm_copy_from_user_4x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "4: SETB [%0++],D1Ar1\n", \ - "5: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 4b,5b\n") - -#define __asm_copy_from_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_4x_cont(to, from, ret, \ - " GETW D1Ar1,[%1++]\n" \ - "4: SETW [%0++],D1Ar1\n" COPY, \ - "5: ADD %2,%2,#2\n" \ - " SETW [%0++],D1Ar1\n" FIXUP, \ - " .long 4b,5b\n" TENTRY) - -#define __asm_copy_from_user_6(to, from, ret) \ - __asm_copy_from_user_6x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_7(to, from, ret) \ - __asm_copy_from_user_6x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "6: SETB [%0++],D1Ar1\n", \ - "7: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 6b,7b\n") - -#define __asm_copy_from_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_4x_cont(to, from, ret, \ - " GETD D1Ar1,[%1++]\n" \ - "4: SETD [%0++],D1Ar1\n" COPY, \ - "5: ADD %2,%2,#4\n" \ - " SETD [%0++],D1Ar1\n" FIXUP, \ - " .long 4b,5b\n" TENTRY) - -#define __asm_copy_from_user_8(to, from, ret) \ - __asm_copy_from_user_8x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_9(to, from, ret) \ - __asm_copy_from_user_8x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "6: SETB [%0++],D1Ar1\n", \ - "7: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 6b,7b\n") - -#define __asm_copy_from_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_8x_cont(to, from, ret, \ - " GETW D1Ar1,[%1++]\n" \ - "6: SETW [%0++],D1Ar1\n" COPY, \ - "7: ADD %2,%2,#2\n" \ - " SETW [%0++],D1Ar1\n" FIXUP, \ - " .long 6b,7b\n" TENTRY) - -#define __asm_copy_from_user_10(to, from, ret) \ - __asm_copy_from_user_10x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_11(to, from, ret) \ - __asm_copy_from_user_10x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "8: SETB [%0++],D1Ar1\n", \ - "9: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 8b,9b\n") - -#define __asm_copy_from_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_8x_cont(to, from, ret, \ - " GETD D1Ar1,[%1++]\n" \ - "6: SETD [%0++],D1Ar1\n" COPY, \ - "7: ADD %2,%2,#4\n" \ - " SETD [%0++],D1Ar1\n" FIXUP, \ - " .long 6b,7b\n" TENTRY) - -#define __asm_copy_from_user_12(to, from, ret) \ - __asm_copy_from_user_12x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_13(to, from, ret) \ - __asm_copy_from_user_12x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "8: SETB [%0++],D1Ar1\n", \ - "9: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 8b,9b\n") - -#define __asm_copy_from_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_12x_cont(to, from, ret, \ - " GETW D1Ar1,[%1++]\n" \ - "8: SETW [%0++],D1Ar1\n" COPY, \ - "9: ADD %2,%2,#2\n" \ - " SETW [%0++],D1Ar1\n" FIXUP, \ - " .long 8b,9b\n" TENTRY) - -#define __asm_copy_from_user_14(to, from, ret) \ - __asm_copy_from_user_14x_cont(to, from, ret, "", "", "") - -#define __asm_copy_from_user_15(to, from, ret) \ - __asm_copy_from_user_14x_cont(to, from, ret, \ - " GETB D1Ar1,[%1++]\n" \ - "10: SETB [%0++],D1Ar1\n", \ - "11: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ - " .long 10b,11b\n") - -#define __asm_copy_from_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ - __asm_copy_from_user_12x_cont(to, from, ret, \ - " GETD D1Ar1,[%1++]\n" \ - "8: SETD [%0++],D1Ar1\n" COPY, \ - "9: ADD %2,%2,#4\n" \ - " SETD [%0++],D1Ar1\n" FIXUP, \ - " .long 8b,9b\n" TENTRY) - -#define __asm_copy_from_user_16(to, from, ret) \ - __asm_copy_from_user_16x_cont(to, from, ret, "", "", "") #define __asm_copy_from_user_8x64(to, from, ret) \ asm volatile ( \ -- cgit v1.2.3 From 2257211942bbbf6c798ab70b487d7e62f7835a1a Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 11:23:18 +0100 Subject: metag/usercopy: Fix alignment error checking Fix the error checking of the alignment adjustment code in raw_copy_from_user(), which mistakenly considers it safe to skip the error check when aligning the source buffer on a 2 or 4 byte boundary. If the destination buffer was unaligned it may have started to copy using byte or word accesses, which could well be at the start of a new (valid) source page. This would result in it appearing to have copied 1 or 2 bytes at the end of the first (invalid) page rather than none at all. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Cc: stable@vger.kernel.org --- arch/metag/lib/usercopy.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index b4eb1f17069f..a6ced9691ddb 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -717,6 +717,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, if ((unsigned long) src & 1) { __asm_copy_from_user_1(dst, src, retn); n--; + if (retn) + goto copy_exception_bytes; } if ((unsigned long) dst & 1) { /* Worst case - byte copy */ @@ -730,6 +732,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, if (((unsigned long) src & 2) && n >= 2) { __asm_copy_from_user_2(dst, src, retn); n -= 2; + if (retn) + goto copy_exception_bytes; } if ((unsigned long) dst & 2) { /* Second worst case - word copy */ @@ -741,12 +745,6 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, } } - /* We only need one check after the unalignment-adjustments, - because if both adjustments were done, either both or - neither reference had an exception. */ - if (retn != 0) - goto copy_exception_bytes; - #ifdef USE_RAPF /* 64 bit copy loop */ if (!(((unsigned long) src | (unsigned long) dst) & 7)) { -- cgit v1.2.3 From fb8ea062a8f2e85256e13f55696c5c5f0dfdcc8b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 13:35:01 +0100 Subject: metag/usercopy: Add early abort to copy_to_user When copying to userland on Meta, if any faults are encountered immediately abort the copy instead of continuing on and repeatedly faulting, and worse potentially copying further bytes successfully to subsequent valid pages. Fixes: 373cd784d0fc ("metag: Memory handling") Reported-by: Al Viro Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Cc: stable@vger.kernel.org --- arch/metag/lib/usercopy.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index a6ced9691ddb..714d8562aa20 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -538,23 +538,31 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, if ((unsigned long) src & 1) { __asm_copy_to_user_1(dst, src, retn); n--; + if (retn) + return retn + n; } if ((unsigned long) dst & 1) { /* Worst case - byte copy */ while (n > 0) { __asm_copy_to_user_1(dst, src, retn); n--; + if (retn) + return retn + n; } } if (((unsigned long) src & 2) && n >= 2) { __asm_copy_to_user_2(dst, src, retn); n -= 2; + if (retn) + return retn + n; } if ((unsigned long) dst & 2) { /* Second worst case - word copy */ while (n >= 2) { __asm_copy_to_user_2(dst, src, retn); n -= 2; + if (retn) + return retn + n; } } @@ -569,6 +577,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, while (n >= 8) { __asm_copy_to_user_8x64(dst, src, retn); n -= 8; + if (retn) + return retn + n; } } if (n >= RAPF_MIN_BUF_SIZE) { @@ -581,6 +591,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, while (n >= 8) { __asm_copy_to_user_8x64(dst, src, retn); n -= 8; + if (retn) + return retn + n; } } #endif @@ -588,11 +600,15 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, while (n >= 16) { __asm_copy_to_user_16(dst, src, retn); n -= 16; + if (retn) + return retn + n; } while (n >= 4) { __asm_copy_to_user_4(dst, src, retn); n -= 4; + if (retn) + return retn + n; } switch (n) { @@ -609,6 +625,10 @@ unsigned long __copy_user(void __user *pdst, const void *psrc, break; } + /* + * If we get here, retn correctly reflects the number of failing + * bytes. + */ return retn; } EXPORT_SYMBOL(__copy_user); -- cgit v1.2.3 From 3ebfdf082184d04f6e73b30cd9446613dc7f8c02 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 4 Apr 2017 13:39:55 +0800 Subject: sctp: get sock from transport in sctp_transport_update_pmtu This patch is almost to revert commit 02f3d4ce9e81 ("sctp: Adjust PMTU updates to accomodate route invalidation."). As t->asoc can't be NULL in sctp_transport_update_pmtu, it could get sk from asoc, and no need to pass sk into that function. It is also to remove some duplicated codes from that function. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 5 ++--- include/net/sctp/structs.h | 6 +++--- net/sctp/associola.c | 6 +++--- net/sctp/input.c | 4 ++-- net/sctp/output.c | 4 ++-- net/sctp/socket.c | 6 +++--- net/sctp/transport.c | 19 +++++++------------ 7 files changed, 22 insertions(+), 28 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d75caa7a629b..069582ee5d7f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -448,10 +448,9 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) return frag; } -static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc) +static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc) { - - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); asoc->pmtu_pending = 0; } diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a127b7c2c3c9..138f8615acf0 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -952,8 +952,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t); void sctp_transport_burst_limited(struct sctp_transport *); void sctp_transport_burst_reset(struct sctp_transport *); unsigned long sctp_transport_timeout(struct sctp_transport *); -void sctp_transport_reset(struct sctp_transport *); -void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32); +void sctp_transport_reset(struct sctp_transport *t); +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); @@ -1954,7 +1954,7 @@ void sctp_assoc_update(struct sctp_association *old, __u32 sctp_association_get_next_tsn(struct sctp_association *); -void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *); +void sctp_assoc_sync_pmtu(struct sctp_association *asoc); void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); void sctp_assoc_set_primary(struct sctp_association *, diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0b26df5f6188..a9708da28eb5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1412,7 +1412,7 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc, /* Update the association's pmtu and frag_point by going through all the * transports. This routine is called when a transport's PMTU has changed. */ -void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) +void sctp_assoc_sync_pmtu(struct sctp_association *asoc) { struct sctp_transport *t; __u32 pmtu = 0; @@ -1424,8 +1424,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) { if (t->pmtu_pending && t->dst) { - sctp_transport_update_pmtu(sk, t, - SCTP_TRUNC4(dst_mtu(t->dst))); + sctp_transport_update_pmtu( + t, SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/input.c b/net/sctp/input.c index 2a28ab20487f..0e06a278d2a9 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -401,10 +401,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, if (t->param_flags & SPP_PMTUD_ENABLE) { /* Update transports view of the MTU */ - sctp_transport_update_pmtu(sk, t, pmtu); + sctp_transport_update_pmtu(t, pmtu); /* Update association pmtu. */ - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } /* Retransmit with the new pmtu setting. diff --git a/net/sctp/output.c b/net/sctp/output.c index ec4d50a38713..1409a875ad8e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -105,10 +105,10 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag, if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); if (asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } else if (!sctp_transport_pmtu_check(tp)) { if (asoc->param_flags & SPP_PMTUD_ENABLE) - sctp_assoc_sync_pmtu(sk, asoc); + sctp_assoc_sync_pmtu(asoc); } /* If there a is a prepend chunk stick it on the list before diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 12fbae2c1002..c1401f43d40f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1907,7 +1907,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) } if (asoc->pmtu_pending) - sctp_assoc_pending_pmtu(sk, asoc); + sctp_assoc_pending_pmtu(asoc); /* If fragmentation is disabled and the message length exceeds the * association fragmentation point, return EMSGSIZE. The I-D @@ -2435,7 +2435,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) { if (trans) { trans->pathmtu = params->spp_pathmtu; - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } else if (asoc) { asoc->pathmtu = params->spp_pathmtu; } else { @@ -2451,7 +2451,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, (trans->param_flags & ~SPP_PMTUD) | pmtud_change; if (update) { sctp_transport_pmtu(trans, sctp_opt2sk(sp)); - sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc); + sctp_assoc_sync_pmtu(asoc); } } else if (asoc) { asoc->param_flags = diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3379668af368..721eeebfcd8a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -251,14 +251,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } -void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu) +void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { - struct dst_entry *dst; + struct dst_entry *dst = sctp_transport_dst_check(t); if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + __func__, pmtu, SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ @@ -267,17 +266,13 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p t->pathmtu = pmtu; } - dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); - if (dst) { - dst->ops->update_pmtu(dst, sk, NULL, pmtu); - + dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); dst = sctp_transport_dst_check(t); - if (!dst) - t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); } + + if (!dst) + t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); } /* Caches the dst entry and source address for a transport's destination -- cgit v1.2.3 From 563ddc1076109f2b3f88e6d355eab7b6fd4662cb Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 31 Mar 2017 11:14:02 +0100 Subject: metag/usercopy: Zero rest of buffer from copy_from_user Currently we try to zero the destination for a failed read from userland in fixup code in the usercopy.c macros. The rest of the destination buffer is then zeroed from __copy_user_zeroing(), which is used for both copy_from_user() and __copy_from_user(). Unfortunately we fail to zero in the fixup code as D1Ar1 is set to 0 before the fixup code entry labels, and __copy_from_user() shouldn't even be zeroing the rest of the buffer. Move the zeroing out into copy_from_user() and rename __copy_user_zeroing() to raw_copy_from_user() since it no longer does any zeroing. This also conveniently matches the name needed for RAW_COPY_USER support in a later patch. Fixes: 373cd784d0fc ("metag: Memory handling") Reported-by: Al Viro Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Cc: stable@vger.kernel.org --- arch/metag/include/asm/uaccess.h | 15 ++++++----- arch/metag/lib/usercopy.c | 57 +++++++++++++--------------------------- 2 files changed, 26 insertions(+), 46 deletions(-) diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h index 273e61225c27..07238b39638c 100644 --- a/arch/metag/include/asm/uaccess.h +++ b/arch/metag/include/asm/uaccess.h @@ -197,20 +197,21 @@ extern long __must_check strnlen_user(const char __user *src, long count); #define strlen_user(str) strnlen_user(str, 32767) -extern unsigned long __must_check __copy_user_zeroing(void *to, - const void __user *from, - unsigned long n); +extern unsigned long raw_copy_from_user(void *to, const void __user *from, + unsigned long n); static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long res = n; if (likely(access_ok(VERIFY_READ, from, n))) - return __copy_user_zeroing(to, from, n); - memset(to, 0, n); - return n; + res = raw_copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } -#define __copy_from_user(to, from, n) __copy_user_zeroing(to, from, n) +#define __copy_from_user(to, from, n) raw_copy_from_user(to, from, n) #define __copy_from_user_inatomic __copy_from_user extern unsigned long __must_check __copy_user(void __user *to, diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index 714d8562aa20..e1d553872fd7 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -29,7 +29,6 @@ COPY \ "1:\n" \ " .section .fixup,\"ax\"\n" \ - " MOV D1Ar1,#0\n" \ FIXUP \ " MOVT D1Ar1,#HI(1b)\n" \ " JUMP D1Ar1,#LO(1b)\n" \ @@ -637,16 +636,14 @@ EXPORT_SYMBOL(__copy_user); __asm_copy_user_cont(to, from, ret, \ " GETB D1Ar1,[%1++]\n" \ "2: SETB [%0++],D1Ar1\n", \ - "3: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ + "3: ADD %2,%2,#1\n", \ " .long 2b,3b\n") #define __asm_copy_from_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ __asm_copy_user_cont(to, from, ret, \ " GETW D1Ar1,[%1++]\n" \ "2: SETW [%0++],D1Ar1\n" COPY, \ - "3: ADD %2,%2,#2\n" \ - " SETW [%0++],D1Ar1\n" FIXUP, \ + "3: ADD %2,%2,#2\n" FIXUP, \ " .long 2b,3b\n" TENTRY) #define __asm_copy_from_user_2(to, from, ret) \ @@ -656,32 +653,26 @@ EXPORT_SYMBOL(__copy_user); __asm_copy_from_user_2x_cont(to, from, ret, \ " GETB D1Ar1,[%1++]\n" \ "4: SETB [%0++],D1Ar1\n", \ - "5: ADD %2,%2,#1\n" \ - " SETB [%0++],D1Ar1\n", \ + "5: ADD %2,%2,#1\n", \ " .long 4b,5b\n") #define __asm_copy_from_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \ __asm_copy_user_cont(to, from, ret, \ " GETD D1Ar1,[%1++]\n" \ "2: SETD [%0++],D1Ar1\n" COPY, \ - "3: ADD %2,%2,#4\n" \ - " SETD [%0++],D1Ar1\n" FIXUP, \ + "3: ADD %2,%2,#4\n" FIXUP, \ " .long 2b,3b\n" TENTRY) #define __asm_copy_from_user_4(to, from, ret) \ __asm_copy_from_user_4x_cont(to, from, ret, "", "", "") - #define __asm_copy_from_user_8x64(to, from, ret) \ asm volatile ( \ " GETL D0Ar2,D1Ar1,[%1++]\n" \ "2: SETL [%0++],D0Ar2,D1Ar1\n" \ "1:\n" \ " .section .fixup,\"ax\"\n" \ - " MOV D1Ar1,#0\n" \ - " MOV D0Ar2,#0\n" \ "3: ADD %2,%2,#8\n" \ - " SETL [%0++],D0Ar2,D1Ar1\n" \ " MOVT D0Ar2,#HI(1b)\n" \ " JUMP D0Ar2,#LO(1b)\n" \ " .previous\n" \ @@ -721,11 +712,12 @@ EXPORT_SYMBOL(__copy_user); "SUB %1, %1, #4\n") -/* Copy from user to kernel, zeroing the bytes that were inaccessible in - userland. The return-value is the number of bytes that were - inaccessible. */ -unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, - unsigned long n) +/* + * Copy from user to kernel. The return-value is the number of bytes that were + * inaccessible. + */ +unsigned long raw_copy_from_user(void *pdst, const void __user *psrc, + unsigned long n) { register char *dst asm ("A0.2") = pdst; register const char __user *src asm ("A1.2") = psrc; @@ -738,7 +730,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_1(dst, src, retn); n--; if (retn) - goto copy_exception_bytes; + return retn + n; } if ((unsigned long) dst & 1) { /* Worst case - byte copy */ @@ -746,14 +738,14 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_1(dst, src, retn); n--; if (retn) - goto copy_exception_bytes; + return retn + n; } } if (((unsigned long) src & 2) && n >= 2) { __asm_copy_from_user_2(dst, src, retn); n -= 2; if (retn) - goto copy_exception_bytes; + return retn + n; } if ((unsigned long) dst & 2) { /* Second worst case - word copy */ @@ -761,7 +753,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_2(dst, src, retn); n -= 2; if (retn) - goto copy_exception_bytes; + return retn + n; } } @@ -777,7 +769,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_8x64(dst, src, retn); n -= 8; if (retn) - goto copy_exception_bytes; + return retn + n; } } @@ -793,7 +785,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, __asm_copy_from_user_8x64(dst, src, retn); n -= 8; if (retn) - goto copy_exception_bytes; + return retn + n; } } #endif @@ -803,7 +795,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, n -= 4; if (retn) - goto copy_exception_bytes; + return retn + n; } /* If we get here, there were no memory read faults. */ @@ -829,21 +821,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc, /* If we get here, retn correctly reflects the number of failing bytes. */ return retn; - - copy_exception_bytes: - /* We already have "retn" bytes cleared, and need to clear the - remaining "n" bytes. A non-optimized simple byte-for-byte in-line - memset is preferred here, since this isn't speed-critical code and - we'd rather have this a leaf-function than calling memset. */ - { - char *endp; - for (endp = dst + n; dst < endp; dst++) - *dst = 0; - } - - return retn + n; } -EXPORT_SYMBOL(__copy_user_zeroing); +EXPORT_SYMBOL(raw_copy_from_user); #define __asm_clear_8x64(to, ret) \ asm volatile ( \ -- cgit v1.2.3 From fd40eee1290ad7add7aa665e3ce6b0f9fe9734b4 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 4 Apr 2017 11:43:26 +0100 Subject: metag/usercopy: Set flags before ADDZ The fixup code for the copy_to_user rapf loops reads TXStatus.LSM_STEP to decide how far to rewind the source pointer. There is a special case for the last execution of an MGETL/MGETD, since it leaves LSM_STEP=0 even though the number of MGETLs/MGETDs attempted was 4. This uses ADDZ which is conditional upon the Z condition flag, but the AND instruction which masked the TXStatus.LSM_STEP field didn't set the condition flags based on the result. Fix that now by using ANDS which does set the flags, and also marking the condition codes as clobbered by the inline assembly. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Cc: stable@vger.kernel.org --- arch/metag/lib/usercopy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index e1d553872fd7..4422928a1746 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -315,7 +315,7 @@ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ - : "D1Ar1", "D0Ar2", "memory") + : "D1Ar1", "D0Ar2", "cc", "memory") /* rewind 'to' and 'from' pointers when a fault occurs * @@ -341,7 +341,7 @@ #define __asm_copy_to_user_64bit_rapf_loop(to, from, ret, n, id)\ __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \ "LSR D0Ar2, D0Ar2, #8\n" \ - "AND D0Ar2, D0Ar2, #0x7\n" \ + "ANDS D0Ar2, D0Ar2, #0x7\n" \ "ADDZ D0Ar2, D0Ar2, #4\n" \ "SUB D0Ar2, D0Ar2, #1\n" \ "MOV D1Ar1, #4\n" \ @@ -486,7 +486,7 @@ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ - : "D1Ar1", "D0Ar2", "memory") + : "D1Ar1", "D0Ar2", "cc", "memory") /* rewind 'to' and 'from' pointers when a fault occurs * @@ -512,7 +512,7 @@ #define __asm_copy_to_user_32bit_rapf_loop(to, from, ret, n, id)\ __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \ "LSR D0Ar2, D0Ar2, #8\n" \ - "AND D0Ar2, D0Ar2, #0x7\n" \ + "ANDS D0Ar2, D0Ar2, #0x7\n" \ "ADDZ D0Ar2, D0Ar2, #4\n" \ "SUB D0Ar2, D0Ar2, #1\n" \ "MOV D1Ar1, #4\n" \ -- cgit v1.2.3 From 2c0b1df88b987a12d95ea1d6beaf01894f3cc725 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 3 Apr 2017 17:41:40 +0100 Subject: metag/usercopy: Fix src fixup in from user rapf loops The fixup code to rewind the source pointer in __asm_copy_from_user_{32,64}bit_rapf_loop() always rewound the source by a single unit (4 or 8 bytes), however this is insufficient if the fault didn't occur on the first load in the loop, as the source pointer will have been incremented but nothing will have been stored until all 4 register [pairs] are loaded. Read the LSM_STEP field of TXSTATUS (which is already loaded into a register), a bit like the copy_to_user versions, to determine how many iterations of MGET[DL] have taken place, all of which need rewinding. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Cc: stable@vger.kernel.org --- arch/metag/lib/usercopy.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index 4422928a1746..e09c95ba028c 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -687,29 +687,49 @@ EXPORT_SYMBOL(__copy_user); * * Rationale: * A fault occurs while reading from user buffer, which is the - * source. Since the fault is at a single address, we only - * need to rewind by 8 bytes. + * source. * Since we don't write to kernel buffer until we read first, * the kernel buffer is at the right state and needn't be - * corrected. + * corrected, but the source must be rewound to the beginning of + * the block, which is LSM_STEP*8 bytes. + * LSM_STEP is bits 10:8 in TXSTATUS which is already read + * and stored in D0Ar2 + * + * NOTE: If a fault occurs at the last operation in M{G,S}ETL + * LSM_STEP will be 0. ie: we do 4 writes in our case, if + * a fault happens at the 4th write, LSM_STEP will be 0 + * instead of 4. The code copes with that. */ #define __asm_copy_from_user_64bit_rapf_loop(to, from, ret, n, id) \ __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id, \ - "SUB %1, %1, #8\n") + "LSR D0Ar2, D0Ar2, #5\n" \ + "ANDS D0Ar2, D0Ar2, #0x38\n" \ + "ADDZ D0Ar2, D0Ar2, #32\n" \ + "SUB %1, %1, D0Ar2\n") /* rewind 'from' pointer when a fault occurs * * Rationale: * A fault occurs while reading from user buffer, which is the - * source. Since the fault is at a single address, we only - * need to rewind by 4 bytes. + * source. * Since we don't write to kernel buffer until we read first, * the kernel buffer is at the right state and needn't be - * corrected. + * corrected, but the source must be rewound to the beginning of + * the block, which is LSM_STEP*4 bytes. + * LSM_STEP is bits 10:8 in TXSTATUS which is already read + * and stored in D0Ar2 + * + * NOTE: If a fault occurs at the last operation in M{G,S}ETL + * LSM_STEP will be 0. ie: we do 4 writes in our case, if + * a fault happens at the 4th write, LSM_STEP will be 0 + * instead of 4. The code copes with that. */ #define __asm_copy_from_user_32bit_rapf_loop(to, from, ret, n, id) \ __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id, \ - "SUB %1, %1, #4\n") + "LSR D0Ar2, D0Ar2, #6\n" \ + "ANDS D0Ar2, D0Ar2, #0x1c\n" \ + "ADDZ D0Ar2, D0Ar2, #16\n" \ + "SUB %1, %1, D0Ar2\n") /* -- cgit v1.2.3 From b884a190afcecdbef34ca508ea5ee88bb7c77861 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 4 Apr 2017 08:51:34 +0100 Subject: metag/usercopy: Add missing fixups The rapf copy loops in the Meta usercopy code is missing some extable entries for HTP cores with unaligned access checking enabled, where faults occur on the instruction immediately after the faulting access. Add the fixup labels and extable entries for these cases so that corner case user copy failures don't cause kernel crashes. Fixes: 373cd784d0fc ("metag: Memory handling") Signed-off-by: James Hogan Cc: linux-metag@vger.kernel.org Cc: stable@vger.kernel.org --- arch/metag/lib/usercopy.c | 72 +++++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c index e09c95ba028c..2792fc621088 100644 --- a/arch/metag/lib/usercopy.c +++ b/arch/metag/lib/usercopy.c @@ -259,27 +259,31 @@ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "22:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #32\n" \ "23:\n" \ - "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "SUB %3, %3, #32\n" \ "24:\n" \ + "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "25:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "26:\n" \ "SUB %3, %3, #32\n" \ "DCACHE [%1+#-64], D0Ar6\n" \ "BR $Lloop"id"\n" \ \ "MOV RAPF, %1\n" \ - "25:\n" \ + "27:\n" \ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "26:\n" \ + "28:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "29:\n" \ "SUB %3, %3, #32\n" \ - "27:\n" \ + "30:\n" \ "MGETL D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "28:\n" \ + "31:\n" \ "MSETL [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "32:\n" \ "SUB %0, %0, #8\n" \ - "29:\n" \ + "33:\n" \ "SETL [%0++], D0.7, D1.7\n" \ "SUB %3, %3, #32\n" \ "1:" \ @@ -311,7 +315,11 @@ " .long 26b,3b\n" \ " .long 27b,3b\n" \ " .long 28b,3b\n" \ - " .long 29b,4b\n" \ + " .long 29b,3b\n" \ + " .long 30b,3b\n" \ + " .long 31b,3b\n" \ + " .long 32b,3b\n" \ + " .long 33b,4b\n" \ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ @@ -402,47 +410,55 @@ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "22:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ - "SUB %3, %3, #16\n" \ "23:\n" \ - "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "24:\n" \ - "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ "SUB %3, %3, #16\n" \ - "25:\n" \ + "24:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "26:\n" \ + "25:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "26:\n" \ "SUB %3, %3, #16\n" \ "27:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ "28:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "29:\n" \ + "SUB %3, %3, #16\n" \ + "30:\n" \ + "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ + "31:\n" \ + "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "32:\n" \ "SUB %3, %3, #16\n" \ "DCACHE [%1+#-64], D0Ar6\n" \ "BR $Lloop"id"\n" \ \ "MOV RAPF, %1\n" \ - "29:\n" \ + "33:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "30:\n" \ + "34:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "35:\n" \ "SUB %3, %3, #16\n" \ - "31:\n" \ + "36:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "32:\n" \ + "37:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "38:\n" \ "SUB %3, %3, #16\n" \ - "33:\n" \ + "39:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "34:\n" \ + "40:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "41:\n" \ "SUB %3, %3, #16\n" \ - "35:\n" \ + "42:\n" \ "MGETD D0FrT, D0.5, D0.6, D0.7, [%1++]\n" \ - "36:\n" \ + "43:\n" \ "MSETD [%0++], D0FrT, D0.5, D0.6, D0.7\n" \ + "44:\n" \ "SUB %0, %0, #4\n" \ - "37:\n" \ + "45:\n" \ "SETD [%0++], D0.7\n" \ "SUB %3, %3, #16\n" \ "1:" \ @@ -482,7 +498,15 @@ " .long 34b,3b\n" \ " .long 35b,3b\n" \ " .long 36b,3b\n" \ - " .long 37b,4b\n" \ + " .long 37b,3b\n" \ + " .long 38b,3b\n" \ + " .long 39b,3b\n" \ + " .long 40b,3b\n" \ + " .long 41b,3b\n" \ + " .long 42b,3b\n" \ + " .long 43b,3b\n" \ + " .long 44b,3b\n" \ + " .long 45b,4b\n" \ " .previous\n" \ : "=r" (to), "=r" (from), "=r" (ret), "=d" (n) \ : "0" (to), "1" (from), "2" (ret), "3" (n) \ -- cgit v1.2.3 From ecde8f36f8a05a023b9d026e9094571aab421d36 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 4 Apr 2017 14:15:39 -0700 Subject: tcp: fix lost retransmit SNMP under-counting The lost retransmit SNMP stat is under-counting retransmission that uses segment offloading. This patch fixes that so all retransmission related SNMP counters are consistent. Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 4ecb38ae8504..d8acbd9f477a 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -12,7 +12,8 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) /* Account for retransmits that are lost again */ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT, + tcp_skb_pcount(skb)); } } -- cgit v1.2.3 From 2d2517ee314ef1de0517f74d06c2825fbf597ba3 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 4 Apr 2017 14:15:40 -0700 Subject: tcp: fix reordering SNMP under-counting Currently the reordering SNMP counters only increase if a connection sees a higher degree then it has previously seen. It ignores if the reordering degree is not greater than the default system threshold. This significantly under-counts the number of reordering events and falsely convey that reordering is rare on the network. This patch properly and faithfully records the number of reordering events detected by the TCP stack, just like the comment says "this exciting event is worth to be remembered". Note that even so TCP still under-estimate the actual reordering events because TCP requires TS options or certain packet sequences to detect reordering (i.e. ACKing never-retransmitted sequence in recovery or disordered state). Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 97ac6776e47d..2c1f59386a7b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -878,22 +878,11 @@ static void tcp_update_reordering(struct sock *sk, const int metric, const int ts) { struct tcp_sock *tp = tcp_sk(sk); - if (metric > tp->reordering) { - int mib_idx; + int mib_idx; + if (metric > tp->reordering) { tp->reordering = min(sysctl_tcp_max_reordering, metric); - /* This exciting event is worth to be remembered. 8) */ - if (ts) - mib_idx = LINUX_MIB_TCPTSREORDER; - else if (tcp_is_reno(tp)) - mib_idx = LINUX_MIB_TCPRENOREORDER; - else if (tcp_is_fack(tp)) - mib_idx = LINUX_MIB_TCPFACKREORDER; - else - mib_idx = LINUX_MIB_TCPSACKREORDER; - - NET_INC_STATS(sock_net(sk), mib_idx); #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, @@ -906,6 +895,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } tp->rack.reord = 1; + + /* This exciting event is worth to be remembered. 8) */ + if (ts) + mib_idx = LINUX_MIB_TCPTSREORDER; + else if (tcp_is_reno(tp)) + mib_idx = LINUX_MIB_TCPRENOREORDER; + else if (tcp_is_fack(tp)) + mib_idx = LINUX_MIB_TCPFACKREORDER; + else + mib_idx = LINUX_MIB_TCPSACKREORDER; + + NET_INC_STATS(sock_net(sk), mib_idx); } /* This must be called before lost_out is incremented */ -- cgit v1.2.3 From c383bdd14f91562babd269aa7c36b46fee7b6c75 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 4 Apr 2017 15:56:55 -0700 Subject: nfp: fix potential use after free on xdp prog We should unregister the net_device first, before we give back our reference on xdp_prog. Otherwise xdp_prog may be freed before .ndo_stop() disabled the datapath. Found by code inspection. Fixes: ecd63a0217d5 ("nfp: add XDP support in the driver") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 9179a99563af..a41377e26c07 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3275,9 +3275,10 @@ void nfp_net_netdev_clean(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); + unregister_netdev(nn->netdev); + if (nn->xdp_prog) bpf_prog_put(nn->xdp_prog); if (nn->bpf_offload_xdp) nfp_net_xdp_offload(nn, NULL); - unregister_netdev(nn->netdev); } -- cgit v1.2.3 From a34f83639490a5cc11a9d5c1b3773d4b6eb69a9e Mon Sep 17 00:00:00 2001 From: Min He Date: Thu, 6 Apr 2017 11:01:45 +0800 Subject: drm/i915/gvt: set the correct default value of CTX STATUS PTR Fix wrong initial csb read pointer value. This fixes the random engine timeout issue in guest when guest boots up. Fixes: 8453d674ae7e ("drm/i915/gvt: vGPU execlist virtualization") Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Min He Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index f1f426a97aa9..d186c157f65f 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -775,7 +775,8 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) _EL_OFFSET_STATUS_PTR); ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg); - ctx_status_ptr.read_ptr = ctx_status_ptr.write_ptr = 0x7; + ctx_status_ptr.read_ptr = 0; + ctx_status_ptr.write_ptr = 0x7; vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; } -- cgit v1.2.3 From 83bce9c2baa51e439480a713119a73d3c8b61083 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 18 Mar 2017 21:53:05 -0400 Subject: drm/nouveau/mpeg: mthd returns true on success now Signed-off-by: Ilia Mirkin Fixes: 590801c1a3 ("drm/nouveau/mpeg: remove dependence on namedb/engctx lookup") Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c index 003ac915eaad..8a8895246d26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c @@ -198,7 +198,7 @@ nv31_mpeg_intr(struct nvkm_engine *engine) } if (type == 0x00000010) { - if (!nv31_mpeg_mthd(mpeg, mthd, data)) + if (nv31_mpeg_mthd(mpeg, mthd, data)) show &= ~0x01000000; } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c index e536f37e24b0..c3cf02ed468e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c @@ -172,7 +172,7 @@ nv44_mpeg_intr(struct nvkm_engine *engine) } if (type == 0x00000010) { - if (!nv44_mpeg_mthd(subdev->device, mthd, data)) + if (nv44_mpeg_mthd(subdev->device, mthd, data)) show &= ~0x01000000; } } -- cgit v1.2.3 From f94773b9f5ecd1df7c88c2e921924dd41d2020cc Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sat, 18 Mar 2017 16:23:10 -0400 Subject: drm/nouveau/mmu/nv4a: use nv04 mmu rather than the nv44 one The NV4A (aka NV44A) is an oddity in the family. It only comes in AGP and PCI varieties, rather than a core PCIE chip with a bridge for AGP/PCI as necessary. As a result, it appears that the MMU is also non-functional. For AGP cards, the vast majority of the NV4A lineup, this worked out since we force AGP cards to use the nv04 mmu. However for PCI variants, this did not work. Switching to the NV04 MMU makes it work like a charm. Thanks to mwk for the suggestion. This should be a no-op for NV4A AGP boards, as they were using it already. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70388 Signed-off-by: Ilia Mirkin Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 273562dd6bbd..0fc41db34522 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -714,7 +714,7 @@ nv4a_chipset = { .i2c = nv04_i2c_new, .imem = nv40_instmem_new, .mc = nv44_mc_new, - .mmu = nv44_mmu_new, + .mmu = nv04_mmu_new, .pci = nv40_pci_new, .therm = nv40_therm_new, .timer = nv41_timer_new, -- cgit v1.2.3 From d639fbcc102745187f747a21bdcffcf628e174c8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 5 Apr 2017 09:12:54 +1000 Subject: drm/nouveau/kms/nv50: fix setting of HeadSetRasterVertBlankDmi method Cc: stable@vger.kernel.org [4.10+] Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 0b4440ffbeae..4405ca1fb3e2 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -2036,6 +2036,7 @@ nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh) u32 vbackp = (mode->vtotal - mode->vsync_end) * vscan / ilace; u32 hfrontp = mode->hsync_start - mode->hdisplay; u32 vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace; + u32 blankus; struct nv50_head_mode *m = &asyh->mode; m->h.active = mode->htotal; @@ -2049,9 +2050,10 @@ nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh) m->v.blanks = m->v.active - vfrontp - 1; /*XXX: Safe underestimate, even "0" works */ - m->v.blankus = (m->v.active - mode->vdisplay - 2) * m->h.active; - m->v.blankus *= 1000; - m->v.blankus /= mode->clock; + blankus = (m->v.active - mode->vdisplay - 2) * m->h.active; + blankus *= 1000; + blankus /= mode->clock; + m->v.blankus = blankus; if (mode->flags & DRM_MODE_FLAG_INTERLACE) { m->v.blank2e = m->v.active + m->v.synce + vbackp; -- cgit v1.2.3 From 2907e8670b6ef253bffb33bf47fd2182969cf2a0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 5 Apr 2017 18:16:14 +1000 Subject: drm/nouveau/kms/nv50: fix double dma_fence_put() when destroying plane state When the atomic support was added to nouveau, the DRM core did not do this. However, later in the same merge window, a commit (drm/fence: add in-fences support) was merged that added it, leading to use-after-frees of the fence object. Cc: stable@vger.kernel.org [4.10+] Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 4405ca1fb3e2..a9182d5e6011 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -995,7 +995,6 @@ nv50_wndw_atomic_destroy_state(struct drm_plane *plane, { struct nv50_wndw_atom *asyw = nv50_wndw_atom(state); __drm_atomic_helper_plane_destroy_state(&asyw->state); - dma_fence_put(asyw->state.fence); kfree(asyw); } @@ -1007,7 +1006,6 @@ nv50_wndw_atomic_duplicate_state(struct drm_plane *plane) if (!(asyw = kmalloc(sizeof(*asyw), GFP_KERNEL))) return NULL; __drm_atomic_helper_plane_duplicate_state(plane, &asyw->state); - asyw->state.fence = NULL; asyw->interval = 1; asyw->sema = armw->sema; asyw->ntfy = armw->ntfy; -- cgit v1.2.3 From da2ba564a6dcf46df4f828624ff55531ff11d5b0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 6 Apr 2017 10:35:26 +1000 Subject: drm/nouveau: initial support (display-only) for GP107 Forked from GP106 implementation. Split out from commit enabling secboot/gr support so that it can be added to earlier kernels. Cc: stable@vger.kernel.org [4.10+] Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 30 +++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 0fc41db34522..3b86a7399567 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2271,6 +2271,35 @@ nv136_chipset = { .fifo = gp100_fifo_new, }; +static const struct nvkm_device_chip +nv137_chipset = { + .name = "GP107", + .bar = gf100_bar_new, + .bios = nvkm_bios_new, + .bus = gf100_bus_new, + .devinit = gm200_devinit_new, + .fb = gp102_fb_new, + .fuse = gm107_fuse_new, + .gpio = gk104_gpio_new, + .i2c = gm200_i2c_new, + .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, + .ltc = gp100_ltc_new, + .mc = gp100_mc_new, + .mmu = gf100_mmu_new, + .pci = gp100_pci_new, + .pmu = gp102_pmu_new, + .timer = gk20a_timer_new, + .top = gk104_top_new, + .ce[0] = gp102_ce_new, + .ce[1] = gp102_ce_new, + .ce[2] = gp102_ce_new, + .ce[3] = gp102_ce_new, + .disp = gp102_disp_new, + .dma = gf119_dma_new, + .fifo = gp100_fifo_new, +}; + static int nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size, struct nvkm_notify *notify) @@ -2708,6 +2737,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x132: device->chip = &nv132_chipset; break; case 0x134: device->chip = &nv134_chipset; break; case 0x136: device->chip = &nv136_chipset; break; + case 0x137: device->chip = &nv137_chipset; break; default: nvdev_error(device, "unknown chipset (%08x)\n", boot0); goto done; -- cgit v1.2.3 From abd80dcbc400fac878202136645e9acf0e0bfbd9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 17 Mar 2017 23:41:14 +0300 Subject: KVM: PPC: Book3S HV: Check for kmalloc errors in ioctl kzalloc() won't actually fail because sizeof(*resize) is small, but static checkers complain. Signed-off-by: Dan Carpenter Acked-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8c68145ba1bd..710e491206ed 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1487,6 +1487,10 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm, /* start new resize */ resize = kzalloc(sizeof(*resize), GFP_KERNEL); + if (!resize) { + ret = -ENOMEM; + goto out; + } resize->order = shift; resize->kvm = kvm; INIT_WORK(&resize->work, resize_hpt_prepare_work); -- cgit v1.2.3 From 3c1460e934f371bf91963b49a2cf173ee73c2cae Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 6 Apr 2017 14:54:00 +0300 Subject: pwm: lpss: Split Tangier configuration As a preparation for special treatment for Broxton we split Tangier configuration. Fixes: b89b4b7a3d0a ("pwm: lpss: pci: Enable PWM module on Intel Edison") Signed-off-by: Andy Shevchenko Tested-by: Hans de Goede Signed-off-by: Thierry Reding --- drivers/pwm/pwm-lpss-pci.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c index 053088b9b66e..073dfb2337e0 100644 --- a/drivers/pwm/pwm-lpss-pci.c +++ b/drivers/pwm/pwm-lpss-pci.c @@ -38,6 +38,13 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = { .base_unit_bits = 22, }; +/* Tangier */ +static const struct pwm_lpss_boardinfo pwm_lpss_tng_info = { + .clk_rate = 19200000, + .npwm = 4, + .base_unit_bits = 22, +}; + static int pwm_lpss_probe_pci(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -97,7 +104,7 @@ static const struct pci_device_id pwm_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x0ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info}, { PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info}, - { PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_bxt_info}, + { PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_tng_info}, { PCI_VDEVICE(INTEL, 0x1ac8), (unsigned long)&pwm_lpss_bxt_info}, { PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info}, { PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info}, -- cgit v1.2.3 From b997e3edca4fb4ab7732ab7240c545da0c360a44 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 6 Apr 2017 14:54:01 +0300 Subject: pwm: lpss: Set enable-bit before waiting for update-bit to go low At least on cherrytrail, the update bit will never go low when the enabled bit is not set. This causes the backlight on my cube iwork8 air tablet to never turn on again after being turned off because in the pwm_lpss_apply enable path pwm_lpss_update will fail causing an error exit and the enable-bit to never get set. Any following pwm_lpss_apply calls will fail the pwm_lpss_is_updating check. Since the docs say that the update bit should be set before the enable-bit, split pwm_lpss_update into setting the update-bit and pwm_lpss_wait_for_update, and move the pwm_lpss_wait_for_update call in the enable path to after setting the enable-bit. Fixes: 10d56a4 ("pwm: lpss: Avoid reconfiguring while UPDATE bit...") Cc: Ilkka Koskinen Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko Tested-by: Hans de Goede Signed-off-by: Thierry Reding --- drivers/pwm/pwm-lpss-pci.c | 1 + drivers/pwm/pwm-lpss-platform.c | 1 + drivers/pwm/pwm-lpss.c | 19 +++++++++++++------ drivers/pwm/pwm-lpss.h | 1 + 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c index 073dfb2337e0..c1527cb645be 100644 --- a/drivers/pwm/pwm-lpss-pci.c +++ b/drivers/pwm/pwm-lpss-pci.c @@ -36,6 +36,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = { .clk_rate = 19200000, .npwm = 4, .base_unit_bits = 22, + .bypass = true, }; /* Tangier */ diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c index b22b6fdadb9a..5d6ed1507d29 100644 --- a/drivers/pwm/pwm-lpss-platform.c +++ b/drivers/pwm/pwm-lpss-platform.c @@ -37,6 +37,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = { .clk_rate = 19200000, .npwm = 4, .base_unit_bits = 22, + .bypass = true, }; static int pwm_lpss_probe_platform(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 689d2c1cbead..8db0d40ccacd 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -57,7 +57,7 @@ static inline void pwm_lpss_write(const struct pwm_device *pwm, u32 value) writel(value, lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM); } -static int pwm_lpss_update(struct pwm_device *pwm) +static int pwm_lpss_wait_for_update(struct pwm_device *pwm) { struct pwm_lpss_chip *lpwm = to_lpwm(pwm->chip); const void __iomem *addr = lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM; @@ -65,8 +65,6 @@ static int pwm_lpss_update(struct pwm_device *pwm) u32 val; int err; - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); - /* * PWM Configuration register has SW_UPDATE bit that is set when a new * configuration is written to the register. The bit is automatically @@ -122,6 +120,12 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, pwm_lpss_write(pwm, ctrl); } +static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond) +{ + if (cond) + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE); +} + static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { @@ -137,18 +141,21 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, return ret; } pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - ret = pwm_lpss_update(pwm); + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); + pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false); + ret = pwm_lpss_wait_for_update(pwm); if (ret) { pm_runtime_put(chip->dev); return ret; } - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE); + pwm_lpss_cond_enable(pwm, lpwm->info->bypass == true); } else { ret = pwm_lpss_is_updating(pwm); if (ret) return ret; pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - return pwm_lpss_update(pwm); + pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); + return pwm_lpss_wait_for_update(pwm); } } else if (pwm_is_enabled(pwm)) { pwm_lpss_write(pwm, pwm_lpss_read(pwm) & ~PWM_ENABLE); diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h index c94cd7c2695d..98306bb02cfe 100644 --- a/drivers/pwm/pwm-lpss.h +++ b/drivers/pwm/pwm-lpss.h @@ -22,6 +22,7 @@ struct pwm_lpss_boardinfo { unsigned long clk_rate; unsigned int npwm; unsigned long base_unit_bits; + bool bypass; }; struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, -- cgit v1.2.3 From fe1a83b43869790501ed82b58dc8229023c69f74 Mon Sep 17 00:00:00 2001 From: Xiaolei Yu Date: Thu, 30 Mar 2017 19:43:09 +0800 Subject: HID: uclogic: add support for Ugee Tablet EX07S This device has a different vendor id but responds to initialization. Signed-off-by: Xiaolei Yu Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-uclogic.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 63ec1993eaaa..86bbd8a1fd4a 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2096,6 +2096,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) }, + { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 4e2648c86c8c..b26c030926c1 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1028,6 +1028,9 @@ #define USB_DEVICE_ID_UGEE_TABLET_45 0x0045 #define USB_DEVICE_ID_YIYNOVA_TABLET 0x004d +#define USB_VENDOR_ID_UGEE 0x28bd +#define USB_DEVICE_ID_UGEE_TABLET_EX07S 0x0071 + #define USB_VENDOR_ID_UNITEC 0x227d #define USB_DEVICE_ID_UNITEC_USB_TOUCH_0709 0x0709 #define USB_DEVICE_ID_UNITEC_USB_TOUCH_0A19 0x0a19 diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c index 1509d7287ff3..e3e6e5c893cc 100644 --- a/drivers/hid/hid-uclogic.c +++ b/drivers/hid/hid-uclogic.c @@ -977,6 +977,7 @@ static int uclogic_probe(struct hid_device *hdev, } break; case USB_DEVICE_ID_UGTIZER_TABLET_GP0610: + case USB_DEVICE_ID_UGEE_TABLET_EX07S: /* If this is the pen interface */ if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { rc = uclogic_tablet_enable(hdev); @@ -1069,6 +1070,7 @@ static const struct hid_device_id uclogic_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) }, { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) }, + { HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) }, { } }; MODULE_DEVICE_TABLE(hid, uclogic_devices); -- cgit v1.2.3 From a900152b5c29aea8134cc7a4c5db25552b3cd8f7 Mon Sep 17 00:00:00 2001 From: David Wu Date: Wed, 1 Mar 2017 19:10:55 +0800 Subject: pwm: rockchip: State of PWM clock should synchronize with PWM enabled state If the PWM was not enabled at U-Boot loader, PWM could not work for clock always disabled at PWM driver. The PWM clock is enabled at beginning of pwm_apply(), but disabled at end of pwm_apply(). If the PWM was enabled at U-Boot loader, PWM clock is always enabled unless closed by ATF. The pwm-backlight might turn off the power at early suspend, should disable PWM clock for saving power consume. It is important to provide opportunity to enable/disable clock at PWM driver, the PWM consumer should ensure correct order to call PWM enable and disable, and PWM driver ensure state of PWM clock synchronized with PWM enabled state. Fixes: 2bf1c98aa5a4 ("pwm: rockchip: Add support for atomic update") Cc: stable@vger.kernel.org Signed-off-by: David Wu Reviewed-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/pwm-rockchip.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index ef89df1f7336..744d56197286 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -191,6 +191,28 @@ static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } +static int rockchip_pwm_enable(struct pwm_chip *chip, + struct pwm_device *pwm, + bool enable, + enum pwm_polarity polarity) +{ + struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip); + int ret; + + if (enable) { + ret = clk_enable(pc->clk); + if (ret) + return ret; + } + + pc->data->set_enable(chip, pwm, enable, polarity); + + if (!enable) + clk_disable(pc->clk); + + return 0; +} + static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { @@ -207,22 +229,26 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, return ret; if (state->polarity != curstate.polarity && enabled) { - pc->data->set_enable(chip, pwm, false, state->polarity); + ret = rockchip_pwm_enable(chip, pwm, false, state->polarity); + if (ret) + goto out; enabled = false; } ret = rockchip_pwm_config(chip, pwm, state->duty_cycle, state->period); if (ret) { if (enabled != curstate.enabled) - pc->data->set_enable(chip, pwm, !enabled, - state->polarity); - + rockchip_pwm_enable(chip, pwm, !enabled, + state->polarity); goto out; } - if (state->enabled != enabled) - pc->data->set_enable(chip, pwm, state->enabled, - state->polarity); + if (state->enabled != enabled) { + ret = rockchip_pwm_enable(chip, pwm, state->enabled, + state->polarity); + if (ret) + goto out; + } /* * Update the state with the real hardware, which can differ a bit -- cgit v1.2.3 From 9ae34dbd8afd790cb5f52467e4f816434379eafa Mon Sep 17 00:00:00 2001 From: Tom Hromatka Date: Fri, 31 Mar 2017 16:31:42 -0600 Subject: sparc64: Fix kernel panic due to erroneous #ifdef surrounding pmd_write() This commit moves sparc64's prototype of pmd_write() outside of the CONFIG_TRANSPARENT_HUGEPAGE ifdef. In 2013, commit a7b9403f0e6d ("sparc64: Encode huge PMDs using PTE encoding.") exposed a path where pmd_write() could be called without CONFIG_TRANSPARENT_HUGEPAGE defined. This can result in the panic below. The diff is awkward to read, but the changes are straightforward. pmd_write() was moved outside of #ifdef CONFIG_TRANSPARENT_HUGEPAGE. Also, __HAVE_ARCH_PMD_WRITE was defined. kernel BUG at include/asm-generic/pgtable.h:576! \|/ ____ \|/ "@'/ .. \`@" /_| \__/ |_\ \__U_/ oracle_8114_cdb(8114): Kernel bad sw trap 5 [#1] CPU: 120 PID: 8114 Comm: oracle_8114_cdb Not tainted 4.1.12-61.7.1.el6uek.rc1.sparc64 #1 task: fff8400700a24d60 ti: fff8400700bc4000 task.ti: fff8400700bc4000 TSTATE: 0000004411e01607 TPC: 00000000004609f8 TNPC: 00000000004609fc Y: 00000005 Not tainted TPC: g0: 000000000001c000 g1: 0000000000ef3954 g2: 0000000000000000 g3: 0000000000000001 g4: fff8400700a24d60 g5: fff8001fa5c10000 g6: fff8400700bc4000 g7: 0000000000000720 o0: 0000000000bc5058 o1: 0000000000000240 o2: 0000000000006000 o3: 0000000000001c00 o4: 0000000000000000 o5: 0000048000080000 sp: fff8400700bc6ab1 ret_pc: 00000000004609f0 RPC: l0: fff8400700bc74fc l1: 0000000000020000 l2: 0000000000002000 l3: 0000000000000000 l4: fff8001f93250950 l5: 000000000113f800 l6: 0000000000000004 l7: 0000000000000000 i0: fff8400700ca46a0 i1: bd0000085e800453 i2: 000000026a0c4000 i3: 000000026a0c6000 i4: 0000000000000001 i5: fff800070c958de8 i6: fff8400700bc6b61 i7: 0000000000460dd0 I7: Call Trace: [0000000000460dd0] gup_pud_range+0x170/0x1a0 [0000000000460e84] get_user_pages_fast+0x84/0x120 [00000000006f5a18] iov_iter_get_pages+0x98/0x240 [00000000005fa744] do_direct_IO+0xf64/0x1e00 [00000000005fbbc0] __blockdev_direct_IO+0x360/0x15a0 [00000000101f74fc] ext4_ind_direct_IO+0xdc/0x400 [ext4] [00000000101af690] ext4_ext_direct_IO+0x1d0/0x2c0 [ext4] [00000000101af86c] ext4_direct_IO+0xec/0x220 [ext4] [0000000000553bd4] generic_file_read_iter+0x114/0x140 [00000000005bdc2c] __vfs_read+0xac/0x100 [00000000005bf254] vfs_read+0x54/0x100 [00000000005bf368] SyS_pread64+0x68/0x80 Signed-off-by: Tom Hromatka Signed-off-by: David S. Miller --- arch/sparc/include/asm/pgtable_64.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 8a598528ec1f..6fbd931f0570 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -679,26 +679,27 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return pte_pfn(pte); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline unsigned long pmd_dirty(pmd_t pmd) +#define __HAVE_ARCH_PMD_WRITE +static inline unsigned long pmd_write(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_dirty(pte); + return pte_write(pte); } -static inline unsigned long pmd_young(pmd_t pmd) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline unsigned long pmd_dirty(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_young(pte); + return pte_dirty(pte); } -static inline unsigned long pmd_write(pmd_t pmd) +static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); - return pte_write(pte); + return pte_young(pte); } static inline unsigned long pmd_trans_huge(pmd_t pmd) -- cgit v1.2.3 From 76811263b3fa6347699a446cddeb63badf3e6095 Mon Sep 17 00:00:00 2001 From: Nitin Gupta Date: Fri, 31 Mar 2017 15:48:53 -0700 Subject: sparc64: Fix memory corruption when THP is enabled The memory corruption was happening due to incorrect TLB/TSB flushing of hugepages. Reported-by: David S. Miller Signed-off-by: Nitin Gupta Signed-off-by: David S. Miller --- arch/sparc/mm/tlb.c | 6 +++--- arch/sparc/mm/tsb.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index afda3bbf7854..ee8066c3d96c 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -154,7 +154,7 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, if (pte_val(*pte) & _PAGE_VALID) { bool exec = pte_exec(*pte); - tlb_batch_add_one(mm, vaddr, exec, false); + tlb_batch_add_one(mm, vaddr, exec, PAGE_SHIFT); } pte++; vaddr += PAGE_SIZE; @@ -209,9 +209,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, pte_t orig_pte = __pte(pmd_val(orig)); bool exec = pte_exec(orig_pte); - tlb_batch_add_one(mm, addr, exec, true); + tlb_batch_add_one(mm, addr, exec, REAL_HPAGE_SHIFT); tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec, - true); + REAL_HPAGE_SHIFT); } else { tlb_batch_pmd_scan(mm, addr, orig); } diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 0a04811f06b7..bedf08b22a47 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -122,7 +122,7 @@ void flush_tsb_user(struct tlb_batch *tb) spin_lock_irqsave(&mm->context.lock, flags); - if (tb->hugepage_shift < HPAGE_SHIFT) { + if (tb->hugepage_shift < REAL_HPAGE_SHIFT) { base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) @@ -155,7 +155,7 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, spin_lock_irqsave(&mm->context.lock, flags); - if (hugepage_shift < HPAGE_SHIFT) { + if (hugepage_shift < REAL_HPAGE_SHIFT) { base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) -- cgit v1.2.3 From 9d262d95114cf2e2ac5e0ff358347fa2e214eda5 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 1 Apr 2017 13:47:44 -0700 Subject: sparc32: Export vac_cache_size to fix build error sparc32:allmodconfig fails to build with the following error. ERROR: "vac_cache_size" [drivers/infiniband/sw/rxe/rdma_rxe.ko] undefined! Fixes: cb8864559631 ("infiniband: Fix alignment of mmap cookies ...") Cc: Jason Gunthorpe Cc: Doug Ledford Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- arch/sparc/mm/srmmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index def82f6d626f..8e76ebba2986 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -54,6 +54,7 @@ enum mbus_module srmmu_modtype; static unsigned int hwbug_bitmask; int vac_cache_size; +EXPORT_SYMBOL(vac_cache_size); int vac_line_size; extern struct resource sparc_iomap; -- cgit v1.2.3 From 86e1066fe2af51dce4351c2357223e0d902dfc7a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 5 Apr 2017 21:24:20 +0200 Subject: sparc: remove unused wp_works_ok macro It's unused for ages, used to be required for ksyms.c back in the v1.1 times. Signed-off-by: Mathias Krause Acked-by: David S. Miller Signed-off-by: David S. Miller --- arch/sparc/include/asm/processor_32.h | 6 ------ arch/sparc/include/asm/processor_64.h | 4 ---- 2 files changed, 10 deletions(-) diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index 365d4cb267b4..dd27159819eb 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -18,12 +18,6 @@ #include #include -/* - * The sparc has no problems with write protection - */ -#define wp_works_ok 1 -#define wp_works_ok__is_a_macro /* for versions in ksyms.c */ - /* Whee, this is STACK_TOP + PAGE_SIZE and the lowest kernel address too... * That one page is used to protect kernel from intruders, so that * we can make our access_ok test faster diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 6448cfc8292f..b58ee9018433 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -18,10 +18,6 @@ #include #include -/* The sparc has no problems with write protection */ -#define wp_works_ok 1 -#define wp_works_ok__is_a_macro /* for versions in ksyms.c */ - /* * User lives in his very own context, and cannot reference us. Note * that TASK_SIZE is a misnomer, it really gives maximum user virtual -- cgit v1.2.3 From 6118714275f0a313ecc296a87ed1af32d9691bed Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 30 Mar 2017 09:16:39 -0700 Subject: pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent pinctrl changes to allow dynamic allocation of pins exposed one more issue with the pinctrl pins claimed early by the controller itself. This caused a regression for IMX6 pinctrl hogs. Before enabling the pin controller driver we need to wait until it has been properly initialized, then claim the hogs, and only then enable it. To fix the regression, split the code into pinctrl_claim_hogs() and pinctrl_enable(). And then let's require that pinctrl_enable() is always called by the pin controller driver when ready after calling pinctrl_register_and_init(). Depends-on: 950b0d91dc10 ("pinctrl: core: Fix regression caused by delayed work for hogs") Fixes: df61b366af26 ("pinctrl: core: Use delayed work for hogs") Fixes: e566fc11ea76 ("pinctrl: imx: use generic pinctrl helpers for managing groups") Cc: Haojian Zhuang Cc: Masahiro Yamada Cc: Mika Penttilä Cc: Mika Westerberg Cc: Nishanth Menon Cc: Shawn Guo Cc: Stefan Agner Tested-by: Geert Uytterhoeven Tested-by: Gary Bisson Tested-by: Fabio Estevam Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij --- Documentation/pinctrl.txt | 8 ++- drivers/pinctrl/core.c | 97 +++++++++++++++++++++------------ drivers/pinctrl/freescale/pinctrl-imx.c | 2 +- drivers/pinctrl/pinctrl-single.c | 2 +- drivers/pinctrl/sh-pfc/pinctrl.c | 11 +++- drivers/pinctrl/ti/pinctrl-ti-iodelay.c | 2 + include/linux/pinctrl/pinctrl.h | 3 +- 7 files changed, 83 insertions(+), 42 deletions(-) diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt index 54bd5faa8782..f2af35f6d6b2 100644 --- a/Documentation/pinctrl.txt +++ b/Documentation/pinctrl.txt @@ -77,9 +77,15 @@ static struct pinctrl_desc foo_desc = { int __init foo_probe(void) { + int error; + struct pinctrl_dev *pctl; - return pinctrl_register_and_init(&foo_desc, , NULL, &pctl); + error = pinctrl_register_and_init(&foo_desc, , NULL, &pctl); + if (error) + return error; + + return pinctrl_enable(pctl); } To enable the pinctrl subsystem and the subgroups for PINMUX and PINCONF and diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index d69046537b75..32822b0d9cd0 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2010,29 +2010,57 @@ out_err: return ERR_PTR(ret); } -static int pinctrl_create_and_start(struct pinctrl_dev *pctldev) +static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev) { pctldev->p = create_pinctrl(pctldev->dev, pctldev); - if (!IS_ERR(pctldev->p)) { - kref_get(&pctldev->p->users); - pctldev->hog_default = - pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); - if (IS_ERR(pctldev->hog_default)) { - dev_dbg(pctldev->dev, - "failed to lookup the default state\n"); - } else { - if (pinctrl_select_state(pctldev->p, - pctldev->hog_default)) - dev_err(pctldev->dev, - "failed to select default state\n"); - } + if (PTR_ERR(pctldev->p) == -ENODEV) { + dev_dbg(pctldev->dev, "no hogs found\n"); - pctldev->hog_sleep = - pinctrl_lookup_state(pctldev->p, - PINCTRL_STATE_SLEEP); - if (IS_ERR(pctldev->hog_sleep)) - dev_dbg(pctldev->dev, - "failed to lookup the sleep state\n"); + return 0; + } + + if (IS_ERR(pctldev->p)) { + dev_err(pctldev->dev, "error claiming hogs: %li\n", + PTR_ERR(pctldev->p)); + + return PTR_ERR(pctldev->p); + } + + kref_get(&pctldev->p->users); + pctldev->hog_default = + pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); + if (IS_ERR(pctldev->hog_default)) { + dev_dbg(pctldev->dev, + "failed to lookup the default state\n"); + } else { + if (pinctrl_select_state(pctldev->p, + pctldev->hog_default)) + dev_err(pctldev->dev, + "failed to select default state\n"); + } + + pctldev->hog_sleep = + pinctrl_lookup_state(pctldev->p, + PINCTRL_STATE_SLEEP); + if (IS_ERR(pctldev->hog_sleep)) + dev_dbg(pctldev->dev, + "failed to lookup the sleep state\n"); + + return 0; +} + +int pinctrl_enable(struct pinctrl_dev *pctldev) +{ + int error; + + error = pinctrl_claim_hogs(pctldev); + if (error) { + dev_err(pctldev->dev, "could not claim hogs: %i\n", + error); + mutex_destroy(&pctldev->mutex); + kfree(pctldev); + + return error; } mutex_lock(&pinctrldev_list_mutex); @@ -2043,6 +2071,7 @@ static int pinctrl_create_and_start(struct pinctrl_dev *pctldev) return 0; } +EXPORT_SYMBOL_GPL(pinctrl_enable); /** * pinctrl_register() - register a pin controller device @@ -2065,25 +2094,30 @@ struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, if (IS_ERR(pctldev)) return pctldev; - error = pinctrl_create_and_start(pctldev); - if (error) { - mutex_destroy(&pctldev->mutex); - kfree(pctldev); - + error = pinctrl_enable(pctldev); + if (error) return ERR_PTR(error); - } return pctldev; } EXPORT_SYMBOL_GPL(pinctrl_register); +/** + * pinctrl_register_and_init() - register and init pin controller device + * @pctldesc: descriptor for this pin controller + * @dev: parent device for this pin controller + * @driver_data: private pin controller data for this pin controller + * @pctldev: pin controller device + * + * Note that pinctrl_enable() still needs to be manually called after + * this once the driver is ready. + */ int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data, struct pinctrl_dev **pctldev) { struct pinctrl_dev *p; - int error; p = pinctrl_init_controller(pctldesc, dev, driver_data); if (IS_ERR(p)) @@ -2097,15 +2131,6 @@ int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, */ *pctldev = p; - error = pinctrl_create_and_start(p); - if (error) { - mutex_destroy(&p->mutex); - kfree(p); - *pctldev = NULL; - - return error; - } - return 0; } EXPORT_SYMBOL_GPL(pinctrl_register_and_init); diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c index a7ace9e1ad81..74bd90dfd7b1 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx.c +++ b/drivers/pinctrl/freescale/pinctrl-imx.c @@ -790,7 +790,7 @@ int imx_pinctrl_probe(struct platform_device *pdev, dev_info(&pdev->dev, "initialized IMX pinctrl driver\n"); - return 0; + return pinctrl_enable(ipctl->pctl); free: imx_free_resources(ipctl); diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 8b2d45e85bae..9c267dcda094 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1781,7 +1781,7 @@ static int pcs_probe(struct platform_device *pdev) dev_info(pcs->dev, "%i pins at pa %p size %u\n", pcs->desc.npins, pcs->base, pcs->size); - return 0; + return pinctrl_enable(pcs->pctl); free: pcs_free_resources(pcs); diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c index 08150a321be6..a70157f0acf4 100644 --- a/drivers/pinctrl/sh-pfc/pinctrl.c +++ b/drivers/pinctrl/sh-pfc/pinctrl.c @@ -816,6 +816,13 @@ int sh_pfc_register_pinctrl(struct sh_pfc *pfc) pmx->pctl_desc.pins = pmx->pins; pmx->pctl_desc.npins = pfc->info->nr_pins; - return devm_pinctrl_register_and_init(pfc->dev, &pmx->pctl_desc, pmx, - &pmx->pctl); + ret = devm_pinctrl_register_and_init(pfc->dev, &pmx->pctl_desc, pmx, + &pmx->pctl); + if (ret) { + dev_err(pfc->dev, "could not register: %i\n", ret); + + return ret; + } + + return pinctrl_enable(pmx->pctl); } diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c index 717e3404900c..362c50918c13 100644 --- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c +++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c @@ -893,6 +893,8 @@ static int ti_iodelay_probe(struct platform_device *pdev) platform_set_drvdata(pdev, iod); + return pinctrl_enable(iod->pctl); + exit_out: of_node_put(np); return ret; diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 8ce2d87a238b..5e45385c5bdc 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -145,8 +145,9 @@ struct pinctrl_desc { extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data, struct pinctrl_dev **pctldev); +extern int pinctrl_enable(struct pinctrl_dev *pctldev); -/* Please use pinctrl_register_and_init() instead */ +/* Please use pinctrl_register_and_init() and pinctrl_enable() instead */ extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data); -- cgit v1.2.3 From 4749228f022893faf54a3dbc70796f78b7d4f342 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 6 Apr 2017 23:34:38 +1000 Subject: powerpc/crypto/crc32c-vpmsum: Fix missing preempt_disable() In crc32c_vpmsum() we call enable_kernel_altivec() without first disabling preemption, which is not allowed: WARNING: CPU: 9 PID: 2949 at ../arch/powerpc/kernel/process.c:277 enable_kernel_altivec+0x100/0x120 Modules linked in: dm_thin_pool dm_persistent_data dm_bio_prison dm_bufio libcrc32c vmx_crypto ... CPU: 9 PID: 2949 Comm: docker Not tainted 4.11.0-rc5-compiler_gcc-6.3.1-00033-g308ac7563944 #381 ... NIP [c00000000001e320] enable_kernel_altivec+0x100/0x120 LR [d000000003df0910] crc32c_vpmsum+0x108/0x150 [crc32c_vpmsum] Call Trace: 0xc138fd09 (unreliable) crc32c_vpmsum+0x108/0x150 [crc32c_vpmsum] crc32c_vpmsum_update+0x3c/0x60 [crc32c_vpmsum] crypto_shash_update+0x88/0x1c0 crc32c+0x64/0x90 [libcrc32c] dm_bm_checksum+0x48/0x80 [dm_persistent_data] sb_check+0x84/0x120 [dm_thin_pool] dm_bm_validate_buffer.isra.0+0xc0/0x1b0 [dm_persistent_data] dm_bm_read_lock+0x80/0xf0 [dm_persistent_data] __create_persistent_data_objects+0x16c/0x810 [dm_thin_pool] dm_pool_metadata_open+0xb0/0x1a0 [dm_thin_pool] pool_ctr+0x4cc/0xb60 [dm_thin_pool] dm_table_add_target+0x16c/0x3c0 table_load+0x184/0x400 ctl_ioctl+0x2f0/0x560 dm_ctl_ioctl+0x38/0x50 do_vfs_ioctl+0xd8/0x920 SyS_ioctl+0x68/0xc0 system_call+0x38/0xfc It used to be sufficient just to call pagefault_disable(), because that also disabled preemption. But the two were decoupled in commit 8222dbe21e79 ("sched/preempt, mm/fault: Decouple preemption from the page fault logic") in mid 2015. So add the missing preempt_disable/enable(). We should also call disable_kernel_fp(), although it does nothing by default, there is a debug switch to make it active and all enables should be paired with disables. Fixes: 6dd7a82cc54e ("crypto: powerpc - Add POWER8 optimised crc32c") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Michael Ellerman --- arch/powerpc/crypto/crc32c-vpmsum_glue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index 411994551afc..f058e0c3e4d4 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -33,10 +33,13 @@ static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len) } if (len & ~VMX_ALIGN_MASK) { + preempt_disable(); pagefault_disable(); enable_kernel_altivec(); crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); pagefault_enable(); + preempt_enable(); } tail = len & VMX_ALIGN_MASK; -- cgit v1.2.3 From 6ae979ab39a368c18ceb0424bf824d172d6ab56f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 31 Mar 2017 12:23:43 +0100 Subject: Revert "Revert "arm64: hugetlb: partial revert of 66b3923a1a0f"" The use of the contiguous bit by our hugetlb implementation violates the break-before-make requirements of the architecture and can lead to silent data corruption or TLB conflict aborts. Once again, disable these hugetlb sizes whilst it gets worked out. This reverts commit ab2e1b89230fa80328262c91d2d0a539a2790d6f. Conflicts: arch/arm64/mm/hugetlbpage.c Signed-off-by: Will Deacon --- arch/arm64/mm/hugetlbpage.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index e25584d72396..7514a000e361 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -294,10 +294,6 @@ static __init int setup_hugepagesz(char *opt) hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); } else if (ps == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); - } else if (ps == (PAGE_SIZE * CONT_PTES)) { - hugetlb_add_hstate(CONT_PTE_SHIFT); - } else if (ps == (PMD_SIZE * CONT_PMDS)) { - hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT); } else { hugetlb_bad_size(); pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10); @@ -306,13 +302,3 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); - -#ifdef CONFIG_ARM64_64K_PAGES -static __init int add_default_hugepagesz(void) -{ - if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL) - hugetlb_add_hstate(CONT_PTE_SHIFT); - return 0; -} -arch_initcall(add_default_hugepagesz); -#endif -- cgit v1.2.3 From 38bd49064a1ecb67baad33598e3d824448ab11ec Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 3 Mar 2017 15:41:38 -0800 Subject: Handle mismatched open calls A signal can interrupt a SendReceive call which result in incoming responses to the call being ignored. This is a problem for calls such as open which results in the successful response being ignored. This results in an open file resource on the server. The patch looks into responses which were cancelled after being sent and in case of successful open closes the open fids. For this patch, the check is only done in SendReceive2() RH-bz: 1403319 Signed-off-by: Sachin Prabhu Reviewed-by: Pavel Shilovsky Cc: Stable --- fs/cifs/cifsglob.h | 11 ++++++++++ fs/cifs/cifsproto.h | 3 ++- fs/cifs/cifssmb.c | 11 ++++++---- fs/cifs/connect.c | 13 ++++++++++-- fs/cifs/smb2misc.c | 46 +++++++++++++++++++++++++++++++++++++++++ fs/cifs/smb2ops.c | 8 +++++-- fs/cifs/smb2proto.h | 7 +++++++ fs/cifs/smb2transport.c | 55 +++++++++++++++++++++++++++++++++++++++++++++---- fs/cifs/transport.c | 2 ++ 9 files changed, 143 insertions(+), 13 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index d42dd3288647..c34bdb12c8e6 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -243,6 +243,7 @@ struct smb_version_operations { /* verify the message */ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); + int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *, struct cifsInodeInfo *, bool); /* process transaction2 response */ @@ -1343,6 +1344,7 @@ struct mid_q_entry { void *callback_data; /* general purpose pointer for callback */ void *resp_buf; /* pointer to received SMB header */ int mid_state; /* wish this were enum but can not pass to wait_event */ + unsigned int mid_flags; __le16 command; /* smb command code */ bool large_buf:1; /* if valid response, is pointer to large buf */ bool multiRsp:1; /* multiple trans2 responses for one request */ @@ -1350,6 +1352,12 @@ struct mid_q_entry { bool decrypted:1; /* decrypted entry */ }; +struct close_cancelled_open { + struct cifs_fid fid; + struct cifs_tcon *tcon; + struct work_struct work; +}; + /* Make code in transport.c a little cleaner by moving update of optional stats into function below */ #ifdef CONFIG_CIFS_STATS2 @@ -1481,6 +1489,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define MID_RESPONSE_MALFORMED 0x10 #define MID_SHUTDOWN 0x20 +/* Flags */ +#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ + /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ #define CIFS_SMALL_BUFFER 1 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 97e5d236d265..ec5e5e514fdd 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -79,7 +79,8 @@ extern void cifs_delete_mid(struct mid_q_entry *mid); extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); -extern int cifs_discard_remaining_data(struct TCP_Server_Info *server); +extern int cifs_discard_remaining_data(struct TCP_Server_Info *server, + char *buf); extern int cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, mid_receive_t *receive, mid_callback_t *callback, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 066950671929..967b92631807 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1400,9 +1400,9 @@ openRetry: * current bigbuf. */ int -cifs_discard_remaining_data(struct TCP_Server_Info *server) +cifs_discard_remaining_data(struct TCP_Server_Info *server, char *buf) { - unsigned int rfclen = get_rfc1002_length(server->smallbuf); + unsigned int rfclen = get_rfc1002_length(buf); int remaining = rfclen + 4 - server->total_read; while (remaining > 0) { @@ -1426,7 +1426,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) int length; struct cifs_readdata *rdata = mid->callback_data; - length = cifs_discard_remaining_data(server); + length = cifs_discard_remaining_data(server, mid->resp_buf); dequeue_mid(mid, rdata->result); return length; } @@ -1459,7 +1459,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) if (server->ops->is_status_pending && server->ops->is_status_pending(buf, server, 0)) { - cifs_discard_remaining_data(server); + cifs_discard_remaining_data(server, buf); return -1; } @@ -1519,6 +1519,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n", rdata->iov[0].iov_base, server->total_read); + mid->resp_buf = server->smallbuf; + server->smallbuf = NULL; + /* how much data is in the response? */ data_len = server->ops->read_data_length(buf); if (data_offset + data_len > buflen) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 9ae695ae3ed7..0c7596cef4b8 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -904,10 +904,19 @@ cifs_demultiplex_thread(void *p) server->lstrp = jiffies; if (mid_entry != NULL) { + if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) && + mid_entry->mid_state == MID_RESPONSE_RECEIVED && + server->ops->handle_cancelled_mid) + server->ops->handle_cancelled_mid( + mid_entry->resp_buf, + server); + if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); - } else if (!server->ops->is_oplock_break || - !server->ops->is_oplock_break(buf, server)) { + } else if (server->ops->is_oplock_break && + server->ops->is_oplock_break(buf, server)) { + cifs_dbg(FYI, "Received oplock break\n"); + } else { cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n", atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", buf, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index fd516ea8b8f8..1a04b3a5beb1 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -659,3 +659,49 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n"); return false; } + +void +smb2_cancelled_close_fid(struct work_struct *work) +{ + struct close_cancelled_open *cancelled = container_of(work, + struct close_cancelled_open, work); + + cifs_dbg(VFS, "Close unmatched open\n"); + + SMB2_close(0, cancelled->tcon, cancelled->fid.persistent_fid, + cancelled->fid.volatile_fid); + cifs_put_tcon(cancelled->tcon); + kfree(cancelled); +} + +int +smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server) +{ + struct smb2_sync_hdr *sync_hdr = get_sync_hdr(buffer); + struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer; + struct cifs_tcon *tcon; + struct close_cancelled_open *cancelled; + + if (sync_hdr->Command != SMB2_CREATE || + sync_hdr->Status != STATUS_SUCCESS) + return 0; + + cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL); + if (!cancelled) + return -ENOMEM; + + tcon = smb2_find_smb_tcon(server, sync_hdr->SessionId, + sync_hdr->TreeId); + if (!tcon) { + kfree(cancelled); + return -ENOENT; + } + + cancelled->fid.persistent_fid = rsp->PersistentFileId; + cancelled->fid.volatile_fid = rsp->VolatileFileId; + cancelled->tcon = tcon; + INIT_WORK(&cancelled->work, smb2_cancelled_close_fid); + queue_work(cifsiod_wq, &cancelled->work); + + return 0; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0231108d9387..b6bdf93042eb 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2188,7 +2188,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid) if (rc) goto free_pages; - rc = cifs_discard_remaining_data(server); + rc = cifs_discard_remaining_data(server, buf); if (rc) goto free_pages; @@ -2214,7 +2214,7 @@ free_pages: kfree(pages); return rc; discard_data: - cifs_discard_remaining_data(server); + cifs_discard_remaining_data(server, buf); goto free_pages; } @@ -2322,6 +2322,7 @@ struct smb_version_operations smb20_operations = { .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -2404,6 +2405,7 @@ struct smb_version_operations smb21_operations = { .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -2488,6 +2490,7 @@ struct smb_version_operations smb30_operations = { .print_stats = smb2_print_stats, .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -2582,6 +2585,7 @@ struct smb_version_operations smb311_operations = { .print_stats = smb2_print_stats, .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 69e35873b1de..6853454fc871 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -48,6 +48,10 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst); extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); +extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server, + __u64 ses_id); +extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, + __u64 ses_id, __u32 tid); extern int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server); extern int smb3_calc_signature(struct smb_rqst *rqst, @@ -164,6 +168,9 @@ extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, const u64 persistent_fid, const u64 volatile_fid, const __u8 oplock_level); +extern int smb2_handle_cancelled_mid(char *buffer, + struct TCP_Server_Info *server); +void smb2_cancelled_close_fid(struct work_struct *work); extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct kstatfs *FSData); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 7c3bb1bd7eed..506b67fc93d9 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -115,23 +115,70 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server) return 0; } -struct cifs_ses * -smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id) +static struct cifs_ses * +smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) { struct cifs_ses *ses; - spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { if (ses->Suid != ses_id) continue; - spin_unlock(&cifs_tcp_ses_lock); return ses; } + + return NULL; +} + +struct cifs_ses * +smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id) +{ + struct cifs_ses *ses; + + spin_lock(&cifs_tcp_ses_lock); + ses = smb2_find_smb_ses_unlocked(server, ses_id); spin_unlock(&cifs_tcp_ses_lock); + return ses; +} + +static struct cifs_tcon * +smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid) +{ + struct cifs_tcon *tcon; + + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->tid != tid) + continue; + ++tcon->tc_count; + return tcon; + } + return NULL; } +/* + * Obtain tcon corresponding to the tid in the given + * cifs_ses + */ + +struct cifs_tcon * +smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) +{ + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + spin_lock(&cifs_tcp_ses_lock); + ses = smb2_find_smb_ses_unlocked(server, ses_id); + if (!ses) { + spin_unlock(&cifs_tcp_ses_lock); + return NULL; + } + tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid); + spin_unlock(&cifs_tcp_ses_lock); + + return tcon; +} + int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 526f0533cb4e..f6e13a977fc8 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -752,9 +752,11 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses, rc = wait_for_response(ses->server, midQ); if (rc != 0) { + cifs_dbg(FYI, "Cancelling wait for mid %llu\n", midQ->mid); send_cancel(ses->server, rqst, midQ); spin_lock(&GlobalMid_Lock); if (midQ->mid_state == MID_REQUEST_SUBMITTED) { + midQ->mid_flags |= MID_WAIT_CANCELLED; midQ->callback = DeleteMidQEntry; spin_unlock(&GlobalMid_Lock); add_credits(ses->server, 1, optype); -- cgit v1.2.3 From 312bbc5946c4b73dfc1d64c1dd5b0f9df8016587 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Tue, 4 Apr 2017 02:12:04 -0500 Subject: SMB3: Rename clone_range to copychunk_range Server side copy is one of the most important mechanisms smb2/smb3 supports and it was unintentionally disabled for most use cases. Renaming calls to reflect the underlying smb2 ioctl called. This is similar to the name duplicate_extents used for a similar ioctl which is also used to duplicate files by reusing fs blocks. The name change is to avoid confusion. Signed-off-by: Sachin Prabhu CC: Stable Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/cifsglob.h | 3 ++- fs/cifs/ioctl.c | 16 ++++++++-------- fs/cifs/smb2ops.c | 12 ++++++------ 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c34bdb12c8e6..57c594827cb3 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -408,7 +408,8 @@ struct smb_version_operations { char * (*create_lease_buf)(u8 *, u8); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *, unsigned int *); - int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, + int (*copychunk_range)(const unsigned int, + struct cifsFileInfo *src_file, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src, diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 001528781b6b..9bf0f94fae63 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -34,7 +34,7 @@ #include "cifs_ioctl.h" #include -static int cifs_file_clone_range(unsigned int xid, struct file *src_file, +static int cifs_file_copychunk_range(unsigned int xid, struct file *src_file, struct file *dst_file) { struct inode *src_inode = file_inode(src_file); @@ -45,7 +45,7 @@ static int cifs_file_clone_range(unsigned int xid, struct file *src_file, struct cifs_tcon *target_tcon; int rc; - cifs_dbg(FYI, "ioctl clone range\n"); + cifs_dbg(FYI, "ioctl copychunk range\n"); if (!src_file->private_data || !dst_file->private_data) { rc = -EBADF; @@ -75,8 +75,8 @@ static int cifs_file_clone_range(unsigned int xid, struct file *src_file, /* should we flush first and last page first */ truncate_inode_pages(&target_inode->i_data, 0); - if (target_tcon->ses->server->ops->clone_range) - rc = target_tcon->ses->server->ops->clone_range(xid, + if (target_tcon->ses->server->ops->copychunk_range) + rc = target_tcon->ses->server->ops->copychunk_range(xid, smb_file_src, smb_file_target, 0, src_inode->i_size, 0); else rc = -EOPNOTSUPP; @@ -91,14 +91,14 @@ out: return rc; } -static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, +static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file, unsigned long srcfd) { int rc; struct fd src_file; struct inode *src_inode; - cifs_dbg(FYI, "ioctl clone range\n"); + cifs_dbg(FYI, "ioctl copychunk range\n"); /* the destination must be opened for writing */ if (!(dst_file->f_mode & FMODE_WRITE)) { cifs_dbg(FYI, "file target not open for write\n"); @@ -129,7 +129,7 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, if (S_ISDIR(src_inode->i_mode)) goto out_fput; - rc = cifs_file_clone_range(xid, src_file.file, dst_file); + rc = cifs_file_copychunk_range(xid, src_file.file, dst_file); out_fput: fdput(src_file); @@ -251,7 +251,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) } break; case CIFS_IOC_COPYCHUNK_FILE: - rc = cifs_ioctl_clone(xid, filep, arg); + rc = cifs_ioctl_copychunk(xid, filep, arg); break; case CIFS_IOC_SET_INTEGRITY: if (pSMBFile == NULL) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b6bdf93042eb..3f12e0992b9b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -593,7 +593,7 @@ req_res_key_exit: } static int -smb2_clone_range(const unsigned int xid, +smb2_copychunk_range(const unsigned int xid, struct cifsFileInfo *srcfile, struct cifsFileInfo *trgtfile, u64 src_off, u64 len, u64 dest_off) @@ -611,7 +611,7 @@ smb2_clone_range(const unsigned int xid, if (pcchunk == NULL) return -ENOMEM; - cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n"); + cifs_dbg(FYI, "in smb2_copychunk_range - about to call request res key\n"); /* Request a key from the server to identify the source of the copy */ rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink), srcfile->fid.persistent_fid, @@ -2378,7 +2378,7 @@ struct smb_version_operations smb20_operations = { .set_oplock_level = smb2_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, - .clone_range = smb2_clone_range, + .copychunk_range = smb2_copychunk_range, .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, .get_dfs_refer = smb2_get_dfs_refer, @@ -2461,7 +2461,7 @@ struct smb_version_operations smb21_operations = { .set_oplock_level = smb21_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, - .clone_range = smb2_clone_range, + .copychunk_range = smb2_copychunk_range, .wp_retry_size = smb2_wp_retry_size, .dir_needs_close = smb2_dir_needs_close, .enum_snapshots = smb3_enum_snapshots, @@ -2548,7 +2548,7 @@ struct smb_version_operations smb30_operations = { .set_oplock_level = smb3_set_oplock_level, .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, - .clone_range = smb2_clone_range, + .copychunk_range = smb2_copychunk_range, .duplicate_extents = smb2_duplicate_extents, .validate_negotiate = smb3_validate_negotiate, .wp_retry_size = smb2_wp_retry_size, @@ -2643,7 +2643,7 @@ struct smb_version_operations smb311_operations = { .set_oplock_level = smb3_set_oplock_level, .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, - .clone_range = smb2_clone_range, + .copychunk_range = smb2_copychunk_range, .duplicate_extents = smb2_duplicate_extents, /* .validate_negotiate = smb3_validate_negotiate, */ /* not used in 3.11 */ .wp_retry_size = smb2_wp_retry_size, -- cgit v1.2.3 From 620d8745b35daaf507186c26b40c7ea02aed131e Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 10 Feb 2017 16:03:51 +0530 Subject: Introduce cifs_copy_file_range() The earlier changes to copy range for cifs unintentionally disabled the more common form of server side copy. The patch introduces the file_operations helper cifs_copy_file_range() which is used by the syscall copy_file_range. The new file operations helper allows us to perform server side copies for SMB2.0 and 2.1 servers as well as SMB 3.0+ servers which do not support the ioctl FSCTL_DUPLICATE_EXTENTS_TO_FILE. The new helper uses the ioctl FSCTL_SRV_COPYCHUNK_WRITE to perform server side copies. The helper is called by vfs_copy_file_range() only once an attempt to clone the file using the ioctl FSCTL_DUPLICATE_EXTENTS_TO_FILE has failed. Signed-off-by: Sachin Prabhu Reviewed-by: Pavel Shilovsky CC: Stable Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/cifs/cifsfs.h | 5 ++++ fs/cifs/cifsglob.h | 6 ++-- fs/cifs/ioctl.c | 60 ++----------------------------------- fs/cifs/smb2ops.c | 20 ++++++++----- 5 files changed, 110 insertions(+), 68 deletions(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 15e1db8738ae..dd3f5fabfdf6 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -972,6 +972,86 @@ out: return rc; } +ssize_t cifs_file_copychunk_range(unsigned int xid, + struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, + size_t len, unsigned int flags) +{ + struct inode *src_inode = file_inode(src_file); + struct inode *target_inode = file_inode(dst_file); + struct cifsFileInfo *smb_file_src; + struct cifsFileInfo *smb_file_target; + struct cifs_tcon *src_tcon; + struct cifs_tcon *target_tcon; + ssize_t rc; + + cifs_dbg(FYI, "copychunk range\n"); + + if (src_inode == target_inode) { + rc = -EINVAL; + goto out; + } + + if (!src_file->private_data || !dst_file->private_data) { + rc = -EBADF; + cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); + goto out; + } + + rc = -EXDEV; + smb_file_target = dst_file->private_data; + smb_file_src = src_file->private_data; + src_tcon = tlink_tcon(smb_file_src->tlink); + target_tcon = tlink_tcon(smb_file_target->tlink); + + if (src_tcon->ses != target_tcon->ses) { + cifs_dbg(VFS, "source and target of copy not on same server\n"); + goto out; + } + + /* + * Note: cifs case is easier than btrfs since server responsible for + * checks for proper open modes and file type and if it wants + * server could even support copy of range where source = target + */ + lock_two_nondirectories(target_inode, src_inode); + + cifs_dbg(FYI, "about to flush pages\n"); + /* should we flush first and last page first */ + truncate_inode_pages(&target_inode->i_data, 0); + + if (target_tcon->ses->server->ops->copychunk_range) + rc = target_tcon->ses->server->ops->copychunk_range(xid, + smb_file_src, smb_file_target, off, len, destoff); + else + rc = -EOPNOTSUPP; + + /* force revalidate of size and timestamps of target file now + * that target is updated on the server + */ + CIFS_I(target_inode)->time = 0; + /* although unlocking in the reverse order from locking is not + * strictly necessary here it is a little cleaner to be consistent + */ + unlock_two_nondirectories(src_inode, target_inode); + +out: + return rc; +} + +static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, + size_t len, unsigned int flags) +{ + unsigned int xid = get_xid(); + ssize_t rc; + + rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, + len, flags); + free_xid(xid); + return rc; +} + const struct file_operations cifs_file_ops = { .read_iter = cifs_loose_read_iter, .write_iter = cifs_file_write_iter, @@ -984,6 +1064,7 @@ const struct file_operations cifs_file_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1001,6 +1082,7 @@ const struct file_operations cifs_file_strict_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1018,6 +1100,7 @@ const struct file_operations cifs_file_direct_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, @@ -1035,6 +1118,7 @@ const struct file_operations cifs_file_nobrl_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1051,6 +1135,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = { .splice_read = generic_file_splice_read, .llseek = cifs_llseek, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .setlease = cifs_setlease, .fallocate = cifs_fallocate, @@ -1067,6 +1152,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .mmap = cifs_file_mmap, .splice_read = generic_file_splice_read, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .llseek = cifs_llseek, .setlease = cifs_setlease, @@ -1078,6 +1164,7 @@ const struct file_operations cifs_dir_ops = { .release = cifs_closedir, .read = generic_read_dir, .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .llseek = generic_file_llseek, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index da717fee3026..30bf89b1fd9a 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -139,6 +139,11 @@ extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); # define cifs_listxattr NULL #endif +extern ssize_t cifs_file_copychunk_range(unsigned int xid, + struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, + size_t len, unsigned int flags); + extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); #ifdef CONFIG_CIFS_NFSD_EXPORT extern const struct export_operations cifs_export_ops; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 57c594827cb3..d07f13a63369 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -408,10 +408,10 @@ struct smb_version_operations { char * (*create_lease_buf)(u8 *, u8); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *, unsigned int *); - int (*copychunk_range)(const unsigned int, + ssize_t (*copychunk_range)(const unsigned int, struct cifsFileInfo *src_file, - struct cifsFileInfo *target_file, u64 src_off, u64 len, - u64 dest_off); + struct cifsFileInfo *target_file, + u64 src_off, u64 len, u64 dest_off); int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 9bf0f94fae63..265c45fe4ea5 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -34,63 +34,6 @@ #include "cifs_ioctl.h" #include -static int cifs_file_copychunk_range(unsigned int xid, struct file *src_file, - struct file *dst_file) -{ - struct inode *src_inode = file_inode(src_file); - struct inode *target_inode = file_inode(dst_file); - struct cifsFileInfo *smb_file_src; - struct cifsFileInfo *smb_file_target; - struct cifs_tcon *src_tcon; - struct cifs_tcon *target_tcon; - int rc; - - cifs_dbg(FYI, "ioctl copychunk range\n"); - - if (!src_file->private_data || !dst_file->private_data) { - rc = -EBADF; - cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); - goto out; - } - - rc = -EXDEV; - smb_file_target = dst_file->private_data; - smb_file_src = src_file->private_data; - src_tcon = tlink_tcon(smb_file_src->tlink); - target_tcon = tlink_tcon(smb_file_target->tlink); - - if (src_tcon->ses != target_tcon->ses) { - cifs_dbg(VFS, "source and target of copy not on same server\n"); - goto out; - } - - /* - * Note: cifs case is easier than btrfs since server responsible for - * checks for proper open modes and file type and if it wants - * server could even support copy of range where source = target - */ - lock_two_nondirectories(target_inode, src_inode); - - cifs_dbg(FYI, "about to flush pages\n"); - /* should we flush first and last page first */ - truncate_inode_pages(&target_inode->i_data, 0); - - if (target_tcon->ses->server->ops->copychunk_range) - rc = target_tcon->ses->server->ops->copychunk_range(xid, - smb_file_src, smb_file_target, 0, src_inode->i_size, 0); - else - rc = -EOPNOTSUPP; - - /* force revalidate of size and timestamps of target file now - that target is updated on the server */ - CIFS_I(target_inode)->time = 0; - /* although unlocking in the reverse order from locking is not - strictly necessary here it is a little cleaner to be consistent */ - unlock_two_nondirectories(src_inode, target_inode); -out: - return rc; -} - static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file, unsigned long srcfd) { @@ -129,7 +72,8 @@ static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file, if (S_ISDIR(src_inode->i_mode)) goto out_fput; - rc = cifs_file_copychunk_range(xid, src_file.file, dst_file); + rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0, + src_inode->i_size, 0); out_fput: fdput(src_file); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3f12e0992b9b..063e59d543f9 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -592,7 +592,7 @@ req_res_key_exit: return rc; } -static int +static ssize_t smb2_copychunk_range(const unsigned int xid, struct cifsFileInfo *srcfile, struct cifsFileInfo *trgtfile, u64 src_off, @@ -605,6 +605,7 @@ smb2_copychunk_range(const unsigned int xid, struct cifs_tcon *tcon; int chunks_copied = 0; bool chunk_sizes_updated = false; + ssize_t bytes_written, total_bytes_written = 0; pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); @@ -669,14 +670,16 @@ smb2_copychunk_range(const unsigned int xid, } chunks_copied++; - src_off += le32_to_cpu(retbuf->TotalBytesWritten); - dest_off += le32_to_cpu(retbuf->TotalBytesWritten); - len -= le32_to_cpu(retbuf->TotalBytesWritten); + bytes_written = le32_to_cpu(retbuf->TotalBytesWritten); + src_off += bytes_written; + dest_off += bytes_written; + len -= bytes_written; + total_bytes_written += bytes_written; - cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n", + cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %zu\n", le32_to_cpu(retbuf->ChunksWritten), le32_to_cpu(retbuf->ChunkBytesWritten), - le32_to_cpu(retbuf->TotalBytesWritten)); + bytes_written); } else if (rc == -EINVAL) { if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) goto cchunk_out; @@ -713,7 +716,10 @@ smb2_copychunk_range(const unsigned int xid, cchunk_out: kfree(pcchunk); kfree(retbuf); - return rc; + if (rc) + return rc; + else + return total_bytes_written; } static int -- cgit v1.2.3 From 4fa8e504e5c0d7db9280ac96a4ac92192f1041f5 Mon Sep 17 00:00:00 2001 From: Tobias Regnery Date: Thu, 30 Mar 2017 12:34:14 +0200 Subject: CIFS: Fix build failure with smb2 I saw the following build error during a randconfig build: fs/cifs/smb2ops.c: In function 'smb2_new_lease_key': fs/cifs/smb2ops.c:1104:2: error: implicit declaration of function 'generate_random_uuid' [-Werror=implicit-function-declaration] Explicit include the right header to fix this issue. Signed-off-by: Tobias Regnery Reviewed-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 063e59d543f9..7b12a727947e 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "cifsglob.h" #include "smb2pdu.h" -- cgit v1.2.3 From 806a28efe9b78ffae5e2757e1ee924b8e50c08ab Mon Sep 17 00:00:00 2001 From: Jan-Marek Glogowski Date: Mon, 20 Feb 2017 12:25:58 +0100 Subject: Reset TreeId to zero on SMB2 TREE_CONNECT Currently the cifs module breaks the CIFS specs on reconnect as described in http://msdn.microsoft.com/en-us/library/cc246529.aspx: "TreeId (4 bytes): Uniquely identifies the tree connect for the command. This MUST be 0 for the SMB2 TREE_CONNECT Request." Signed-off-by: Jan-Marek Glogowski Reviewed-by: Aurelien Aptel Tested-by: Aurelien Aptel Signed-off-by: Steve French CC: Stable --- fs/cifs/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 7446496850a3..66fa1b941cdf 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1185,6 +1185,10 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, return -EINVAL; } + /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */ + if (tcon) + tcon->tid = 0; + rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req); if (rc) { kfree(unc_path); -- cgit v1.2.3 From 81380ca10778b99dce98940cfc993214712df335 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 7 Apr 2017 08:56:26 -0600 Subject: blk-mq: use the right hctx when getting a driver tag fails While dispatching requests, if we fail to get a driver tag, we mark the hardware queue as waiting for a tag and put the requests on a hctx->dispatch list to be run later when a driver tag is freed. However, blk_mq_dispatch_rq_list() may dispatch requests from multiple hardware queues if using a single-queue scheduler with a multiqueue device. If blk_mq_get_driver_tag() fails, it doesn't update the hardware queue we are processing. This means we end up using the hardware queue of the previous request, which may or may not be the same as that of the current request. If it isn't, the wrong hardware queue will end up waiting for a tag, and the requests will be on the wrong dispatch list, leading to a hang. The fix is twofold: 1. Make sure we save which hardware queue we were trying to get a request for in blk_mq_get_driver_tag() regardless of whether it succeeds or not. 2. Make blk_mq_dispatch_rq_list() take a request_queue instead of a blk_mq_hw_queue to make it clear that it must handle multiple hardware queues, since I've already messed this up on a couple of occasions. This didn't appear in testing with nvme and mq-deadline because nvme has more driver tags than the default number of scheduler tags. However, with the blk_mq_update_nr_hw_queues() fix, it showed up with nbd. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 9 +++++---- block/blk-mq.c | 25 +++++++++++++------------ block/blk-mq.h | 2 +- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 09af8ff18719..fc00f00898d3 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -171,7 +171,8 @@ void blk_mq_sched_put_request(struct request *rq) void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) { - struct elevator_queue *e = hctx->queue->elevator; + struct request_queue *q = hctx->queue; + struct elevator_queue *e = q->elevator; const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request; bool did_work = false; LIST_HEAD(rq_list); @@ -203,10 +204,10 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) */ if (!list_empty(&rq_list)) { blk_mq_sched_mark_restart_hctx(hctx); - did_work = blk_mq_dispatch_rq_list(hctx, &rq_list); + did_work = blk_mq_dispatch_rq_list(q, &rq_list); } else if (!has_sched_dispatch) { blk_mq_flush_busy_ctxs(hctx, &rq_list); - blk_mq_dispatch_rq_list(hctx, &rq_list); + blk_mq_dispatch_rq_list(q, &rq_list); } /* @@ -222,7 +223,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) if (!rq) break; list_add(&rq->queuelist, &rq_list); - } while (blk_mq_dispatch_rq_list(hctx, &rq_list)); + } while (blk_mq_dispatch_rq_list(q, &rq_list)); } } diff --git a/block/blk-mq.c b/block/blk-mq.c index 935f2cc7c8c3..828f4bd193f2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -845,12 +845,8 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, .flags = wait ? 0 : BLK_MQ_REQ_NOWAIT, }; - if (rq->tag != -1) { -done: - if (hctx) - *hctx = data.hctx; - return true; - } + if (rq->tag != -1) + goto done; if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) data.flags |= BLK_MQ_REQ_RESERVED; @@ -862,10 +858,12 @@ done: atomic_inc(&data.hctx->nr_active); } data.hctx->tags->rqs[rq->tag] = rq; - goto done; } - return false; +done: + if (hctx) + *hctx = data.hctx; + return rq->tag != -1; } static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, @@ -962,14 +960,17 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx) return true; } -bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) +bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list) { - struct request_queue *q = hctx->queue; + struct blk_mq_hw_ctx *hctx; struct request *rq; LIST_HEAD(driver_list); struct list_head *dptr; int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK; + if (list_empty(list)) + return false; + /* * Start off with dptr being NULL, so we start the first request * immediately, even if we have more pending. @@ -980,7 +981,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) * Now process all the entries, sending them to the driver. */ errors = queued = 0; - while (!list_empty(list)) { + do { struct blk_mq_queue_data bd; rq = list_first_entry(list, struct request, queuelist); @@ -1051,7 +1052,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list) */ if (!dptr && list->next != list->prev) dptr = &driver_list; - } + } while (!list_empty(list)); hctx->dispatched[queued_to_index(queued)]++; diff --git a/block/blk-mq.h b/block/blk-mq.h index b79f9a7d8cf6..660a17e1d033 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -31,7 +31,7 @@ void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); -bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); +bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx); bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx, -- cgit v1.2.3 From 6917ff0b5bd4139e08a3f3146529dcb3b95ba7a6 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 Apr 2017 12:01:30 -0700 Subject: blk-mq-sched: refactor scheduler initialization Preparation cleanup for the next couple of fixes, push blk_mq_sched_setup() and e->ops.mq.init_sched() into a helper. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 82 ++++++++++++++++++++++++++++------------------------ block/blk-mq-sched.h | 2 +- block/elevator.c | 32 ++++++++------------ 3 files changed, 57 insertions(+), 59 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index fc00f00898d3..6bd1758ea29b 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -432,11 +432,45 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, } } -int blk_mq_sched_setup(struct request_queue *q) +static int blk_mq_sched_alloc_tags(struct request_queue *q, + struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct blk_mq_tag_set *set = q->tag_set; + int ret; + + hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests, + set->reserved_tags); + if (!hctx->sched_tags) + return -ENOMEM; + + ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); + if (ret) + blk_mq_sched_free_tags(set, hctx, hctx_idx); + + return ret; +} + +void blk_mq_sched_teardown(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_hw_ctx *hctx; - int ret, i; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_sched_free_tags(set, hctx, i); +} + +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) +{ + struct blk_mq_hw_ctx *hctx; + unsigned int i; + int ret; + + if (!e) { + q->elevator = NULL; + return 0; + } /* * Default to 256, since we don't split into sync/async like the @@ -444,49 +478,21 @@ int blk_mq_sched_setup(struct request_queue *q) */ q->nr_requests = 2 * BLKDEV_MAX_RQ; - /* - * We're switching to using an IO scheduler, so setup the hctx - * scheduler tags and switch the request map from the regular - * tags to scheduler tags. First allocate what we need, so we - * can safely fail and fallback, if needed. - */ - ret = 0; queue_for_each_hw_ctx(q, hctx, i) { - hctx->sched_tags = blk_mq_alloc_rq_map(set, i, - q->nr_requests, set->reserved_tags); - if (!hctx->sched_tags) { - ret = -ENOMEM; - break; - } - ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests); + ret = blk_mq_sched_alloc_tags(q, hctx, i); if (ret) - break; + goto err; } - /* - * If we failed, free what we did allocate - */ - if (ret) { - queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx->sched_tags) - continue; - blk_mq_sched_free_tags(set, hctx, i); - } - - return ret; - } + ret = e->ops.mq.init_sched(q, e); + if (ret) + goto err; return 0; -} -void blk_mq_sched_teardown(struct request_queue *q) -{ - struct blk_mq_tag_set *set = q->tag_set; - struct blk_mq_hw_ctx *hctx; - int i; - - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_free_tags(set, hctx, i); +err: + blk_mq_sched_teardown(q); + return ret; } int blk_mq_sched_init(struct request_queue *q) diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index a75b16b123f7..873f9af5a35b 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -32,7 +32,7 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, struct list_head *rq_list, struct request *(*get_rq)(struct blk_mq_hw_ctx *)); -int blk_mq_sched_setup(struct request_queue *q); +int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); void blk_mq_sched_teardown(struct request_queue *q); int blk_mq_sched_init(struct request_queue *q); diff --git a/block/elevator.c b/block/elevator.c index 01139f549b5b..f236ef1d2be9 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -242,17 +242,12 @@ int elevator_init(struct request_queue *q, char *name) } } - if (e->uses_mq) { - err = blk_mq_sched_setup(q); - if (!err) - err = e->ops.mq.init_sched(q, e); - } else + if (e->uses_mq) + err = blk_mq_init_sched(q, e); + else err = e->ops.sq.elevator_init_fn(q, e); - if (err) { - if (e->uses_mq) - blk_mq_sched_teardown(q); + if (err) elevator_put(e); - } return err; } EXPORT_SYMBOL(elevator_init); @@ -987,21 +982,18 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) } /* allocate, init and register new elevator */ - if (new_e) { - if (new_e->uses_mq) { - err = blk_mq_sched_setup(q); - if (!err) - err = new_e->ops.mq.init_sched(q, new_e); - } else - err = new_e->ops.sq.elevator_init_fn(q, new_e); - if (err) - goto fail_init; + if (q->mq_ops) + err = blk_mq_init_sched(q, new_e); + else + err = new_e->ops.sq.elevator_init_fn(q, new_e); + if (err) + goto fail_init; + if (new_e) { err = elv_register_queue(q); if (err) goto fail_register; - } else - q->elevator = NULL; + } /* done, kill the old one and finish */ if (old) { -- cgit v1.2.3 From 93252632e828da3e90241a1c0e766556abf71598 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 Apr 2017 12:01:31 -0700 Subject: blk-mq-sched: set up scheduler tags when bringing up new queues If a new hardware queue is added at runtime, we don't allocate scheduler tags for it, leading to a crash. This hooks up the scheduler framework to blk_mq_{init,exit}_hctx() to make sure everything gets properly initialized/freed. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 22 ++++++++++++++++++++++ block/blk-mq-sched.h | 5 +++++ block/blk-mq.c | 9 ++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 6bd1758ea29b..0bb13bb51daa 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -461,6 +461,28 @@ void blk_mq_sched_teardown(struct request_queue *q) blk_mq_sched_free_tags(set, hctx, i); } +int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct elevator_queue *e = q->elevator; + + if (!e) + return 0; + + return blk_mq_sched_alloc_tags(q, hctx, hctx_idx); +} + +void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + struct elevator_queue *e = q->elevator; + + if (!e) + return; + + blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx); +} + int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) { struct blk_mq_hw_ctx *hctx; diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 873f9af5a35b..19db25e0c95a 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -35,6 +35,11 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); void blk_mq_sched_teardown(struct request_queue *q); +int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx); +void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx); + int blk_mq_sched_init(struct request_queue *q); static inline bool diff --git a/block/blk-mq.c b/block/blk-mq.c index 828f4bd193f2..72e744cd638c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1924,6 +1924,8 @@ static void blk_mq_exit_hctx(struct request_queue *q, hctx->fq->flush_rq, hctx_idx, flush_start_tag + hctx_idx); + blk_mq_sched_exit_hctx(q, hctx, hctx_idx); + if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); @@ -1990,9 +1992,12 @@ static int blk_mq_init_hctx(struct request_queue *q, set->ops->init_hctx(hctx, set->driver_data, hctx_idx)) goto free_bitmap; + if (blk_mq_sched_init_hctx(q, hctx, hctx_idx)) + goto exit_hctx; + hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size); if (!hctx->fq) - goto exit_hctx; + goto sched_exit_hctx; if (set->ops->init_request && set->ops->init_request(set->driver_data, @@ -2007,6 +2012,8 @@ static int blk_mq_init_hctx(struct request_queue *q, free_fq: kfree(hctx->fq); + sched_exit_hctx: + blk_mq_sched_exit_hctx(q, hctx, hctx_idx); exit_hctx: if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); -- cgit v1.2.3 From 54d5329d425650fafaf90660a139c771d2d49cae Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 7 Apr 2017 08:52:27 -0600 Subject: blk-mq-sched: fix crash in switch error path In elevator_switch(), if blk_mq_init_sched() fails, we attempt to fall back to the original scheduler. However, at this point, we've already torn down the original scheduler's tags, so this causes a crash. Doing the fallback like the legacy elevator path is much harder for mq, so fix it by just falling back to none, instead. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 13 +++++-- block/blk-mq-sched.h | 2 +- block/blk-mq.c | 2 -- block/blk-sysfs.c | 2 +- block/elevator.c | 94 +++++++++++++++++++++++++++--------------------- include/linux/elevator.h | 2 +- 6 files changed, 67 insertions(+), 48 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 0bb13bb51daa..e8c2ed654ef0 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -451,7 +451,7 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, return ret; } -void blk_mq_sched_teardown(struct request_queue *q) +static void blk_mq_sched_tags_teardown(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_hw_ctx *hctx; @@ -513,10 +513,19 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return 0; err: - blk_mq_sched_teardown(q); + blk_mq_sched_tags_teardown(q); + q->elevator = NULL; return ret; } +void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) +{ + if (e->type->ops.mq.exit_sched) + e->type->ops.mq.exit_sched(e); + blk_mq_sched_tags_teardown(q); + q->elevator = NULL; +} + int blk_mq_sched_init(struct request_queue *q) { int ret; diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index 19db25e0c95a..e704956e0862 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -33,7 +33,7 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, struct request *(*get_rq)(struct blk_mq_hw_ctx *)); int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); -void blk_mq_sched_teardown(struct request_queue *q); +void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx); diff --git a/block/blk-mq.c b/block/blk-mq.c index 72e744cd638c..cfb7c97b14ec 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2240,8 +2240,6 @@ void blk_mq_release(struct request_queue *q) struct blk_mq_hw_ctx *hctx; unsigned int i; - blk_mq_sched_teardown(q); - /* hctx kobj stays in hctx */ queue_for_each_hw_ctx(q, hctx, i) { if (!hctx) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index c44b321335f3..37f0b3ad635e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -816,7 +816,7 @@ static void blk_release_queue(struct kobject *kobj) if (q->elevator) { ioc_clear_queue(q); - elevator_exit(q->elevator); + elevator_exit(q, q->elevator); } blk_exit_rl(&q->root_rl); diff --git a/block/elevator.c b/block/elevator.c index f236ef1d2be9..dbeecf7be719 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -252,11 +252,11 @@ int elevator_init(struct request_queue *q, char *name) } EXPORT_SYMBOL(elevator_init); -void elevator_exit(struct elevator_queue *e) +void elevator_exit(struct request_queue *q, struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); if (e->uses_mq && e->type->ops.mq.exit_sched) - e->type->ops.mq.exit_sched(e); + blk_mq_exit_sched(q, e); else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn) e->type->ops.sq.elevator_exit_fn(e); mutex_unlock(&e->sysfs_lock); @@ -941,6 +941,45 @@ void elv_unregister(struct elevator_type *e) } EXPORT_SYMBOL_GPL(elv_unregister); +static int elevator_switch_mq(struct request_queue *q, + struct elevator_type *new_e) +{ + int ret; + + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + + if (q->elevator) { + if (q->elevator->registered) + elv_unregister_queue(q); + ioc_clear_queue(q); + elevator_exit(q, q->elevator); + } + + ret = blk_mq_init_sched(q, new_e); + if (ret) + goto out; + + if (new_e) { + ret = elv_register_queue(q); + if (ret) { + elevator_exit(q, q->elevator); + goto out; + } + } + + if (new_e) + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); + else + blk_add_trace_msg(q, "elv switch: none"); + +out: + blk_mq_unfreeze_queue(q); + blk_mq_start_stopped_hw_queues(q, true); + return ret; + +} + /* * switch to new_e io scheduler. be careful not to introduce deadlocks - * we don't free the old io scheduler, before we have allocated what we @@ -953,10 +992,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) bool old_registered = false; int err; - if (q->mq_ops) { - blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); - } + if (q->mq_ops) + return elevator_switch_mq(q, new_e); /* * Turn on BYPASS and drain all requests w/ elevator private data. @@ -968,11 +1005,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) if (old) { old_registered = old->registered; - if (old->uses_mq) - blk_mq_sched_teardown(q); - - if (!q->mq_ops) - blk_queue_bypass_start(q); + blk_queue_bypass_start(q); /* unregister and clear all auxiliary data of the old elevator */ if (old_registered) @@ -982,53 +1015,32 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) } /* allocate, init and register new elevator */ - if (q->mq_ops) - err = blk_mq_init_sched(q, new_e); - else - err = new_e->ops.sq.elevator_init_fn(q, new_e); + err = new_e->ops.sq.elevator_init_fn(q, new_e); if (err) goto fail_init; - if (new_e) { - err = elv_register_queue(q); - if (err) - goto fail_register; - } + err = elv_register_queue(q); + if (err) + goto fail_register; /* done, kill the old one and finish */ if (old) { - elevator_exit(old); - if (!q->mq_ops) - blk_queue_bypass_end(q); + elevator_exit(q, old); + blk_queue_bypass_end(q); } - if (q->mq_ops) { - blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); - } - - if (new_e) - blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); - else - blk_add_trace_msg(q, "elv switch: none"); + blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name); return 0; fail_register: - if (q->mq_ops) - blk_mq_sched_teardown(q); - elevator_exit(q->elevator); + elevator_exit(q, q->elevator); fail_init: /* switch failed, restore and re-register old elevator */ if (old) { q->elevator = old; elv_register_queue(q); - if (!q->mq_ops) - blk_queue_bypass_end(q); - } - if (q->mq_ops) { - blk_mq_unfreeze_queue(q); - blk_mq_start_stopped_hw_queues(q, true); + blk_queue_bypass_end(q); } return err; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index aebecc4ed088..22d39e8d4de1 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -211,7 +211,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *); extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); -extern void elevator_exit(struct elevator_queue *); +extern void elevator_exit(struct request_queue *, struct elevator_queue *); extern int elevator_change(struct request_queue *, const char *); extern bool elv_bio_merge_ok(struct request *, struct bio *); extern struct elevator_queue *elevator_alloc(struct request_queue *, -- cgit v1.2.3 From ebe8bddb6e30d7a02775b9972099271fc5910f37 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 7 Apr 2017 08:53:11 -0600 Subject: blk-mq: remap queues when adding/removing hardware queues blk_mq_update_nr_hw_queues() used to remap hardware queues, which is the behavior that drivers expect. However, commit 4e68a011428a changed blk_mq_queue_reinit() to not remap queues for the case of CPU hotplugging, inadvertently making blk_mq_update_nr_hw_queues() not remap queues as well. This breaks, for example, NBD's multi-connection mode, leaving the added hardware queues unused. Fix it by making blk_mq_update_nr_hw_queues() explicitly remap the queues. Fixes: 4e68a011428a ("blk-mq: don't redistribute hardware queues on a CPU hotplug event") Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index cfb7c97b14ec..f1be5c3eb600 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2570,6 +2570,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) return 0; } +static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) +{ + if (set->ops->map_queues) + return set->ops->map_queues(set); + else + return blk_mq_map_queues(set); +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -2624,10 +2632,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (!set->mq_map) goto out_free_tags; - if (set->ops->map_queues) - ret = set->ops->map_queues(set); - else - ret = blk_mq_map_queues(set); + ret = blk_mq_update_queue_map(set); if (ret) goto out_free_mq_map; @@ -2719,6 +2724,7 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) blk_mq_freeze_queue(q); set->nr_hw_queues = nr_hw_queues; + blk_mq_update_queue_map(set); list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_realloc_hw_ctxs(set, q); -- cgit v1.2.3 From 1680a3868f00be638a8a213a321e88d11ce7e9f7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 7 Apr 2017 23:51:05 +0800 Subject: sysctl: add sanity check for proc_douintvec Commit e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields") introduced the proc_douintvec helper function, but it forgot to add the related sanity check when doing register_sysctl_table. So add it now. Signed-off-by: Liping Zhang Cc: Subash Abhinov Kasiviswanathan Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 8f91ec66baa3..d04ea4349909 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1074,6 +1074,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table) if ((table->proc_handler == proc_dostring) || (table->proc_handler == proc_dointvec) || + (table->proc_handler == proc_douintvec) || (table->proc_handler == proc_dointvec_minmax) || (table->proc_handler == proc_dointvec_jiffies) || (table->proc_handler == proc_dointvec_userhz_jiffies) || -- cgit v1.2.3 From 5380e5644afbba9e3d229c36771134976f05c91e Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 7 Apr 2017 23:51:06 +0800 Subject: sysctl: don't print negative flag for proc_douintvec I saw some very confusing sysctl output on my system: # cat /proc/sys/net/core/xfrm_aevent_rseqth -2 # cat /proc/sys/net/core/xfrm_aevent_etime -10 # cat /proc/sys/net/ipv4/tcp_notsent_lowat -4294967295 Because we forget to set the *negp flag in proc_douintvec, so it will become a garbage value. Since the value related to proc_douintvec is always an unsigned integer, so we can set *negp to false explictily to fix this issue. Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields") Signed-off-by: Liping Zhang Cc: Subash Abhinov Kasiviswanathan Cc: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index acf0a5a06da7..8cca4c7bb21f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2136,6 +2136,7 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp, *valp = *lvalp; } else { unsigned int val = *valp; + *negp = false; *lvalp = (unsigned long)val; } return 0; -- cgit v1.2.3 From 7587a5ae7eef0439f7be31f1b5959af062bbc5ec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 7 Apr 2017 11:16:52 -0700 Subject: blk-mq: Introduce blk_mq_delay_run_hw_queue() Introduce a function that runs a hardware queue unconditionally after a delay. Note: there is already a function that stops and restarts a hardware queue after a delay, namely blk_mq_delay_queue(). This function will be used in the next patch in this series. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Long Li Cc: K. Y. Srinivasan Signed-off-by: Jens Axboe --- block/blk-mq.c | 32 ++++++++++++++++++++++++++++++-- include/linux/blk-mq.h | 2 ++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f1be5c3eb600..ad45ae743186 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1135,7 +1135,8 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) return hctx->next_cpu; } -void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, + unsigned long msecs) { if (unlikely(blk_mq_hctx_stopped(hctx) || !blk_mq_hw_queue_mapped(hctx))) @@ -1152,7 +1153,24 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) put_cpu(); } - kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work); + if (msecs == 0) + kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), + &hctx->run_work); + else + kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), + &hctx->delayed_run_work, + msecs_to_jiffies(msecs)); +} + +void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) +{ + __blk_mq_delay_run_hw_queue(hctx, true, msecs); +} +EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); + +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +{ + __blk_mq_delay_run_hw_queue(hctx, async, 0); } void blk_mq_run_hw_queues(struct request_queue *q, bool async) @@ -1255,6 +1273,15 @@ static void blk_mq_run_work_fn(struct work_struct *work) __blk_mq_run_hw_queue(hctx); } +static void blk_mq_delayed_run_work_fn(struct work_struct *work) +{ + struct blk_mq_hw_ctx *hctx; + + hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work); + + __blk_mq_run_hw_queue(hctx); +} + static void blk_mq_delay_work_fn(struct work_struct *work) { struct blk_mq_hw_ctx *hctx; @@ -1962,6 +1989,7 @@ static int blk_mq_init_hctx(struct request_queue *q, node = hctx->numa_node = set->numa_node; INIT_WORK(&hctx->run_work, blk_mq_run_work_fn); + INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn); INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn); spin_lock_init(&hctx->lock); INIT_LIST_HEAD(&hctx->dispatch); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b296a9006117..9382c5da7a2e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -51,6 +51,7 @@ struct blk_mq_hw_ctx { atomic_t nr_active; + struct delayed_work delayed_run_work; struct delayed_work delay_work; struct hlist_node cpuhp_dead; @@ -238,6 +239,7 @@ void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); +void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, -- cgit v1.2.3 From 36e3cf273977da34a760d513e1bef8431a9abaa0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 7 Apr 2017 11:16:53 -0700 Subject: scsi: Avoid that SCSI queues get stuck If a .queue_rq() function returns BLK_MQ_RQ_QUEUE_BUSY then the block driver that implements that function is responsible for rerunning the hardware queue once requests can be queued again successfully. commit 52d7f1b5c2f3 ("blk-mq: Avoid that requeueing starts stopped queues") removed the blk_mq_stop_hw_queue() call from scsi_queue_rq() for the BLK_MQ_RQ_QUEUE_BUSY case. Hence change all calls to functions that are intended to rerun a busy queue such that these examine all hardware queues instead of only stopped queues. Since no other functions than scsi_internal_device_block() and scsi_internal_device_unblock() should ever stop or restart a SCSI queue, change the blk_mq_delay_queue() call into a blk_mq_delay_run_hw_queue() call. Fixes: commit 52d7f1b5c2f3 ("blk-mq: Avoid that requeueing starts stopped queues") Fixes: commit 7e79dadce222 ("blk-mq: stop hardware queue in blk_mq_delay_queue()") Signed-off-by: Bart Van Assche Cc: Martin K. Petersen Cc: James Bottomley Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Long Li Cc: K. Y. Srinivasan Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 19125d72f322..e5a2d590a104 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -496,7 +496,7 @@ static void scsi_run_queue(struct request_queue *q) scsi_starved_list_run(sdev->host); if (q->mq_ops) - blk_mq_start_stopped_hw_queues(q, false); + blk_mq_run_hw_queues(q, false); else blk_run_queue(q); } @@ -667,7 +667,7 @@ static bool scsi_end_request(struct request *req, int error, !list_empty(&sdev->host->starved_list)) kblockd_schedule_work(&sdev->requeue_work); else - blk_mq_start_stopped_hw_queues(q, true); + blk_mq_run_hw_queues(q, true); } else { unsigned long flags; @@ -1974,7 +1974,7 @@ out: case BLK_MQ_RQ_QUEUE_BUSY: if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev)) - blk_mq_delay_queue(hctx, SCSI_QUEUE_DELAY); + blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY); break; case BLK_MQ_RQ_QUEUE_ERROR: /* -- cgit v1.2.3 From 6077c2d706097c00d8f2fed57d3f3c45cd228ee8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 7 Apr 2017 11:16:54 -0700 Subject: dm rq: Avoid that request processing stalls sporadically While running the srp-test software I noticed that request processing stalls sporadically at the beginning of a test, namely when mkfs is run against a dm-mpath device. Every time when that happened the following command was sufficient to resume request processing: echo run >/sys/kernel/debug/block/dm-0/state This patch avoids that such request processing stalls occur. The test I ran is as follows: while srp-test/run_tests -d -r 30 -t 02-mq; do :; done Signed-off-by: Bart Van Assche Cc: Mike Snitzer Cc: dm-devel@redhat.com Signed-off-by: Jens Axboe --- drivers/md/dm-rq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 28955b94d2b2..0b081d170087 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -755,6 +755,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, /* Undo dm_start_request() before requeuing */ rq_end_stats(md, rq); rq_completed(md, rq_data_dir(rq), false); + blk_mq_delay_run_hw_queue(hctx, 100/*ms*/); return BLK_MQ_RQ_QUEUE_BUSY; } -- cgit v1.2.3 From 6d8c6c0f97ad8a3517c42b179c1dc8e77397d0e2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 7 Apr 2017 12:40:09 -0600 Subject: blk-mq: Restart a single queue if tag sets are shared To improve scalability, if hardware queues are shared, restart a single hardware queue in round-robin fashion. Rename blk_mq_sched_restart_queues() to reflect the new semantics. Remove blk_mq_sched_mark_restart_queue() because this function has no callers. Remove flag QUEUE_FLAG_RESTART because this patch removes the code that uses this flag. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 63 ++++++++++++++++++++++++++++++++++++++++++-------- block/blk-mq-sched.h | 16 +------------ block/blk-mq.c | 2 +- include/linux/blkdev.h | 1 - 4 files changed, 55 insertions(+), 27 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index e8c2ed654ef0..c974a1bbf4cb 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -318,25 +318,68 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, return true; } -static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) +static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx) { if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) { clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (blk_mq_hctx_has_pending(hctx)) + if (blk_mq_hctx_has_pending(hctx)) { blk_mq_run_hw_queue(hctx, true); + return true; + } } + return false; } -void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx) -{ - struct request_queue *q = hctx->queue; - unsigned int i; +/** + * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list + * @pos: loop cursor. + * @skip: the list element that will not be examined. Iteration starts at + * @skip->next. + * @head: head of the list to examine. This list must have at least one + * element, namely @skip. + * @member: name of the list_head structure within typeof(*pos). + */ +#define list_for_each_entry_rcu_rr(pos, skip, head, member) \ + for ((pos) = (skip); \ + (pos = (pos)->member.next != (head) ? list_entry_rcu( \ + (pos)->member.next, typeof(*pos), member) : \ + list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \ + (pos) != (skip); ) - if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { - if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) { - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_sched_restart_hctx(hctx); +/* + * Called after a driver tag has been freed to check whether a hctx needs to + * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware + * queues in a round-robin fashion if the tag set of @hctx is shared with other + * hardware queues. + */ +void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) +{ + struct blk_mq_tags *const tags = hctx->tags; + struct blk_mq_tag_set *const set = hctx->queue->tag_set; + struct request_queue *const queue = hctx->queue, *q; + struct blk_mq_hw_ctx *hctx2; + unsigned int i, j; + + if (set->flags & BLK_MQ_F_TAG_SHARED) { + rcu_read_lock(); + list_for_each_entry_rcu_rr(q, queue, &set->tag_list, + tag_set_list) { + queue_for_each_hw_ctx(q, hctx2, i) + if (hctx2->tags == tags && + blk_mq_sched_restart_hctx(hctx2)) + goto done; + } + j = hctx->queue_num + 1; + for (i = 0; i < queue->nr_hw_queues; i++, j++) { + if (j == queue->nr_hw_queues) + j = 0; + hctx2 = queue->queue_hw_ctx[j]; + if (hctx2->tags == tags && + blk_mq_sched_restart_hctx(hctx2)) + break; } +done: + rcu_read_unlock(); } else { blk_mq_sched_restart_hctx(hctx); } diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h index e704956e0862..3a9e6e40558b 100644 --- a/block/blk-mq-sched.h +++ b/block/blk-mq-sched.h @@ -19,7 +19,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, struct request **merged_request); bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); -void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx); +void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx); void blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, bool async, bool can_block); @@ -136,20 +136,6 @@ static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx) set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); } -/* - * Mark a hardware queue and the request queue it belongs to as needing a - * restart. - */ -static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx) -{ - struct request_queue *q = hctx->queue; - - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); - if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) - set_bit(QUEUE_FLAG_RESTART, &q->queue_flags); -} - static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); diff --git a/block/blk-mq.c b/block/blk-mq.c index ad45ae743186..572966f49596 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -348,7 +348,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag); if (sched_tag != -1) blk_mq_sched_completed_request(hctx, rq); - blk_mq_sched_restart_queues(hctx); + blk_mq_sched_restart(hctx); blk_queue_exit(q); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5a7da607ca04..7548f332121a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -610,7 +610,6 @@ struct request_queue { #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DAX 26 /* device supports DAX */ #define QUEUE_FLAG_STATS 27 /* track rq completion times */ -#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3 From cefdc26e86728812aea54248a534fd4a5da2a43d Mon Sep 17 00:00:00 2001 From: Martin Brandenburg Date: Thu, 6 Apr 2017 18:11:00 -0400 Subject: orangefs: move features validation to fix filesystem hang Without this fix (and another to the userspace component itself described later), the kernel will be unable to process any OrangeFS requests after the userspace component is restarted (due to a crash or at the administrator's behest). The bug here is that inside orangefs_remount, the orangefs_request_mutex is locked. When the userspace component restarts while the filesystem is mounted, it sends a ORANGEFS_DEV_REMOUNT_ALL ioctl to the device, which causes the kernel to send it a few requests aimed at synchronizing the state between the two. While this is happening the orangefs_request_mutex is locked to prevent any other requests going through. This is only half of the bugfix. The other half is in the userspace component which outright ignores(!) requests made before it considers the filesystem remounted, which is after the ioctl returns. Of course the ioctl doesn't return until after the userspace component responds to the request it ignores. The userspace component has been changed to allow ORANGEFS_VFS_OP_FEATURES regardless of the mount status. Mike Marshall says: "I've tested this patch against the fixed userspace part. This patch is real important, I hope it can make it into 4.11... Here's what happens when the userspace daemon is restarted, without the patch: ============================================= [ INFO: possible recursive locking detected ] [ 4.10.0-00007-ge98bdb3 #1 Not tainted ] --------------------------------------------- pvfs2-client-co/29032 is trying to acquire lock: (orangefs_request_mutex){+.+.+.}, at: service_operation+0x3c7/0x7b0 [orangefs] but task is already holding lock: (orangefs_request_mutex){+.+.+.}, at: dispatch_ioctl_command+0x1bf/0x330 [orangefs] CPU: 0 PID: 29032 Comm: pvfs2-client-co Not tainted 4.10.0-00007-ge98bdb3 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014 Call Trace: __lock_acquire+0x7eb/0x1290 lock_acquire+0xe8/0x1d0 mutex_lock_killable_nested+0x6f/0x6e0 service_operation+0x3c7/0x7b0 [orangefs] orangefs_remount+0xea/0x150 [orangefs] dispatch_ioctl_command+0x227/0x330 [orangefs] orangefs_devreq_ioctl+0x29/0x70 [orangefs] do_vfs_ioctl+0xa3/0x6e0 SyS_ioctl+0x79/0x90" Signed-off-by: Martin Brandenburg Acked-by: Mike Marshall Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/orangefs/super.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 67c24351a67f..cd261c8de53a 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -263,8 +263,13 @@ int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb) if (!new_op) return -ENOMEM; new_op->upcall.req.features.features = 0; - ret = service_operation(new_op, "orangefs_features", 0); - orangefs_features = new_op->downcall.resp.features.features; + ret = service_operation(new_op, "orangefs_features", + ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_MUTEX); + if (!ret) + orangefs_features = + new_op->downcall.resp.features.features; + else + orangefs_features = 0; op_release(new_op); } else { orangefs_features = 0; -- cgit v1.2.3 From d75450ff40df0199bf13dfb19f435519ff947138 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 7 Apr 2017 16:04:39 -0700 Subject: mm: fix page_vma_mapped_walk() for ksm pages Doug Smythies reports oops with KSM in this backtrace, I've been seeing the same: page_vma_mapped_walk+0xe6/0x5b0 page_referenced_one+0x91/0x1a0 rmap_walk_ksm+0x100/0x190 rmap_walk+0x4f/0x60 page_referenced+0x149/0x170 shrink_active_list+0x1c2/0x430 shrink_node_memcg+0x67a/0x7a0 shrink_node+0xe1/0x320 kswapd+0x34b/0x720 Just as observed in commit 4b0ece6fa016 ("mm: migrate: fix remove_migration_pte() for ksm pages"), you cannot use page->index calculations on ksm pages. page_vma_mapped_walk() is relying on __vma_address(), where a ksm page can lead it off the end of the page table, and into whatever nonsense is in the next page, ending as an oops inside check_pte()'s pte_page(). KSM tells page_vma_mapped_walk() exactly where to look for the page, it does not need any page->index calculation: and that's so also for all the normal and file and anon pages - just not for THPs and their subpages. Get out early in most cases: instead of a PageKsm test, move down the earlier not-THP-page test, as suggested by Kirill. I'm also slightly worried that this loop can stray into other vmas, so added a vm_end test to prevent surprises; though I have not imagined anything worse than a very contrived case, in which a page mlocked in the next vma might be reclaimed because it is not mlocked in this vma. Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1704031104400.1118@eggly.anvils Signed-off-by: Hugh Dickins Reported-by: Doug Smythies Tested-by: Doug Smythies Reviewed-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_vma_mapped.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index c4c9def8ffea..de9c40d7304a 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -111,12 +111,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (pvmw->pmd && !pvmw->pte) return not_found(pvmw); - /* Only for THP, seek to next pte entry makes sense */ - if (pvmw->pte) { - if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) - return not_found(pvmw); + if (pvmw->pte) goto next_pte; - } if (unlikely(PageHuge(pvmw->page))) { /* when pud is not present, pte will be NULL */ @@ -165,9 +161,14 @@ restart: while (1) { if (check_pte(pvmw)) return true; -next_pte: do { +next_pte: + /* Seek to next pte only makes sense for THP */ + if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) + return not_found(pvmw); + do { pvmw->address += PAGE_SIZE; - if (pvmw->address >= + if (pvmw->address >= pvmw->vma->vm_end || + pvmw->address >= __vma_address(pvmw->page, pvmw->vma) + hpage_nr_pages(pvmw->page) * PAGE_SIZE) return not_found(pvmw); -- cgit v1.2.3 From 045098e944959d4cbd56bbf33e2f26045863b7ca Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 7 Apr 2017 16:04:42 -0700 Subject: userfaultfd: report actual registered features in fdinfo fdinfo for userfault file descriptor reports UFFD_API_FEATURES. Up until recently, the UFFD_API_FEATURES was defined as 0, therefore corresponding field in fdinfo always contained zero. Now, with introduction of several additional features, UFFD_API_FEATURES is not longer 0 and it seems better to report actual features requested for the userfaultfd object described by the fdinfo. First, the applications that were using userfault will still see zero at the features field in fdinfo. Next, reporting actual features rather than available features, gives clear indication of what userfault features are used by an application. Link: http://lkml.kernel.org/r/1491140181-22121-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Andrea Arcangeli Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 1d227b0fcf49..f7555fc25877 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1756,7 +1756,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f) * protocols: aa:... bb:... */ seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n", - pending, total, UFFD_API, UFFD_API_FEATURES, + pending, total, UFFD_API, ctx->features, UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS); } #endif -- cgit v1.2.3 From 1f06b81aea5ecba2c1f8afd87e0ba1b9f8f90160 Mon Sep 17 00:00:00 2001 From: Alexander Polakov Date: Fri, 7 Apr 2017 16:04:45 -0700 Subject: mm/page_alloc.c: fix print order in show_free_areas() Fixes: 11fb998986a72a ("mm: move most file-based accounting to the node") Link: http://lkml.kernel.org/r/1490377730.30219.2.camel@beget.ru Signed-off-by: Alexander Polyakov Acked-by: Michal Hocko Cc: Mel Gorman Cc: Vlastimil Babka Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6cbde310abed..d6a665057d61 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4519,13 +4519,13 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(node_page_state(pgdat, NR_FILE_MAPPED)), K(node_page_state(pgdat, NR_FILE_DIRTY)), K(node_page_state(pgdat, NR_WRITEBACK)), + K(node_page_state(pgdat, NR_SHMEM)), #ifdef CONFIG_TRANSPARENT_HUGEPAGE K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR), K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR), K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR), #endif - K(node_page_state(pgdat, NR_SHMEM)), K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), K(node_page_state(pgdat, NR_UNSTABLE_NFS)), node_page_state(pgdat, NR_PAGES_SCANNED), -- cgit v1.2.3 From d79bf21e0e02f8ad24219f0587cc599a7e67f6c6 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Fri, 7 Apr 2017 16:04:48 -0700 Subject: vmlinux.lds: add missing VMLINUX_SYMBOL macros When __{start,end}_ro_after_init is referenced from C code, we run into the following build errors on blackfin: kernel/extable.c:169: undefined reference to `__start_ro_after_init' kernel/extable.c:169: undefined reference to `__end_ro_after_init' The build error is due to the fact that blackfin is one of the few arches that prepends an underscore '_' to all symbols defined in C. Fix this by wrapping __{start,end}_ro_after_init in vmlinux.lds.h with VMLINUX_SYMBOL(), which adds the necessary prefix for arches that have HAVE_UNDERSCORE_SYMBOL_PREFIX. Link: http://lkml.kernel.org/r/1491259387-15869-1-git-send-email-jeyu@redhat.com Signed-off-by: Jessica Yu Acked-by: Kees Cook Cc: Arnd Bergmann Cc: Eddie Kovsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7cdfe167074f..143db9c523e2 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -261,9 +261,9 @@ */ #ifndef RO_AFTER_INIT_DATA #define RO_AFTER_INIT_DATA \ - __start_ro_after_init = .; \ + VMLINUX_SYMBOL(__start_ro_after_init) = .; \ *(.data..ro_after_init) \ - __end_ro_after_init = .; + VMLINUX_SYMBOL(__end_ro_after_init) = .; #endif /* -- cgit v1.2.3 From 5402e97af667e35e54177af8f6575518bf251d51 Mon Sep 17 00:00:00 2001 From: "bsegall@google.com" Date: Fri, 7 Apr 2017 16:04:51 -0700 Subject: ptrace: fix PTRACE_LISTEN race corrupting task->state In PT_SEIZED + LISTEN mode STOP/CONT signals cause a wakeup against __TASK_TRACED. If this races with the ptrace_unfreeze_traced at the end of a PTRACE_LISTEN, this can wake the task /after/ the check against __TASK_TRACED, but before the reset of state to TASK_TRACED. This causes it to instead clobber TASK_WAKING, allowing a subsequent wakeup against TRACED while the task is still on the rq wake_list, corrupting it. Oleg said: "The kernel can crash or this can lead to other hard-to-debug problems. In short, "task->state = TASK_TRACED" in ptrace_unfreeze_traced() assumes that nobody else can wake it up, but PTRACE_LISTEN breaks the contract. Obviusly it is very wrong to manipulate task->state if this task is already running, or WAKING, or it sleeps again" [akpm@linux-foundation.org: coding-style fixes] Fixes: 9899d11f ("ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL") Link: http://lkml.kernel.org/r/xm26y3vfhmkp.fsf_-_@bsegall-linux.mtv.corp.google.com Signed-off-by: Ben Segall Acked-by: Oleg Nesterov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0af928712174..266ddcc1d8bb 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task) WARN_ON(!task->ptrace || task->parent != current); + /* + * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. + * Recheck state under the lock to close this race. + */ spin_lock_irq(&task->sighand->siglock); - if (__fatal_signal_pending(task)) - wake_up_state(task, __TASK_TRACED); - else - task->state = TASK_TRACED; + if (task->state == __TASK_TRACED) { + if (__fatal_signal_pending(task)) + wake_up_state(task, __TASK_TRACED); + else + task->state = TASK_TRACED; + } spin_unlock_irq(&task->sighand->siglock); } -- cgit v1.2.3 From 4fad7fb6b0279a85e16e6a63e0f1f7d98cedddf1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 7 Apr 2017 16:04:54 -0700 Subject: mm, thp: fix setting of defer+madvise thp defrag mode Setting thp defrag mode of "defer+madvise" actually sets "defer" in the kernel due to the name similarity and the out-of-order way the string is checked in defrag_store(). Check the string in the correct order so that TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG is set appropriately for "defer+madvise". Fixes: 21440d7eb904 ("mm, thp: add new defer+madvise defrag option") Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1704051814420.137626@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1ebc93e179f3..fef4cf210cc7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -240,18 +240,18 @@ static ssize_t defrag_store(struct kobject *kobj, clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer", buf, - min(sizeof("defer")-1, count))) { - clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); - clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); - clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); - set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); } else if (!memcmp("defer+madvise", buf, min(sizeof("defer+madvise")-1, count))) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + } else if (!memcmp("defer", buf, + min(sizeof("defer")-1, count))) { + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); } else if (!memcmp("madvise", buf, min(sizeof("madvise")-1, count))) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); -- cgit v1.2.3 From e11f8b7b6c4ea13bf8af6b8f42b45e15b554a92b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 7 Apr 2017 16:04:57 -0700 Subject: dax: fix radix tree insertion race While running generic/340 in my test setup I hit the following race. It can happen with kernels that support FS DAX PMDs, so v4.10 thru v4.11-rc5. Thread 1 Thread 2 -------- -------- dax_iomap_pmd_fault() grab_mapping_entry() spin_lock_irq() get_unlocked_mapping_entry() 'entry' is NULL, can't call lock_slot() spin_unlock_irq() radix_tree_preload() dax_iomap_pmd_fault() grab_mapping_entry() spin_lock_irq() get_unlocked_mapping_entry() ... lock_slot() spin_unlock_irq() dax_pmd_insert_mapping() spin_lock_irq() __radix_tree_insert() fails with -EEXIST The issue is that we have to drop mapping->tree_lock while calling radix_tree_preload(), but since we didn't have a radix tree entry to lock (unlike in the pmd_downgrade case) we have no protection against Thread 2 coming along and inserting a PMD at the same index. For 4k entries we handled this with a special-case response to -EEXIST coming from the __radix_tree_insert(), but this doesn't save us for PMDs because the -EEXIST case can also mean that we collided with a 4k entry in the radix tree at a different index, but one that is covered by our PMD range. So, correctly handle both the 4k and 2M collision cases by explicitly re-checking the radix tree for an entry at our index once we reacquire mapping->tree_lock. This patch has made it through a clean xfstests run with the current v4.11-rc5 based linux/master, and it also ran generic/340 500 times in a loop. It used to fail within the first 10 iterations. Link: http://lkml.kernel.org/r/20170406212944.2866-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Cc: "Darrick J. Wong" Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Jan Kara Cc: Matthew Wilcox Cc: [4.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index de622d4282a6..85abd741253d 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -373,6 +373,22 @@ restart: } spin_lock_irq(&mapping->tree_lock); + if (!entry) { + /* + * We needed to drop the page_tree lock while calling + * radix_tree_preload() and we didn't have an entry to + * lock. See if another thread inserted an entry at + * our index during this time. + */ + entry = __radix_tree_lookup(&mapping->page_tree, index, + NULL, &slot); + if (entry) { + radix_tree_preload_end(); + spin_unlock_irq(&mapping->tree_lock); + goto restart; + } + } + if (pmd_downgrade) { radix_tree_delete(&mapping->page_tree, index); mapping->nrexceptional--; @@ -388,19 +404,12 @@ restart: if (err) { spin_unlock_irq(&mapping->tree_lock); /* - * Someone already created the entry? This is a - * normal failure when inserting PMDs in a range - * that already contains PTEs. In that case we want - * to return -EEXIST immediately. - */ - if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD)) - goto restart; - /* - * Our insertion of a DAX PMD entry failed, most - * likely because it collided with a PTE sized entry - * at a different index in the PMD range. We haven't - * inserted anything into the radix tree and have no - * waiters to wake. + * Our insertion of a DAX entry failed, most likely + * because we were inserting a PMD entry and it + * collided with a PTE sized entry at a different + * index in the PMD range. We haven't inserted + * anything into the radix tree and have no waiters to + * wake. */ return ERR_PTR(err); } -- cgit v1.2.3 From 460bcec84e11c75122ace5976214abbc596eb91b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 7 Apr 2017 16:05:00 -0700 Subject: mm, swap_cgroup: reschedule when neeed in swap_cgroup_swapoff() We got need_resched() warnings in swap_cgroup_swapoff() because swap_cgroup_ctrl[type].length is particularly large. Reschedule when needed. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1704061315270.80559@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_cgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c index 310ac0b8f974..ac6318a064d3 100644 --- a/mm/swap_cgroup.c +++ b/mm/swap_cgroup.c @@ -201,6 +201,8 @@ void swap_cgroup_swapoff(int type) struct page *page = map[i]; if (page) __free_page(page); + if (!(i % SWAP_CLUSTER_MAX)) + cond_resched(); } vfree(map); } -- cgit v1.2.3 From cdcf4330d5660998d06fcd899b443693ab3d652f Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Fri, 7 Apr 2017 16:05:02 -0700 Subject: mailmap: update Yakir Yang email address Set current email address to replace previous employers email addresses. Link: http://lkml.kernel.org/r/1491450722-6633-1-git-send-email-jeffy.chen@rock-chips.com Signed-off-by: Jeffy Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 67dc22ffc9a8..e229922dc7f0 100644 --- a/.mailmap +++ b/.mailmap @@ -171,6 +171,7 @@ Vlad Dogaru Vladimir Davydov Vladimir Davydov Takashi YOSHII +Yakir Yang Yusuke Goda Gustavo Padovan Gustavo Padovan -- cgit v1.2.3 From ce612879ddc78ea7e4de4be80cba4ebf9caa07ee Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 7 Apr 2017 16:05:05 -0700 Subject: mm: move pcp and lru-pcp draining into single wq We currently have 2 specific WQ_RECLAIM workqueues in the mm code. vmstat_wq for updating pcp stats and lru_add_drain_wq dedicated to drain per cpu lru caches. This seems more than necessary because both can run on a single WQ. Both do not block on locks requiring a memory allocation nor perform any allocations themselves. We will save one rescuer thread this way. On the other hand drain_all_pages() queues work on the system wq which doesn't have rescuer and so this depend on memory allocation (when all workers are stuck allocating and new ones cannot be created). Initially we thought this would be more of a theoretical problem but Hugh Dickins has reported: : 4.11-rc has been giving me hangs after hours of swapping load. At : first they looked like memory leaks ("fork: Cannot allocate memory"); : but for no good reason I happened to do "cat /proc/sys/vm/stat_refresh" : before looking at /proc/meminfo one time, and the stat_refresh stuck : in D state, waiting for completion of flush_work like many kworkers. : kthreadd waiting for completion of flush_work in drain_all_pages(). This worker should be using WQ_RECLAIM as well in order to guarantee a forward progress. We can reuse the same one as for lru draining and vmstat. Link: http://lkml.kernel.org/r/20170307131751.24936-1-mhocko@kernel.org Signed-off-by: Michal Hocko Suggested-by: Tetsuo Handa Acked-by: Vlastimil Babka Acked-by: Mel Gorman Tested-by: Yang Li Tested-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 7 +++++++ mm/page_alloc.c | 9 ++++++++- mm/swap.c | 27 ++++++++------------------- mm/vmstat.c | 15 +++++++++------ 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index ccfc2a2969f4..266efaeaa370 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -481,6 +481,13 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, enum ttu_flags; struct tlbflush_unmap_batch; + +/* + * only for MM internal work items which do not depend on + * any allocations or locks which might depend on allocations + */ +extern struct workqueue_struct *mm_percpu_wq; + #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH void try_to_unmap_flush(void); void try_to_unmap_flush_dirty(void); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d6a665057d61..f3d603cef2c0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2373,6 +2373,13 @@ void drain_all_pages(struct zone *zone) */ static cpumask_t cpus_with_pcps; + /* + * Make sure nobody triggers this path before mm_percpu_wq is fully + * initialized. + */ + if (WARN_ON_ONCE(!mm_percpu_wq)) + return; + /* Workqueues cannot recurse */ if (current->flags & PF_WQ_WORKER) return; @@ -2422,7 +2429,7 @@ void drain_all_pages(struct zone *zone) for_each_cpu(cpu, &cpus_with_pcps) { struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu); INIT_WORK(work, drain_local_pages_wq); - schedule_work_on(cpu, work); + queue_work_on(cpu, mm_percpu_wq, work); } for_each_cpu(cpu, &cpus_with_pcps) flush_work(per_cpu_ptr(&pcpu_drain, cpu)); diff --git a/mm/swap.c b/mm/swap.c index c4910f14f957..5dabf444d724 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -670,30 +670,19 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy) static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); -/* - * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM - * workqueue, aiding in getting memory freed. - */ -static struct workqueue_struct *lru_add_drain_wq; - -static int __init lru_init(void) -{ - lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0); - - if (WARN(!lru_add_drain_wq, - "Failed to create workqueue lru_add_drain_wq")) - return -ENOMEM; - - return 0; -} -early_initcall(lru_init); - void lru_add_drain_all(void) { static DEFINE_MUTEX(lock); static struct cpumask has_work; int cpu; + /* + * Make sure nobody triggers this path before mm_percpu_wq is fully + * initialized. + */ + if (WARN_ON(!mm_percpu_wq)) + return; + mutex_lock(&lock); get_online_cpus(); cpumask_clear(&has_work); @@ -707,7 +696,7 @@ void lru_add_drain_all(void) pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || need_activate_page_drain(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); - queue_work_on(cpu, lru_add_drain_wq, work); + queue_work_on(cpu, mm_percpu_wq, work); cpumask_set_cpu(cpu, &has_work); } } diff --git a/mm/vmstat.c b/mm/vmstat.c index 89f95396ec46..809025ed97ea 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1552,7 +1552,6 @@ static const struct file_operations proc_vmstat_file_operations = { #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SMP -static struct workqueue_struct *vmstat_wq; static DEFINE_PER_CPU(struct delayed_work, vmstat_work); int sysctl_stat_interval __read_mostly = HZ; @@ -1623,7 +1622,7 @@ static void vmstat_update(struct work_struct *w) * to occur in the future. Keep on running the * update worker thread. */ - queue_delayed_work_on(smp_processor_id(), vmstat_wq, + queue_delayed_work_on(smp_processor_id(), mm_percpu_wq, this_cpu_ptr(&vmstat_work), round_jiffies_relative(sysctl_stat_interval)); } @@ -1702,7 +1701,7 @@ static void vmstat_shepherd(struct work_struct *w) struct delayed_work *dw = &per_cpu(vmstat_work, cpu); if (!delayed_work_pending(dw) && need_update(cpu)) - queue_delayed_work_on(cpu, vmstat_wq, dw, 0); + queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0); } put_online_cpus(); @@ -1718,7 +1717,6 @@ static void __init start_shepherd_timer(void) INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu), vmstat_update); - vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); schedule_delayed_work(&shepherd, round_jiffies_relative(sysctl_stat_interval)); } @@ -1764,11 +1762,16 @@ static int vmstat_cpu_dead(unsigned int cpu) #endif +struct workqueue_struct *mm_percpu_wq; + void __init init_mm_internals(void) { -#ifdef CONFIG_SMP - int ret; + int ret __maybe_unused; + mm_percpu_wq = alloc_workqueue("mm_percpu_wq", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + +#ifdef CONFIG_SMP ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead", NULL, vmstat_cpu_dead); if (ret < 0) -- cgit v1.2.3 From 97fbfef6bd597888485b653175fb846c6998b60c Mon Sep 17 00:00:00 2001 From: Shuxiao Zhang Date: Thu, 6 Apr 2017 22:30:29 +0800 Subject: staging: android: ashmem: lseek failed due to no FMODE_LSEEK. vfs_llseek will check whether the file mode has FMODE_LSEEK, no return failure. But ashmem can be lseek, so add FMODE_LSEEK to ashmem file. Comment From Greg Hackmann: ashmem_llseek() passes the llseek() call through to the backing shmem file. 91360b02ab48 ("ashmem: use vfs_llseek()") changed this from directly calling the file's llseek() op into a VFS layer call. This also adds a check for the FMODE_LSEEK bit, so without that bit ashmem_llseek() now always fails with -ESPIPE. Fixes: 91360b02ab48 ("ashmem: use vfs_llseek()") Signed-off-by: Shuxiao Zhang Tested-by: Greg Hackmann Cc: stable # 3.18+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ashmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 7cbad0d45b9c..6ba270e0494d 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -409,6 +409,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) ret = PTR_ERR(vmfile); goto out; } + vmfile->f_mode |= FMODE_LSEEK; asma->file = vmfile; } get_file(asma->file); -- cgit v1.2.3 From cf903e9d3a97f89b224d2d07be37c0f160db8192 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 3 Apr 2017 15:53:34 +0200 Subject: Documentation: stable-kernel-rules: fix stable-tag format A patch documenting how to specify which kernels a particular fix should be backported to (seemingly) inadvertently added a minus sign after the kernel version. This particular stable-tag format had never been used prior to this patch, and was neither present when the patch in question was first submitted (it was added in v2 without any comment). Drop the minus sign to avoid any confusion. Fixes: fdc81b7910ad ("stable_kernel_rules: Add clause about specification of kernel versions to patch.") Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- Documentation/process/stable-kernel-rules.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 11ec2d93a5e0..61e9c78bd6d1 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -124,7 +124,7 @@ specified in the following format in the sign-off area: .. code-block:: none - Cc: # 3.3.x- + Cc: # 3.3.x The tag has the meaning of: -- cgit v1.2.3 From c8a139d001a1aab1ea8734db14b22dac9dd143b6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 3 Apr 2017 11:30:34 +1000 Subject: sysfs: be careful of error returns from ops->show() ops->show() can return a negative error code. Commit 65da3484d9be ("sysfs: correctly handle short reads on PREALLOC attrs.") (in v4.4) caused this to be stored in an unsigned 'size_t' variable, so errors would look like large numbers. As a result, if an error is returned, sysfs_kf_read() will return the value of 'count', typically 4096. Commit 17d0774f8068 ("sysfs: correctly handle read offset on PREALLOC attrs") (in v4.8) extended this error to use the unsigned large 'len' as a size for memmove(). Consequently, if ->show returns an error, then the first read() on the sysfs file will return 4096 and could return uninitialized memory to user-space. If the application performs a subsequent read, this will trigger a memmove() with extremely large count, and is likely to crash the machine is bizarre ways. This bug can currently only be triggered by reading from an md sysfs attribute declared with __ATTR_PREALLOC() during the brief period between when mddev_put() deletes an mddev from the ->all_mddevs list, and when mddev_delayed_delete() - which is scheduled on a workqueue - completes. Before this, an error won't be returned by the ->show() After this, the ->show() won't be called. I can reproduce it reliably only by putting delay like usleep_range(500000,700000); early in mddev_delayed_delete(). Then after creating an md device md0 run echo clear > /sys/block/md0/md/array_state; cat /sys/block/md0/md/array_state The bug can be triggered without the usleep. Fixes: 65da3484d9be ("sysfs: correctly handle short reads on PREALLOC attrs.") Fixes: 17d0774f8068 ("sysfs: correctly handle read offset on PREALLOC attrs") Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Acked-by: Tejun Heo Reported-and-tested-by: Miroslav Benes Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index b803213d1307..39c75a86c67f 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -108,7 +108,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, { const struct sysfs_ops *ops = sysfs_file_ops(of->kn); struct kobject *kobj = of->kn->parent->priv; - size_t len; + ssize_t len; /* * If buf != of->prealloc_buf, we don't know how @@ -117,13 +117,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf, if (WARN_ON_ONCE(buf != of->prealloc_buf)) return 0; len = ops->show(kobj, of->kn->priv, buf); + if (len < 0) + return len; if (pos) { if (len <= pos) return 0; len -= pos; memmove(buf, buf + pos, len); } - return min(count, len); + return min_t(ssize_t, count, len); } /* kernfs write callback for regular sysfs files */ -- cgit v1.2.3 From 27f395b857abee5bced8356d39e1a491ff08748a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Mar 2017 13:34:47 -0400 Subject: MAINTAINERS: separate out kernfs maintainership Separate out kernfs from driver core and add myself as a co-maintainer. Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c45c02bc6082..6af8df4b8680 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4117,14 +4117,13 @@ F: drivers/block/drbd/ F: lib/lru_cache.c F: Documentation/blockdev/drbd/ -DRIVER CORE, KOBJECTS, DEBUGFS, KERNFS AND SYSFS +DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS M: Greg Kroah-Hartman T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git S: Supported F: Documentation/kobject.txt F: drivers/base/ F: fs/debugfs/ -F: fs/kernfs/ F: fs/sysfs/ F: include/linux/debugfs.h F: include/linux/kobj* @@ -7202,6 +7201,14 @@ F: arch/mips/include/uapi/asm/kvm* F: arch/mips/include/asm/kvm* F: arch/mips/kvm/ +KERNFS +M: Greg Kroah-Hartman +M: Tejun Heo +T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git +S: Supported +F: include/linux/kernfs.h +F: fs/kernfs/ + KEXEC M: Eric Biederman W: http://kernel.org/pub/linux/utils/kernel/kexec/ -- cgit v1.2.3 From 425fffd886bae3d127a08fa6a17f2e31e24ed7ff Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 7 Apr 2017 23:51:07 +0800 Subject: sysctl: report EINVAL if value is larger than UINT_MAX for proc_douintvec Currently, inputting the following command will succeed but actually the value will be truncated: # echo 0x12ffffffff > /proc/sys/net/ipv4/tcp_notsent_lowat This is not friendly to the user, so instead, we should report error when the value is larger than UINT_MAX. Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields") Signed-off-by: Liping Zhang Cc: Subash Abhinov Kasiviswanathan Cc: Andrew Morton Cc: Eric W. Biederman Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8cca4c7bb21f..8c8714fcb53c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2133,6 +2133,8 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp, if (write) { if (*negp) return -EINVAL; + if (*lvalp > UINT_MAX) + return -EINVAL; *valp = *lvalp; } else { unsigned int val = *valp; -- cgit v1.2.3 From cf01fb9985e8deb25ccf0ea54d916b8871ae0e62 Mon Sep 17 00:00:00 2001 From: Chris Salls Date: Fri, 7 Apr 2017 23:48:11 -0700 Subject: mm/mempolicy.c: fix error handling in set_mempolicy and mbind. In the case that compat_get_bitmap fails we do not want to copy the bitmap to the user as it will contain uninitialized stack data and leak sensitive data. Signed-off-by: Chris Salls Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 75b2745bac41..37d0b334bfe9 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1529,7 +1529,6 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask, compat_ulong_t, maxnode) { - long err = 0; unsigned long __user *nm = NULL; unsigned long nr_bits, alloc_size; DECLARE_BITMAP(bm, MAX_NUMNODES); @@ -1538,14 +1537,13 @@ COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask, alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8; if (nmask) { - err = compat_get_bitmap(bm, nmask, nr_bits); + if (compat_get_bitmap(bm, nmask, nr_bits)) + return -EFAULT; nm = compat_alloc_user_space(alloc_size); - err |= copy_to_user(nm, bm, alloc_size); + if (copy_to_user(nm, bm, alloc_size)) + return -EFAULT; } - if (err) - return -EFAULT; - return sys_set_mempolicy(mode, nm, nr_bits+1); } @@ -1553,7 +1551,6 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, compat_ulong_t, mode, compat_ulong_t __user *, nmask, compat_ulong_t, maxnode, compat_ulong_t, flags) { - long err = 0; unsigned long __user *nm = NULL; unsigned long nr_bits, alloc_size; nodemask_t bm; @@ -1562,14 +1559,13 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8; if (nmask) { - err = compat_get_bitmap(nodes_addr(bm), nmask, nr_bits); + if (compat_get_bitmap(nodes_addr(bm), nmask, nr_bits)) + return -EFAULT; nm = compat_alloc_user_space(alloc_size); - err |= copy_to_user(nm, nodes_addr(bm), alloc_size); + if (copy_to_user(nm, nodes_addr(bm), alloc_size)) + return -EFAULT; } - if (err) - return -EFAULT; - return sys_mbind(start, len, mode, nm, nr_bits+1, flags); } -- cgit v1.2.3 From 39da7c509acff13fc8cb12ec1bb20337c988ed36 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Apr 2017 09:49:44 -0700 Subject: Linux 4.11-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7acbcb324bae..efa267a92ba6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From dbc9d69edfa0fc3b44156f32747a5add3fbdfb8b Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 22 Mar 2017 17:03:31 +0900 Subject: pinctrl: samsung: Add missing part for PINCFG_TYPE_DRV of Exynos5433 The commit 1259feddd0f8("pinctrl: samsung: Fix the width of PINCFG_TYPE_DRV bitfields for Exynos5433") already fixed the different width of PINCFG_TYPE_DRV from previous Exynos SoC. However wrong merge conflict resolution was chosen in commit 7f36f5d11cda ("Merge tag 'v4.10-rc6' into devel") effectively dropping the changes for PINCFG_TYPE_DRV. Re-do them here. The macro EXYNOS_PIN_BANK_EINTW is no longer used so remove it. Fixes: 7f36f5d11cda ("Merge tag 'v4.10-rc6' into devel") Signed-off-by: Chanwoo Choi Signed-off-by: Krzysztof Kozlowski Signed-off-by: Linus Walleij --- drivers/pinctrl/samsung/pinctrl-exynos.c | 80 ++++++++++++++++---------------- drivers/pinctrl/samsung/pinctrl-exynos.h | 11 ----- 2 files changed, 40 insertions(+), 51 deletions(-) diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index f9b49967f512..63e51b56a22a 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -1468,82 +1468,82 @@ const struct samsung_pin_ctrl exynos5420_pin_ctrl[] __initconst = { /* pin banks of exynos5433 pin-controller - ALIVE */ static const struct samsung_pin_bank_data exynos5433_pin_banks0[] __initconst = { - EXYNOS_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00), - EXYNOS_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04), - EXYNOS_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08), - EXYNOS_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c), - EXYNOS_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1), - EXYNOS_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1), - EXYNOS_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1), - EXYNOS_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1), - EXYNOS_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1), + EXYNOS5433_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00), + EXYNOS5433_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04), + EXYNOS5433_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08), + EXYNOS5433_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c), + EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1), + EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1), }; /* pin banks of exynos5433 pin-controller - AUD */ static const struct samsung_pin_bank_data exynos5433_pin_banks1[] __initconst = { - EXYNOS_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00), - EXYNOS_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04), + EXYNOS5433_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00), + EXYNOS5433_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04), }; /* pin banks of exynos5433 pin-controller - CPIF */ static const struct samsung_pin_bank_data exynos5433_pin_banks2[] __initconst = { - EXYNOS_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00), + EXYNOS5433_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00), }; /* pin banks of exynos5433 pin-controller - eSE */ static const struct samsung_pin_bank_data exynos5433_pin_banks3[] __initconst = { - EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00), + EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00), }; /* pin banks of exynos5433 pin-controller - FINGER */ static const struct samsung_pin_bank_data exynos5433_pin_banks4[] __initconst = { - EXYNOS_PIN_BANK_EINTG(4, 0x000, "gpd5", 0x00), + EXYNOS5433_PIN_BANK_EINTG(4, 0x000, "gpd5", 0x00), }; /* pin banks of exynos5433 pin-controller - FSYS */ static const struct samsung_pin_bank_data exynos5433_pin_banks5[] __initconst = { - EXYNOS_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00), - EXYNOS_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04), - EXYNOS_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08), - EXYNOS_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c), - EXYNOS_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10), - EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14), + EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00), + EXYNOS5433_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04), + EXYNOS5433_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08), + EXYNOS5433_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c), + EXYNOS5433_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10), + EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14), }; /* pin banks of exynos5433 pin-controller - IMEM */ static const struct samsung_pin_bank_data exynos5433_pin_banks6[] __initconst = { - EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00), + EXYNOS5433_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00), }; /* pin banks of exynos5433 pin-controller - NFC */ static const struct samsung_pin_bank_data exynos5433_pin_banks7[] __initconst = { - EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00), + EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00), }; /* pin banks of exynos5433 pin-controller - PERIC */ static const struct samsung_pin_bank_data exynos5433_pin_banks8[] __initconst = { - EXYNOS_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00), - EXYNOS_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04), - EXYNOS_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08), - EXYNOS_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c), - EXYNOS_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10), - EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14), - EXYNOS_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18), - EXYNOS_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c), - EXYNOS_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20), - EXYNOS_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24), - EXYNOS_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28), - EXYNOS_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c), - EXYNOS_PIN_BANK_EINTG(7, 0x180, "gpd6", 0x30), - EXYNOS_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34), - EXYNOS_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38), - EXYNOS_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c), - EXYNOS_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40), + EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00), + EXYNOS5433_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04), + EXYNOS5433_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08), + EXYNOS5433_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c), + EXYNOS5433_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10), + EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14), + EXYNOS5433_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18), + EXYNOS5433_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c), + EXYNOS5433_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20), + EXYNOS5433_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24), + EXYNOS5433_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28), + EXYNOS5433_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c), + EXYNOS5433_PIN_BANK_EINTG(7, 0x180, "gpd6", 0x30), + EXYNOS5433_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34), + EXYNOS5433_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38), + EXYNOS5433_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c), + EXYNOS5433_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40), }; /* pin banks of exynos5433 pin-controller - TOUCH */ static const struct samsung_pin_bank_data exynos5433_pin_banks9[] __initconst = { - EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00), + EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00), }; /* diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h index a473092fb8d2..cd046eb7d705 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.h +++ b/drivers/pinctrl/samsung/pinctrl-exynos.h @@ -79,17 +79,6 @@ .name = id \ } -#define EXYNOS_PIN_BANK_EINTW_EXT(pins, reg, id, offs, pctl_idx) \ - { \ - .type = &bank_type_alive, \ - .pctl_offset = reg, \ - .nr_pins = pins, \ - .eint_type = EINT_TYPE_WKUP, \ - .eint_offset = offs, \ - .name = id, \ - .pctl_res_idx = pctl_idx, \ - } \ - #define EXYNOS5433_PIN_BANK_EINTG(pins, reg, id, offs) \ { \ .type = &exynos5433_bank_type_off, \ -- cgit v1.2.3 From 264d509637d95f9404e52ced5003ad352e0f6a26 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 10 Apr 2017 11:16:59 -0400 Subject: audit: make sure we don't let the retry queue grow without bounds The retry queue is intended to provide a temporary buffer in the case of transient errors when communicating with auditd, it is not meant as a long life queue, that functionality is provided by the hold queue. This patch fixes a problem identified by Seth where the retry queue could grow uncontrollably if an auditd instance did not connect to the kernel to drain the queues. This commit fixes this by doing the following: * Make sure we always call auditd_reset() if we decide the connection with audit is really dead. There were some cases in kauditd_hold_skb() where we did not reset the connection, this patch relocates the reset calls to kauditd_thread() so all the error conditions are caught and the connection reset. As a side effect, this means we could move auditd_reset() and get rid of the forward definition at the top of kernel/audit.c. * We never checked the status of the auditd connection when processing the main audit queue which meant that the retry queue could grow unchecked. This patch adds a call to auditd_reset() after the main queue has been processed if auditd is not connected, the auditd_reset() call will make sure the retry and hold queues are correctly managed/flushed so that the retry queue remains reasonable. Cc: # 4.10.x-: 5b52330bbfe6 Reported-by: Seth Forshee Signed-off-by: Paul Moore --- kernel/audit.c | 67 ++++++++++++++++++++++++++++------------------------------ 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index 2f4964cfde0b..a871bf80fde1 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -160,7 +160,6 @@ static LIST_HEAD(audit_freelist); /* queue msgs to send via kauditd_task */ static struct sk_buff_head audit_queue; -static void kauditd_hold_skb(struct sk_buff *skb); /* queue msgs due to temporary unicast send problems */ static struct sk_buff_head audit_retry_queue; /* queue msgs waiting for new auditd connection */ @@ -453,30 +452,6 @@ static void auditd_set(int pid, u32 portid, struct net *net) spin_unlock_irqrestore(&auditd_conn.lock, flags); } -/** - * auditd_reset - Disconnect the auditd connection - * - * Description: - * Break the auditd/kauditd connection and move all the queued records into the - * hold queue in case auditd reconnects. - */ -static void auditd_reset(void) -{ - struct sk_buff *skb; - - /* if it isn't already broken, break the connection */ - rcu_read_lock(); - if (auditd_conn.pid) - auditd_set(0, 0, NULL); - rcu_read_unlock(); - - /* flush all of the main and retry queues to the hold queue */ - while ((skb = skb_dequeue(&audit_retry_queue))) - kauditd_hold_skb(skb); - while ((skb = skb_dequeue(&audit_queue))) - kauditd_hold_skb(skb); -} - /** * kauditd_print_skb - Print the audit record to the ring buffer * @skb: audit record @@ -505,9 +480,6 @@ static void kauditd_rehold_skb(struct sk_buff *skb) { /* put the record back in the queue at the same place */ skb_queue_head(&audit_hold_queue, skb); - - /* fail the auditd connection */ - auditd_reset(); } /** @@ -544,9 +516,6 @@ static void kauditd_hold_skb(struct sk_buff *skb) /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); kfree_skb(skb); - - /* fail the auditd connection */ - auditd_reset(); } /** @@ -566,6 +535,30 @@ static void kauditd_retry_skb(struct sk_buff *skb) skb_queue_tail(&audit_retry_queue, skb); } +/** + * auditd_reset - Disconnect the auditd connection + * + * Description: + * Break the auditd/kauditd connection and move all the queued records into the + * hold queue in case auditd reconnects. + */ +static void auditd_reset(void) +{ + struct sk_buff *skb; + + /* if it isn't already broken, break the connection */ + rcu_read_lock(); + if (auditd_conn.pid) + auditd_set(0, 0, NULL); + rcu_read_unlock(); + + /* flush all of the main and retry queues to the hold queue */ + while ((skb = skb_dequeue(&audit_retry_queue))) + kauditd_hold_skb(skb); + while ((skb = skb_dequeue(&audit_queue))) + kauditd_hold_skb(skb); +} + /** * auditd_send_unicast_skb - Send a record via unicast to auditd * @skb: audit record @@ -758,6 +751,7 @@ static int kauditd_thread(void *dummy) NULL, kauditd_rehold_skb); if (rc < 0) { sk = NULL; + auditd_reset(); goto main_queue; } @@ -767,6 +761,7 @@ static int kauditd_thread(void *dummy) NULL, kauditd_hold_skb); if (rc < 0) { sk = NULL; + auditd_reset(); goto main_queue; } @@ -775,16 +770,18 @@ main_queue: * unicast, dump failed record sends to the retry queue; if * sk == NULL due to previous failures we will just do the * multicast send and move the record to the retry queue */ - kauditd_send_queue(sk, portid, &audit_queue, 1, - kauditd_send_multicast_skb, - kauditd_retry_skb); + rc = kauditd_send_queue(sk, portid, &audit_queue, 1, + kauditd_send_multicast_skb, + kauditd_retry_skb); + if (sk == NULL || rc < 0) + auditd_reset(); + sk = NULL; /* drop our netns reference, no auditd sends past this line */ if (net) { put_net(net); net = NULL; } - sk = NULL; /* we have processed all the queues so wake everyone */ wake_up(&audit_backlog_wait); -- cgit v1.2.3 From bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e Mon Sep 17 00:00:00 2001 From: Zefan Li Date: Fri, 7 Apr 2017 16:51:55 +0800 Subject: cgroup: avoid attaching a cgroup root to two different superblocks Run this: touch file0 for ((; ;)) { mount -t cpuset xxx file0 } And this concurrently: touch file1 for ((; ;)) { mount -t cpuset xxx file1 } We'll trigger a warning like this: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 4675 at lib/percpu-refcount.c:317 percpu_ref_kill_and_confirm+0x92/0xb0 percpu_ref_kill_and_confirm called more than once on css_release! CPU: 1 PID: 4675 Comm: mount Not tainted 4.11.0-rc5+ #5 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 Call Trace: dump_stack+0x63/0x84 __warn+0xd1/0xf0 warn_slowpath_fmt+0x5f/0x80 percpu_ref_kill_and_confirm+0x92/0xb0 cgroup_kill_sb+0x95/0xb0 deactivate_locked_super+0x43/0x70 deactivate_super+0x46/0x60 ... ---[ end trace a79f61c2a2633700 ]--- Here's a race: Thread A Thread B cgroup1_mount() # alloc a new cgroup root cgroup_setup_root() cgroup1_mount() # no sb yet, returns NULL kernfs_pin_sb() # but succeeds in getting the refcnt, # so re-use cgroup root percpu_ref_tryget_live() # alloc sb with cgroup root cgroup_do_mount() cgroup_kill_sb() # alloc another sb with same root cgroup_do_mount() cgroup_kill_sb() We end up using the same cgroup root for two different superblocks, so percpu_ref_kill() will be called twice on the same root when the two superblocks are destroyed. We should fix to make sure the superblock pinning is really successful. Cc: stable@vger.kernel.org # 3.16+ Reported-by: Dmitry Vyukov Signed-off-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 1dc22f6b49f5..12e19f0636ea 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1146,7 +1146,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, * path is super cold. Let's just sleep a bit and retry. */ pinned_sb = kernfs_pin_sb(root->kf_root, NULL); - if (IS_ERR(pinned_sb) || + if (IS_ERR_OR_NULL(pinned_sb) || !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { mutex_unlock(&cgroup_mutex); if (!IS_ERR_OR_NULL(pinned_sb)) -- cgit v1.2.3 From 3cf864520e877505158f09075794a08abab11bbe Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Fri, 31 Mar 2017 20:35:42 +0200 Subject: sata_via: Enable hotplug only on VT6421 Commit 57e5568fda27 ("sata_via: Implement hotplug for VT6421") adds hotplug IRQ handler for VT6421 but enables hotplug on all chips. This is a bug because it causes "irq xx: nobody cared" error on VT6420 when hot-(un)plugging a drive: [ 381.839948] irq 20: nobody cared (try booting with the "irqpoll" option) [ 381.840014] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-rc5+ #148 [ 381.840066] Hardware name: P4VM800/P4VM800, BIOS P1.60 05/29/2006 [ 381.840117] Call Trace: [ 381.840167] [ 381.840225] ? dump_stack+0x44/0x58 [ 381.840278] ? __report_bad_irq+0x14/0x97 [ 381.840327] ? handle_edge_irq+0xa5/0xa5 [ 381.840376] ? note_interrupt+0x155/0x1cf [ 381.840426] ? handle_edge_irq+0xa5/0xa5 [ 381.840474] ? handle_irq_event_percpu+0x32/0x38 [ 381.840524] ? handle_irq_event+0x1f/0x38 [ 381.840573] ? handle_fasteoi_irq+0x69/0xb8 [ 381.840625] ? handle_irq+0x4f/0x5d [ 381.840672] [ 381.840726] ? do_IRQ+0x2e/0x8b [ 381.840782] ? common_interrupt+0x2c/0x34 [ 381.840836] ? mwait_idle+0x60/0x82 [ 381.840892] ? arch_cpu_idle+0x6/0x7 [ 381.840949] ? do_idle+0x96/0x18e [ 381.841002] ? cpu_startup_entry+0x16/0x1a [ 381.841057] ? start_kernel+0x319/0x31c [ 381.841111] ? startup_32_smp+0x166/0x168 [ 381.841165] handlers: [ 381.841219] [] ata_bmdma_interrupt [ 381.841274] Disabling IRQ #20 Seems that VT6420 can do hotplug too (there's no documentation) but the comments say that SCR register access (required for detecting hotplug events) can cause problems on these chips. For now, just keep hotplug disabled on anything other than VT6421. Signed-off-by: Ondrej Zary Signed-off-by: Tejun Heo --- drivers/ata/sata_via.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 0636d84fbefe..f3f538eec7b3 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -644,14 +644,16 @@ static void svia_configure(struct pci_dev *pdev, int board_id, pci_write_config_byte(pdev, SATA_NATIVE_MODE, tmp8); } - /* enable IRQ on hotplug */ - pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8); - if ((tmp8 & SATA_HOTPLUG) != SATA_HOTPLUG) { - dev_dbg(&pdev->dev, - "enabling SATA hotplug (0x%x)\n", - (int) tmp8); - tmp8 |= SATA_HOTPLUG; - pci_write_config_byte(pdev, SVIA_MISC_3, tmp8); + if (board_id == vt6421) { + /* enable IRQ on hotplug */ + pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8); + if ((tmp8 & SATA_HOTPLUG) != SATA_HOTPLUG) { + dev_dbg(&pdev->dev, + "enabling SATA hotplug (0x%x)\n", + (int) tmp8); + tmp8 |= SATA_HOTPLUG; + pci_write_config_byte(pdev, SVIA_MISC_3, tmp8); + } } /* -- cgit v1.2.3 From 7036502783729c2aaf7a3c24c89087c58721430f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 10 Apr 2017 13:16:33 +0300 Subject: pinctrl: cherryview: Add a quirk to make Acer Chromebook keyboard work again After commit 47c950d10202 ("pinctrl: cherryview: Do not add all southwest and north GPIOs to IRQ domain") the driver does not add all GPIOs to the irqdomain. The reason for that is that those GPIOs cannot generate IRQs at all, only GPEs (General Purpose Events). This causes Linux virtual IRQ numbering to change. However, it seems some CYAN Chromebooks, including Acer Chromebook hardcodes these Linux IRQ numbers in the ACPI tables of the machine. Since the numbering is different now, the IRQ meant for keyboard does not match the Linux virtual IRQ number anymore making the keyboard non-functional. Work this around by adding special quirk just for these machines where we add back all GPIOs to the irqdomain. Rest of the Cherryview/Braswell based machines will not be affected by the change. Link: https://bugzilla.kernel.org/show_bug.cgi?id=194945 Fixes: 47c950d10202 ("pinctrl: cherryview: Do not add all southwest and north GPIOs to IRQ domain") Reported-by: Adam S Levy Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index f80134e3e0b6..9ff790174906 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -13,6 +13,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -1524,10 +1525,31 @@ static void chv_gpio_irq_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } +/* + * Certain machines seem to hardcode Linux IRQ numbers in their ACPI + * tables. Since we leave GPIOs that are not capable of generating + * interrupts out of the irqdomain the numbering will be different and + * cause devices using the hardcoded IRQ numbers fail. In order not to + * break such machines we will only mask pins from irqdomain if the machine + * is not listed below. + */ +static const struct dmi_system_id chv_no_valid_mask[] = { + { + /* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */ + .ident = "Acer Chromebook (CYAN)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"), + DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"), + }, + } +}; + static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) { const struct chv_gpio_pinrange *range; struct gpio_chip *chip = &pctrl->chip; + bool need_valid_mask = !dmi_check_system(chv_no_valid_mask); int ret, i, offset; *chip = chv_gpio_chip; @@ -1536,7 +1558,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) chip->label = dev_name(pctrl->dev); chip->parent = pctrl->dev; chip->base = -1; - chip->irq_need_valid_mask = true; + chip->irq_need_valid_mask = need_valid_mask; ret = devm_gpiochip_add_data(pctrl->dev, chip, pctrl); if (ret) { @@ -1567,7 +1589,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) intsel &= CHV_PADCTRL0_INTSEL_MASK; intsel >>= CHV_PADCTRL0_INTSEL_SHIFT; - if (intsel >= pctrl->community->nirqs) + if (need_valid_mask && intsel >= pctrl->community->nirqs) clear_bit(i, chip->irq_valid_mask); } -- cgit v1.2.3 From 84379d83d8e536aef2c706744ff22c136d0be6c9 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 11 Apr 2017 11:10:16 +0200 Subject: Revert "HID: rmi: Handle all Synaptics touchpads using hid-rmi" This reverts commit 279967a65b320d174a507498aea7d44db3fee7f4. Multiple regressions [1] [2] [3] have been reported. The hid-rmi support would have to fixed and redone in 4.11+. [1] http://lkml.kernel.org/r/b79b88c8-770a-13f6-5668-c3a94254e5e0@gmail.com [2] http://lkml.kernel.org/r/375e67b5-2cb8-3491-1d71-d8650d6e9451@gmail.com [3] https://bugzilla.kernel.org/show_bug.cgi?id=195287 Reported-by: Cameron Gutman Reported-by: Gabriele Mazzotta Reported-by: Lorenzo J. Lucchini Reported-by: Thorsten Leemhuis Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 86bbd8a1fd4a..d162f0dc76e3 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -819,8 +819,7 @@ static int hid_scan_report(struct hid_device *hid) hid->group = HID_GROUP_WACOM; break; case USB_VENDOR_ID_SYNAPTICS: - if (hid->group == HID_GROUP_GENERIC || - hid->group == HID_GROUP_MULTITOUCH_WIN_8) + if (hid->group == HID_GROUP_GENERIC) if ((parser->scan_flags & HID_SCAN_FLAG_VENDOR_SPECIFIC) && (parser->scan_flags & HID_SCAN_FLAG_GD_POINTER)) /* -- cgit v1.2.3 From 63987bfebd8869e00b34e2bdd12e59d71909bec0 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Wed, 5 Apr 2017 15:51:50 +0530 Subject: drm/i915: Suspend GuC prior to GPU Reset during GEM suspend i915 is currently doing a full GPU reset at the end of i915_gem_suspend() followed by GuC suspend in i915_drm_suspend(). This GPU reset clobbers the GuC, causing the suspend request to then fail, leaving the GuC in an undefined state. We need to tell the GuC to suspend before we do the direct intel_gpu_reset(). v2: Commit message update. (Chris, Daniele) Fixes: 1c777c5d1dcd ("drm/i915/hsw: Fix GPU hang during resume from S3-devices state") Cc: Jeff McGee Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Imre Deak Cc: Mika Kuoppala Signed-off-by: Sagar Arun Kamble Link: http://patchwork.freedesktop.org/patch/msgid/1491387710-20553-1-git-send-email-sagar.a.kamble@intel.com Reviewed-by: Daniele Ceraolo Spurio Acked-by: Chris Wilson Signed-off-by: Chris Wilson (cherry picked from commit fd08923384385400101c71ac0d21d37d6b23b00d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 2 -- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1c75402a59c1..5c089b3c2a7e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1434,8 +1434,6 @@ static int i915_drm_suspend(struct drm_device *dev) goto out; } - intel_guc_suspend(dev_priv); - intel_display_suspend(dev); intel_dp_mst_suspend(dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 67b1fc5a0331..fe531f904062 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4348,6 +4348,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); + intel_guc_suspend(dev_priv); + cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->gt.retire_work); -- cgit v1.2.3 From c053b5a506d3afc038c485a86e3d461f6c7fb207 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 7 Apr 2017 13:49:34 +0300 Subject: drm/i915: Don't call synchronize_rcu_expedited under struct_mutex Only call synchronize_rcu_expedited after unlocking struct_mutex to avoid deadlock because the workqueues depend on struct_mutex. >From original patch by Andrea: synchronize_rcu/synchronize_sched/synchronize_rcu_expedited() will hang until its own workqueues are run. The i915 gem workqueues will wait on the struct_mutex to be released. So we cannot wait for a quiescent state using those rcu primitives while holding the struct_mutex or it creates a circular lock dependency resulting in kernel hangs (which is reproducible but goes undetected by lockdep). kswapd0 D 0 700 2 0x00000000 Call Trace: ? __schedule+0x1a5/0x660 ? schedule+0x36/0x80 ? _synchronize_rcu_expedited.constprop.65+0x2ef/0x300 ? wake_up_bit+0x20/0x20 ? rcu_stall_kick_kthreads.part.54+0xc0/0xc0 ? rcu_exp_wait_wake+0x530/0x530 ? i915_gem_shrink+0x34b/0x4b0 ? i915_gem_shrinker_scan+0x7c/0x90 ? i915_gem_shrinker_scan+0x7c/0x90 ? shrink_slab.part.61.constprop.72+0x1c1/0x3a0 ? shrink_zone+0x154/0x160 ? kswapd+0x40a/0x720 ? kthread+0xf4/0x130 ? try_to_free_pages+0x450/0x450 ? kthread_create_on_node+0x40/0x40 ? ret_from_fork+0x23/0x30 plasmashell D 0 4657 4614 0x00000000 Call Trace: ? __schedule+0x1a5/0x660 ? schedule+0x36/0x80 ? schedule_preempt_disabled+0xe/0x10 ? __mutex_lock.isra.4+0x1c9/0x790 ? i915_gem_close_object+0x26/0xc0 ? i915_gem_close_object+0x26/0xc0 ? drm_gem_object_release_handle+0x48/0x90 ? drm_gem_handle_delete+0x50/0x80 ? drm_ioctl+0x1fa/0x420 ? drm_gem_handle_create+0x40/0x40 ? pipe_write+0x391/0x410 ? __vfs_write+0xc6/0x120 ? do_vfs_ioctl+0x8b/0x5d0 ? SyS_ioctl+0x3b/0x70 ? entry_SYSCALL_64_fastpath+0x13/0x94 kworker/0:0 D 0 29186 2 0x00000000 Workqueue: events __i915_gem_free_work Call Trace: ? __schedule+0x1a5/0x660 ? schedule+0x36/0x80 ? schedule_preempt_disabled+0xe/0x10 ? __mutex_lock.isra.4+0x1c9/0x790 ? del_timer_sync+0x44/0x50 ? update_curr+0x57/0x110 ? __i915_gem_free_objects+0x31/0x300 ? __i915_gem_free_objects+0x31/0x300 ? __i915_gem_free_work+0x2d/0x40 ? process_one_work+0x13a/0x3b0 ? worker_thread+0x4a/0x460 ? kthread+0xf4/0x130 ? process_one_work+0x3b0/0x3b0 ? kthread_create_on_node+0x40/0x40 ? ret_from_fork+0x23/0x30 Fixes: 3d3d18f086cd ("drm/i915: Avoid rcu_barrier() from reclaim paths (shrinker)") Reported-by: Andrea Arcangeli Signed-off-by: Joonas Lahtinen Cc: Andrea Arcangeli Cc: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Reviewed-by: Chris Wilson (cherry picked from commit 8f612d055183545070ca1009ac2eb1f2e044cc20) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index d5d2b4c6ed38..70b3832a79dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -53,6 +53,17 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) BUG(); } +static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock) +{ + if (!unlock) + return; + + mutex_unlock(&dev->struct_mutex); + + /* expedite the RCU grace period to free some request slabs */ + synchronize_rcu_expedited(); +} + static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -232,11 +243,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, intel_runtime_pm_put(dev_priv); i915_gem_retire_requests(dev_priv); - if (unlock) - mutex_unlock(&dev_priv->drm.struct_mutex); - /* expedite the RCU grace period to free some request slabs */ - synchronize_rcu_expedited(); + i915_gem_shrinker_unlock(&dev_priv->drm, unlock); return count; } @@ -293,8 +301,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) count += obj->base.size >> PAGE_SHIFT; } - if (unlock) - mutex_unlock(&dev->struct_mutex); + i915_gem_shrinker_unlock(dev, unlock); return count; } @@ -321,8 +328,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) sc->nr_to_scan - freed, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); - if (unlock) - mutex_unlock(&dev->struct_mutex); + + i915_gem_shrinker_unlock(dev, unlock); return freed; } @@ -364,8 +371,7 @@ i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv, struct shrinker_lock_uninterruptible *slu) { dev_priv->mm.interruptible = slu->was_interruptible; - if (slu->unlock) - mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock); } static int -- cgit v1.2.3 From 0c45b36f8acc89cb94c8696a27574f01e0e411dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 7 Apr 2017 22:02:29 +0200 Subject: drm/udl: Fix unaligned memory access in udl_render_hline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On SPARC, the udl driver filled my kernel log with these messages: [186668.910612] Kernel unaligned access at TPC[76609c] udl_render_hline+0x13c/0x3a0 Use put_unaligned_be16 to avoid them. On x86 this results in the same code, but on SPARC the compiler emits two single-byte stores. Signed-off-by: Jonathan Neuschäfer Acked-by: David Airlie Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/20170407200229.20642-1-j.neuschaefer@gmx.net --- drivers/gpu/drm/udl/udl_transfer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c index 917dcb978c2c..0c87b1ac6b68 100644 --- a/drivers/gpu/drm/udl/udl_transfer.c +++ b/drivers/gpu/drm/udl/udl_transfer.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "udl_drv.h" @@ -163,7 +164,7 @@ static void udl_compress_hline16( const u8 *const start = pixel; const uint16_t repeating_pixel_val16 = pixel_val16; - *(uint16_t *)cmd = cpu_to_be16(pixel_val16); + put_unaligned_be16(pixel_val16, cmd); cmd += 2; pixel += bpp; -- cgit v1.2.3 From 45abdf35cf82e4270328c7237e7812de960ac560 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 12 Apr 2017 00:31:16 +0000 Subject: drm/etnaviv: fix missing unlock on error in etnaviv_gpu_submit() Add the missing unlock before return from function etnaviv_gpu_submit() in the error handling case. lst: fixed label name. Fixes: f3cd1b064f11 ("drm/etnaviv: (re-)protect fence allocation with GPU mutex") CC: stable@vger.kernel.org #4.9+ Signed-off-by: Wei Yongjun Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index da48819ff2e6..b78d9239e48f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1317,7 +1317,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, if (!fence) { event_free(gpu, event); ret = -ENOMEM; - goto out_pm_put; + goto out_unlock; } gpu->event[event].fence = fence; @@ -1357,6 +1357,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, hangcheck_timer_reset(gpu); ret = 0; +out_unlock: mutex_unlock(&gpu->lock); out_pm_put: -- cgit v1.2.3 From d8a6e3aed955342dd68dd72e0feaf2cd32be683b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 13 Apr 2017 11:06:00 -0700 Subject: ia64: restore symbol versions for symbols defined in assembly The ia64 build generates many warnings like this: WARNING: EXPORT symbol "empty_zero_page" [vmlinux] version generation failed, symbol will not be versioned. Besides adding the necessary header this also requires fiddling with some explicit .S -> .o rules. Cc: IA64-ML Signed-off-by: Jan Beulich Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/asm-prototypes.h | 29 +++++++++++++++++++++++++++++ arch/ia64/lib/Makefile | 16 ++++++++-------- 2 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 arch/ia64/include/asm/asm-prototypes.h diff --git a/arch/ia64/include/asm/asm-prototypes.h b/arch/ia64/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..a2c139808cfe --- /dev/null +++ b/arch/ia64/include/asm/asm-prototypes.h @@ -0,0 +1,29 @@ +#ifndef _ASM_IA64_ASM_PROTOTYPES_H +#define _ASM_IA64_ASM_PROTOTYPES_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern const char ia64_ivt[]; + +signed int __divsi3(signed int, unsigned int); +signed int __modsi3(signed int, unsigned int); + +signed long long __divdi3(signed long long, unsigned long long); +signed long long __moddi3(signed long long, unsigned long long); + +unsigned int __udivsi3(unsigned int, unsigned int); +unsigned int __umodsi3(unsigned int, unsigned int); + +unsigned long long __udivdi3(unsigned long long, unsigned long long); +unsigned long long __umoddi3(unsigned long long, unsigned long long); + +#endif /* _ASM_IA64_ASM_PROTOTYPES_H */ diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile index 1f3d3877618f..0a40b14407b1 100644 --- a/arch/ia64/lib/Makefile +++ b/arch/ia64/lib/Makefile @@ -24,25 +24,25 @@ AFLAGS___modsi3.o = -DMODULO AFLAGS___umodsi3.o = -DUNSIGNED -DMODULO $(obj)/__divdi3.o: $(src)/idiv64.S FORCE - $(call if_changed_dep,as_o_S) + $(call if_changed_rule,as_o_S) $(obj)/__udivdi3.o: $(src)/idiv64.S FORCE - $(call if_changed_dep,as_o_S) + $(call if_changed_rule,as_o_S) $(obj)/__moddi3.o: $(src)/idiv64.S FORCE - $(call if_changed_dep,as_o_S) + $(call if_changed_rule,as_o_S) $(obj)/__umoddi3.o: $(src)/idiv64.S FORCE - $(call if_changed_dep,as_o_S) + $(call if_changed_rule,as_o_S) $(obj)/__divsi3.o: $(src)/idiv32.S FORCE - $(call if_changed_dep,as_o_S) + $(call if_changed_rule,as_o_S) $(obj)/__udivsi3.o: $(src)/idiv32.S FORCE - $(call if_changed_dep,as_o_S) + $(call if_changed_rule,as_o_S) $(obj)/__modsi3.o: $(src)/idiv32.S FORCE - $(call if_changed_dep,as_o_S) + $(call if_changed_rule,as_o_S) $(obj)/__umodsi3.o: $(src)/idiv32.S FORCE - $(call if_changed_dep,as_o_S) + $(call if_changed_rule,as_o_S) -- cgit v1.2.3 From 76e32a2a084ed71b48179023cd8fdb3787c8a6ad Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Thu, 13 Apr 2017 14:56:14 -0700 Subject: z3fold: fix page locking in z3fold_alloc() Stress testing of the current z3fold implementation on a 8-core system revealed it was possible that a z3fold page deleted from its unbuddied list in z3fold_alloc() would be put on another unbuddied list by z3fold_free() while z3fold_alloc() is still processing it. This has been introduced with commit 5a27aa822 ("z3fold: add kref refcounting") due to the removal of special handling of a z3fold page not on any list in z3fold_free(). To fix this, the z3fold page lock should be taken in z3fold_alloc() before the pool lock is released. To avoid deadlocking, we just try to lock the page as soon as we get a hold of it, and if trylock fails, we drop this page and take the next one. Signed-off-by: Vitaly Wool Cc: Dan Streetman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index f9492bccfd79..54f63c4a809a 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr) spin_lock(&zhdr->page_lock); } +/* Try to lock a z3fold page */ +static inline int z3fold_page_trylock(struct z3fold_header *zhdr) +{ + return spin_trylock(&zhdr->page_lock); +} + /* Unlock a z3fold page */ static inline void z3fold_page_unlock(struct z3fold_header *zhdr) { @@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, spin_lock(&pool->lock); zhdr = list_first_entry_or_null(&pool->unbuddied[i], struct z3fold_header, buddy); - if (!zhdr) { + if (!zhdr || !z3fold_page_trylock(zhdr)) { spin_unlock(&pool->lock); continue; } @@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, spin_unlock(&pool->lock); page = virt_to_page(zhdr); - z3fold_page_lock(zhdr); if (zhdr->first_chunks == 0) { if (zhdr->middle_chunks != 0 && chunks >= zhdr->start_middle) -- cgit v1.2.3 From 0a85e51d37645e9ce57e5e1a30859e07810ed07c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:17 -0700 Subject: thp: reduce indentation level in change_huge_pmd() Patch series "thp: fix few MADV_DONTNEED races" For MADV_DONTNEED to work properly with huge pages, it's critical to not clear pmd intermittently unless you hold down_write(mmap_sem). Otherwise MADV_DONTNEED can miss the THP which can lead to userspace breakage. See example of such race in commit message of patch 2/4. All these races are found by code inspection. I haven't seen them triggered. I don't think it's worth to apply them to stable@. This patch (of 4): Restructure code in preparation for a fix. Link: http://lkml.kernel.org/r/20170302151034.27829-2-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index fef4cf210cc7..a513861a9037 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1724,37 +1724,37 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, { struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; - int ret = 0; + pmd_t entry; + bool preserve_write; + int ret; ptl = __pmd_trans_huge_lock(pmd, vma); - if (ptl) { - pmd_t entry; - bool preserve_write = prot_numa && pmd_write(*pmd); - ret = 1; + if (!ptl) + return 0; - /* - * Avoid trapping faults against the zero page. The read-only - * data is likely to be read-cached on the local CPU and - * local/remote hits to the zero page are not interesting. - */ - if (prot_numa && is_huge_zero_pmd(*pmd)) { - spin_unlock(ptl); - return ret; - } + preserve_write = prot_numa && pmd_write(*pmd); + ret = 1; - if (!prot_numa || !pmd_protnone(*pmd)) { - entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); - entry = pmd_modify(entry, newprot); - if (preserve_write) - entry = pmd_mk_savedwrite(entry); - ret = HPAGE_PMD_NR; - set_pmd_at(mm, addr, pmd, entry); - BUG_ON(vma_is_anonymous(vma) && !preserve_write && - pmd_write(entry)); - } - spin_unlock(ptl); - } + /* + * Avoid trapping faults against the zero page. The read-only + * data is likely to be read-cached on the local CPU and + * local/remote hits to the zero page are not interesting. + */ + if (prot_numa && is_huge_zero_pmd(*pmd)) + goto unlock; + if (prot_numa && pmd_protnone(*pmd)) + goto unlock; + + entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); + entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mk_savedwrite(entry); + ret = HPAGE_PMD_NR; + set_pmd_at(mm, addr, pmd, entry); + BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry)); +unlock: + spin_unlock(ptl); return ret; } -- cgit v1.2.3 From ced108037c2aa542b3ed8b7afd1576064ad1362a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:20 -0700 Subject: thp: fix MADV_DONTNEED vs. numa balancing race In case prot_numa, we are under down_read(mmap_sem). It's critical to not clear pmd intermittently to avoid race with MADV_DONTNEED which is also under down_read(mmap_sem): CPU0: CPU1: change_huge_pmd(prot_numa=1) pmdp_huge_get_and_clear_notify() madvise_dontneed() zap_pmd_range() pmd_trans_huge(*pmd) == 0 (without ptl) // skip the pmd set_pmd_at(); // pmd is re-established The race makes MADV_DONTNEED miss the huge pmd and don't clear it which may break userspace. Found by code analysis, never saw triggered. Link: http://lkml.kernel.org/r/20170302151034.27829-3-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a513861a9037..26769465af63 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1746,7 +1746,39 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (prot_numa && pmd_protnone(*pmd)) goto unlock; - entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd); + /* + * In case prot_numa, we are under down_read(mmap_sem). It's critical + * to not clear pmd intermittently to avoid race with MADV_DONTNEED + * which is also under down_read(mmap_sem): + * + * CPU0: CPU1: + * change_huge_pmd(prot_numa=1) + * pmdp_huge_get_and_clear_notify() + * madvise_dontneed() + * zap_pmd_range() + * pmd_trans_huge(*pmd) == 0 (without ptl) + * // skip the pmd + * set_pmd_at(); + * // pmd is re-established + * + * The race makes MADV_DONTNEED miss the huge pmd and don't clear it + * which may break userspace. + * + * pmdp_invalidate() is required to make sure we don't miss + * dirty/young flags set by hardware. + */ + entry = *pmd; + pmdp_invalidate(vma, addr, pmd); + + /* + * Recover dirty/young flags. It relies on pmdp_invalidate to not + * corrupt them. + */ + if (pmd_dirty(*pmd)) + entry = pmd_mkdirty(entry); + if (pmd_young(*pmd)) + entry = pmd_mkyoung(entry); + entry = pmd_modify(entry, newprot); if (preserve_write) entry = pmd_mk_savedwrite(entry); -- cgit v1.2.3 From c0c379e2931b05facef538e53bf3b21f283d9a0b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:23 -0700 Subject: mm: drop unused pmdp_huge_get_and_clear_notify() Dave noticed that after fixing MADV_DONTNEED vs numa balancing race the last pmdp_huge_get_and_clear_notify() user is gone. Let's drop the helper. Link: http://lkml.kernel.org/r/20170306112047.24809-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 51891fb0d3ce..c91b3bcd158f 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pud; \ }) -#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ - pmd_t ___pmd; \ - \ - ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ - mmu_notifier_invalidate_range(__mm, ___haddr, \ - ___haddr + HPAGE_PMD_SIZE); \ - \ - ___pmd; \ -}) - /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush #define pudp_huge_clear_flush_notify pudp_huge_clear_flush -#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ -- cgit v1.2.3 From 58ceeb6bec86d9140f9d91d71a710e963523d063 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:26 -0700 Subject: thp: fix MADV_DONTNEED vs. MADV_FREE race Both MADV_DONTNEED and MADV_FREE handled with down_read(mmap_sem). It's critical to not clear pmd intermittently while handling MADV_FREE to avoid race with MADV_DONTNEED: CPU0: CPU1: madvise_free_huge_pmd() pmdp_huge_get_and_clear_full() madvise_dontneed() zap_pmd_range() pmd_trans_huge(*pmd) == 0 (without ptl) // skip the pmd set_pmd_at(); // pmd is re-established It results in MADV_DONTNEED skipping the pmd, leaving it not cleared. It violates MADV_DONTNEED interface and can result is userspace misbehaviour. Basically it's the same race as with numa balancing in change_huge_pmd(), but a bit simpler to mitigate: we don't need to preserve dirty/young flags here due to MADV_FREE functionality. [kirill.shutemov@linux.intel.com: Urgh... Power is special again] Link: http://lkml.kernel.org/r/20170303102636.bhd2zhtpds4mt62a@black.fi.intel.com Link: http://lkml.kernel.org/r/20170302151034.27829-4-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Minchan Kim Cc: Minchan Kim Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 26769465af63..f3c4f9d22821 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, deactivate_page(page); if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) { - orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd, - tlb->fullmm); + pmdp_invalidate(vma, addr, pmd); orig_pmd = pmd_mkold(orig_pmd); orig_pmd = pmd_mkclean(orig_pmd); -- cgit v1.2.3 From 5b7abeae3af8c08c577e599dd0578b9e3ee6687b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:28 -0700 Subject: thp: fix MADV_DONTNEED vs clear soft dirty race Yet another instance of the same race. Fix is identical to change_huge_pmd(). See "thp: fix MADV_DONTNEED vs. numa balancing race" for more details. Link: http://lkml.kernel.org/r/20170302151034.27829-5-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Hillf Danton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f08bd31c1081..312578089544 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -900,7 +900,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); + pmd_t pmd = *pmdp; + + /* See comment in change_huge_pmd() */ + pmdp_invalidate(vma, addr, pmdp); + if (pmd_dirty(*pmdp)) + pmd = pmd_mkdirty(pmd); + if (pmd_young(*pmdp)) + pmd = pmd_mkyoung(pmd); pmd = pmd_wrprotect(pmd); pmd = pmd_clear_soft_dirty(pmd); -- cgit v1.2.3 From 045c7a3f53d9403b62d396b6d051c4be5044cdb4 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 13 Apr 2017 14:56:32 -0700 Subject: hugetlbfs: fix offset overflow in hugetlbfs mmap If mmap() maps a file, it can be passed an offset into the file at which the mapping is to start. Offset could be a negative value when represented as a loff_t. The offset plus length will be used to update the file size (i_size) which is also a loff_t. Validate the value of offset and offset + length to make sure they do not overflow and appear as negative. Found by syzcaller with commit ff8c0c53c475 ("mm/hugetlb.c: don't call region_abort if region_chg fails") applied. Prior to this commit, the overflow would still occur but we would luckily return ENOMEM. To reproduce: mmap(0, 0x2000, 0, 0x40021, 0xffffffffffffffffULL, 0x8000000000000000ULL); Resulted in, kernel BUG at mm/hugetlb.c:742! Call Trace: hugetlbfs_evict_inode+0x80/0xa0 evict+0x24a/0x620 iput+0x48f/0x8c0 dentry_unlink_inode+0x31f/0x4d0 __dentry_kill+0x292/0x5e0 dput+0x730/0x830 __fput+0x438/0x720 ____fput+0x1a/0x20 task_work_run+0xfe/0x180 exit_to_usermode_loop+0x133/0x150 syscall_return_slowpath+0x184/0x1c0 entry_SYSCALL_64_fastpath+0xab/0xad Fixes: ff8c0c53c475 ("mm/hugetlb.c: don't call region_abort if region_chg fails") Link: http://lkml.kernel.org/r/1491951118-30678-1-git-send-email-mike.kravetz@oracle.com Reported-by: Vegard Nossum Signed-off-by: Mike Kravetz Acked-by: Hillf Danton Cc: Dmitry Vyukov Cc: Michal Hocko Cc: "Kirill A . Shutemov" Cc: Andrey Ryabinin Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7163fe014b57..dde861387a40 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; + /* + * Offset passed to mmap (before page shift) could have been + * negative when represented as a (l)off_t. + */ + if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0) + return -EINVAL; + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); + len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); + /* check for overflow */ + if (len < vma_len) + return -EINVAL; inode_lock(inode); file_accessed(file); ret = -ENOMEM; - len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); - if (hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, @@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) ret = 0; if (vma->vm_flags & VM_WRITE && inode->i_size < len) - inode->i_size = len; + i_size_write(inode, len); out: inode_unlock(inode); -- cgit v1.2.3 From 4ca82dabc9fbf7bc5322aa54d802cb3cb7b125c5 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 13 Apr 2017 14:56:35 -0700 Subject: zram: fix operator precedence to get offset In zram_rw_page, the logic to get offset is wrong by operator precedence (i.e., "<<" is higher than "&"). With wrong offset, zram can corrupt the user's data. This patch fixes it. Fixes: 8c7f01025 ("zram: implement rw_page operation of zram") Link: http://lkml.kernel.org/r/1492042622-12074-1-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index dceb5edd1e54..03a7408e090d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -928,7 +928,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, } index = sector >> SECTORS_PER_PAGE_SHIFT; - offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT; + offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; bv.bv_page = page; bv.bv_len = PAGE_SIZE; -- cgit v1.2.3 From d72e9a7a93e4f8e9e52491921d99e0c8aa89eb4e Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 13 Apr 2017 14:56:37 -0700 Subject: zram: do not use copy_page with non-page aligned address The copy_page is optimized memcpy for page-alinged address. If it is used with non-page aligned address, it can corrupt memory which means system corruption. With zram, it can happen with 1. 64K architecture 2. partial IO 3. slub debug Partial IO need to allocate a page and zram allocates it via kmalloc. With slub debug, kmalloc(PAGE_SIZE) doesn't return page-size aligned address. And finally, copy_page(mem, cmem) corrupts memory. So, this patch changes it to memcpy. Actuaully, we don't need to change zram_bvec_write part because zsmalloc returns page-aligned address in case of PAGE_SIZE class but it's not good to rely on the internal of zsmalloc. Note: When this patch is merged to stable, clear_page should be fixed, too. Unfortunately, recent zram removes it by "same page merge" feature so it's hard to backport this patch to -stable tree. I will handle it when I receive the mail from stable tree maintainer to merge this patch to backport. Fixes: 42e99bd ("zram: optimize memory operations with clear_page()/copy_page()") Link: http://lkml.kernel.org/r/1492042622-12074-2-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 03a7408e090d..0c09d4256108 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -523,7 +523,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { - copy_page(mem, cmem); + memcpy(mem, cmem, PAGE_SIZE); } else { struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); @@ -717,7 +717,7 @@ compress_again: if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { src = kmap_atomic(page); - copy_page(cmem, src); + memcpy(cmem, src, PAGE_SIZE); kunmap_atomic(src); } else { memcpy(cmem, src, clen); -- cgit v1.2.3 From 85d492f28d056c40629fc25d79f54da618a29dc4 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 13 Apr 2017 14:56:40 -0700 Subject: zsmalloc: expand class bit Now 64K page system, zsamlloc has 257 classes so 8 class bit is not enough. With that, it corrupts the system when zsmalloc stores 65536byte data(ie, index number 256) so that this patch increases class bit for simple fix for stable backport. We should clean up this mess soon. index size 0 32 1 288 .. .. 204 52256 256 65536 Fixes: 3783689a1 ("zsmalloc: introduce zspage structure") Link: http://lkml.kernel.org/r/1492042622-12074-3-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zsmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index b7ee9c34dbd6..d41edd28298b 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -276,7 +276,7 @@ struct zs_pool { struct zspage { struct { unsigned int fullness:FULLNESS_BITS; - unsigned int class:CLASS_BITS; + unsigned int class:CLASS_BITS + 1; unsigned int isolated:ISOLATED_BITS; unsigned int magic:MAGIC_VAL_BITS; }; -- cgit v1.2.3 From 5714320d316ae6e830bfc463bfdaf288695f6b62 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Thu, 13 Apr 2017 14:56:43 -0700 Subject: mailmap: add Martin Kepplinger's email Set the partly deprecated companies' email addresses as alias for the personal one. Link: http://lkml.kernel.org/r/1491984622-17321-1-git-send-email-martin.kepplinger@ginzinger.com Signed-off-by: Martin Kepplinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index e229922dc7f0..1d6f4e7280dc 100644 --- a/.mailmap +++ b/.mailmap @@ -99,6 +99,8 @@ Linas Vepstas Linus Lüssing Linus Lüssing Mark Brown +Martin Kepplinger +Martin Kepplinger Matthieu CASTET Mauro Carvalho Chehab Mauro Carvalho Chehab -- cgit v1.2.3