summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h18
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c92
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/process.c28
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/time.c4
-rw-r--r--arch/x86/kernel/vmlinux.lds.S4
-rw-r--r--block/bio-integrity.c1
-rw-r--r--block/bio.c3
-rw-r--r--block/blk-mq-tag.c39
-rw-r--r--block/blk-mq-tag.h8
-rw-r--r--block/blk-mq.c29
-rw-r--r--block/blk-mq.h1
-rw-r--r--block/blk.h2
-rw-r--r--drivers/block/loop.c2
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/umem.c2
-rw-r--r--drivers/nvme/host/core.c4
-rw-r--r--drivers/nvme/host/fc.c5
-rw-r--r--drivers/nvme/host/nvme.h3
-rw-r--r--drivers/nvme/host/pci.c6
-rw-r--r--drivers/nvme/host/tcp.c8
-rw-r--r--drivers/nvme/target/core.c27
-rw-r--r--drivers/nvme/target/tcp.c4
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/io-wq.c10
-rw-r--r--fs/io-wq.h8
-rw-r--r--fs/io_uring.c424
-rw-r--r--include/trace/events/block.h6
-rw-r--r--kernel/time/clocksource.c2
-rw-r--r--kernel/trace/blktrace.c36
-rw-r--r--lib/vdso/gettimeofday.c13
34 files changed, 416 insertions, 391 deletions
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 8f1e94f29a16..a338a6deb950 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -89,6 +89,8 @@
#define INTEL_FAM6_COMETLAKE 0xA5
#define INTEL_FAM6_COMETLAKE_L 0xA6
+#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 9a6dc9b4ec99..fb81fea99093 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -271,6 +271,24 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
return __vdso_data;
}
+static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd)
+{
+ return true;
+}
+#define vdso_clocksource_ok arch_vdso_clocksource_ok
+
+/*
+ * Clocksource read value validation to handle PV and HyperV clocksources
+ * which can be invalidated asynchronously and indicate invalidation by
+ * returning U64_MAX, which can be effectively tested by checking for a
+ * negative value after casting it to s64.
+ */
+static inline bool arch_vdso_cycles_ok(u64 cycles)
+{
+ return (s64)cycles >= 0;
+}
+#define vdso_cycles_ok arch_vdso_cycles_ok
+
/*
* x86 specific delta calculation.
*
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4b1d31be50b4..bf4acb0b5365 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2060,7 +2060,7 @@ void __init init_apic_mappings(void)
unsigned int new_apicid;
if (apic_validate_deadline_timer())
- pr_debug("TSC deadline timer available\n");
+ pr_info("TSC deadline timer available\n");
if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b6f887be440c..0b71970d2d3d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -588,7 +588,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline);
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
-static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
+static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
+ SPECTRE_V2_USER_NONE;
+static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
SPECTRE_V2_USER_NONE;
#ifdef CONFIG_RETPOLINE
@@ -734,15 +736,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
break;
}
- /*
- * At this point, an STIBP mode other than "off" has been set.
- * If STIBP support is not being forced, check if STIBP always-on
- * is preferred.
- */
- if (mode != SPECTRE_V2_USER_STRICT &&
- boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
- mode = SPECTRE_V2_USER_STRICT_PREFERRED;
-
/* Initialize Indirect Branch Prediction Barrier */
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
@@ -765,23 +758,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
static_key_enabled(&switch_mm_always_ibpb) ?
"always-on" : "conditional");
+
+ spectre_v2_user_ibpb = mode;
}
- /* If enhanced IBRS is enabled no STIBP required */
- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+ /*
+ * If enhanced IBRS is enabled or SMT impossible, STIBP is not
+ * required.
+ */
+ if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return;
/*
- * If SMT is not possible or STIBP is not available clear the STIBP
- * mode.
+ * At this point, an STIBP mode other than "off" has been set.
+ * If STIBP support is not being forced, check if STIBP always-on
+ * is preferred.
*/
- if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
+ if (mode != SPECTRE_V2_USER_STRICT &&
+ boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+
+ /*
+ * If STIBP is not available, clear the STIBP mode.
+ */
+ if (!boot_cpu_has(X86_FEATURE_STIBP))
mode = SPECTRE_V2_USER_NONE;
+
+ spectre_v2_user_stibp = mode;
+
set_mode:
- spectre_v2_user = mode;
- /* Only print the STIBP mode when SMT possible */
- if (smt_possible)
- pr_info("%s\n", spectre_v2_user_strings[mode]);
+ pr_info("%s\n", spectre_v2_user_strings[mode]);
}
static const char * const spectre_v2_strings[] = {
@@ -1014,7 +1020,7 @@ void cpu_bugs_smt_update(void)
{
mutex_lock(&spec_ctrl_mutex);
- switch (spectre_v2_user) {
+ switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
break;
case SPECTRE_V2_USER_STRICT:
@@ -1257,14 +1263,19 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
switch (ctrl) {
case PR_SPEC_ENABLE:
- if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return 0;
/*
* Indirect branch speculation is always disabled in strict
- * mode.
+ * mode. It can neither be enabled if it was force-disabled
+ * by a previous prctl call.
+
*/
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+ task_spec_ib_force_disable(task))
return -EPERM;
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
@@ -1275,10 +1286,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
* Indirect branch speculation is always allowed when
* mitigation is force disabled.
*/
- if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return -EPERM;
- if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
return 0;
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
@@ -1309,7 +1322,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
{
if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
- if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP)
ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif
@@ -1340,22 +1354,24 @@ static int ib_prctl_get(struct task_struct *task)
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return PR_SPEC_NOT_AFFECTED;
- switch (spectre_v2_user) {
- case SPECTRE_V2_USER_NONE:
+ if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+ spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return PR_SPEC_ENABLE;
- case SPECTRE_V2_USER_PRCTL:
- case SPECTRE_V2_USER_SECCOMP:
+ else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+ return PR_SPEC_DISABLE;
+ else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) {
if (task_spec_ib_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ib_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- case SPECTRE_V2_USER_STRICT:
- case SPECTRE_V2_USER_STRICT_PREFERRED:
- return PR_SPEC_DISABLE;
- default:
+ } else
return PR_SPEC_NOT_AFFECTED;
- }
}
int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
@@ -1594,7 +1610,7 @@ static char *stibp_state(void)
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
return "";
- switch (spectre_v2_user) {
+ switch (spectre_v2_user_stibp) {
case SPECTRE_V2_USER_NONE:
return ", STIBP: disabled";
case SPECTRE_V2_USER_STRICT:
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 63926c94eb5f..c25a67a34bd3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1142,9 +1142,12 @@ void switch_to_sld(unsigned long tifn)
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1),
{}
};
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8e3d0347b664..f362ce0d5ac0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -545,28 +545,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
lockdep_assert_irqs_disabled();
- /*
- * If TIF_SSBD is different, select the proper mitigation
- * method. Note that if SSBD mitigation is disabled or permanentely
- * enabled this branch can't be taken because nothing can set
- * TIF_SSBD.
- */
- if (tif_diff & _TIF_SSBD) {
- if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+ /* Handle change of TIF_SSBD depending on the mitigation method. */
+ if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+ if (tif_diff & _TIF_SSBD)
amd_set_ssb_virt_state(tifn);
- } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+ } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+ if (tif_diff & _TIF_SSBD)
amd_set_core_ssb_state(tifn);
- } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
- msr |= ssbd_tif_to_spec_ctrl(tifn);
- updmsr = true;
- }
+ } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ updmsr |= !!(tif_diff & _TIF_SSBD);
+ msr |= ssbd_tif_to_spec_ctrl(tifn);
}
- /*
- * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
- * otherwise avoid the MSR write.
- */
+ /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */
if (IS_ENABLED(CONFIG_SMP) &&
static_branch_unlikely(&switch_to_cond_stibp)) {
updmsr |= !!(tif_diff & _TIF_SPEC_IB);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e040ba6be27b..0ec7ced727fe 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -197,6 +197,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
},
},
+ { /* Handle problems with rebooting on Apple MacBook6,1 */
+ .callback = set_pci_reboot,
+ .ident = "Apple MacBook6,1",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"),
+ },
+ },
{ /* Handle problems with rebooting on Apple MacBookPro5 */
.callback = set_pci_reboot,
.ident = "Apple MacBookPro5",
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 371a6b348e44..e42faa792c07 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -25,10 +25,6 @@
#include <asm/hpet.h>
#include <asm/time.h>
-#ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
-#endif
-
unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1bf7e312361f..7c35556c7827 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -40,13 +40,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT)
#ifdef CONFIG_X86_32
OUTPUT_ARCH(i386)
ENTRY(phys_startup_32)
-jiffies = jiffies_64;
#else
OUTPUT_ARCH(i386:x86-64)
ENTRY(phys_startup_64)
-jiffies_64 = jiffies;
#endif
+jiffies = jiffies_64;
+
#if defined(CONFIG_X86_64)
/*
* On 64-bit, align RODATA to 2MB so we retain large page mappings for
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 3579ac0f6ec1..23632a33ed39 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -281,7 +281,6 @@ bool bio_integrity_prep(struct bio *bio)
if (ret == 0) {
printk(KERN_ERR "could not attach integrity payload\n");
- kfree(buf);
status = BLK_STS_RESOURCE;
goto err_end_io;
}
diff --git a/block/bio.c b/block/bio.c
index 5235da6434aa..a7366c02c9b5 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1434,8 +1434,7 @@ again:
}
if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
- trace_block_bio_complete(bio->bi_disk->queue, bio,
- blk_status_to_errno(bio->bi_status));
+ trace_block_bio_complete(bio->bi_disk->queue, bio);
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
}
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 96a39d0724a2..44f3d0967cb4 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -191,6 +191,33 @@ found_tag:
return tag + tag_offset;
}
+bool __blk_mq_get_driver_tag(struct request *rq)
+{
+ struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
+ unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
+ bool shared = blk_mq_tag_busy(rq->mq_hctx);
+ int tag;
+
+ if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
+ bt = &rq->mq_hctx->tags->breserved_tags;
+ tag_offset = 0;
+ }
+
+ if (!hctx_may_queue(rq->mq_hctx, bt))
+ return false;
+ tag = __sbitmap_queue_get(bt);
+ if (tag == BLK_MQ_NO_TAG)
+ return false;
+
+ rq->tag = tag + tag_offset;
+ if (shared) {
+ rq->rq_flags |= RQF_MQ_INFLIGHT;
+ atomic_inc(&rq->mq_hctx->nr_active);
+ }
+ rq->mq_hctx->tags->rqs[rq->tag] = rq;
+ return true;
+}
+
void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
unsigned int tag)
{
@@ -269,6 +296,7 @@ struct bt_tags_iter_data {
#define BT_TAG_ITER_RESERVED (1 << 0)
#define BT_TAG_ITER_STARTED (1 << 1)
+#define BT_TAG_ITER_STATIC_RQS (1 << 2)
static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
@@ -282,9 +310,12 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
/*
* We can hit rq == NULL here, because the tagging functions
- * test and set the bit before assining ->rqs[].
+ * test and set the bit before assigning ->rqs[].
*/
- rq = tags->rqs[bitnr];
+ if (iter_data->flags & BT_TAG_ITER_STATIC_RQS)
+ rq = tags->static_rqs[bitnr];
+ else
+ rq = tags->rqs[bitnr];
if (!rq)
return true;
if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
@@ -339,11 +370,13 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
* indicates whether or not @rq is a reserved request. Return
* true to continue iterating tags, false to stop.
* @priv: Will be passed as second argument to @fn.
+ *
+ * Caller has to pass the tag map from which requests are allocated.
*/
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
void *priv)
{
- return __blk_mq_all_tag_iter(tags, fn, priv, 0);
+ return __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
}
/**
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index d38e48f2a0a4..2e4ef51cdb32 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -51,6 +51,14 @@ enum {
BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1,
};
+bool __blk_mq_get_driver_tag(struct request *rq);
+static inline bool blk_mq_get_driver_tag(struct request *rq)
+{
+ if (rq->tag != BLK_MQ_NO_TAG)
+ return true;
+ return __blk_mq_get_driver_tag(rq);
+}
+
extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9a36ac1c1fa1..4f57d27bfa73 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1052,35 +1052,6 @@ static inline unsigned int queued_to_index(unsigned int queued)
return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
}
-bool blk_mq_get_driver_tag(struct request *rq)
-{
- struct blk_mq_alloc_data data = {
- .q = rq->q,
- .hctx = rq->mq_hctx,
- .flags = BLK_MQ_REQ_NOWAIT,
- .cmd_flags = rq->cmd_flags,
- };
- bool shared;
-
- if (rq->tag != BLK_MQ_NO_TAG)
- return true;
-
- if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
- data.flags |= BLK_MQ_REQ_RESERVED;
-
- shared = blk_mq_tag_busy(data.hctx);
- rq->tag = blk_mq_get_tag(&data);
- if (rq->tag >= 0) {
- if (shared) {
- rq->rq_flags |= RQF_MQ_INFLIGHT;
- atomic_inc(&data.hctx->nr_active);
- }
- data.hctx->tags->rqs[rq->tag] = rq;
- }
-
- return rq->tag != BLK_MQ_NO_TAG;
-}
-
static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
int flags, void *key)
{
diff --git a/block/blk-mq.h b/block/blk-mq.h
index a139b0631817..b3ce0f3a2ad2 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -44,7 +44,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
bool kick_requeue_list);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
-bool blk_mq_get_driver_tag(struct request *rq);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *start);
diff --git a/block/blk.h b/block/blk.h
index aa16e524dc35..b5d1f0fc6547 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -420,9 +420,11 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part)
static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ preempt_disable();
write_seqcount_begin(&part->nr_sects_seq);
part->nr_sects = size;
write_seqcount_end(&part->nr_sects_seq);
+ preempt_enable();
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
preempt_disable();
part->nr_sects = size;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 2e96d8b8758b..c33bbbfd1bd9 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1390,7 +1390,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
goto out_unfreeze;
/* Mask out flags that can't be set using LOOP_SET_STATUS. */
- lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS;
+ lo->lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;
/* For those flags, use the previous values instead */
lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS;
/* For flags that can't be cleared, use previous values too */
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 0b944ac96d6b..27a33adc41e4 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1613,7 +1613,7 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
disc_information di;
track_information ti;
__u32 last_track;
- int ret = -1;
+ int ret;
ret = pkt_get_disc_info(pd, &di);
if (ret)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index d84e8a878df2..1e2aa5ae2796 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -784,7 +784,7 @@ static const struct block_device_operations mm_fops = {
static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
- int ret = -ENODEV;
+ int ret;
struct cardinfo *card = &cards[num_cards];
unsigned char mem_present;
unsigned char batt_status;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 0585efa47d8f..c2c5bc4fb702 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3669,7 +3669,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns->disk = disk;
if (__nvme_revalidate_disk(disk, id))
- goto out_free_disk;
+ goto out_put_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
ret = nvme_nvm_register(ns, disk_name, node);
@@ -3696,8 +3696,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
/* prevent double queue cleanup */
ns->disk->queue = NULL;
put_disk(ns->disk);
- out_free_disk:
- del_gendisk(ns->disk);
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index cb0007592c12..e999a8c4b7e8 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2634,10 +2634,11 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
- if (!(op->flags & FCOP_FLAGS_AEN))
+ if (!(op->flags & FCOP_FLAGS_AEN)) {
nvme_fc_unmap_data(ctrl, op->rq, op);
+ nvme_cleanup_cmd(op->rq);
+ }
- nvme_cleanup_cmd(op->rq);
nvme_fc_ctrl_put(ctrl);
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fa5c75501049..c0f4226d3299 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -599,8 +599,7 @@ static inline void nvme_trace_bio_complete(struct request *req,
struct nvme_ns *ns = req->q->queuedata;
if (req->cmd_flags & REQ_NVME_MPATH)
- trace_block_bio_complete(ns->head->disk->queue,
- req->bio, status);
+ trace_block_bio_complete(ns->head->disk->queue, req->bio);
}
extern struct device_attribute dev_attr_ana_grpid;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d690d5593a80..e2bacd369a88 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2950,9 +2950,15 @@ static int nvme_suspend(struct device *dev)
* the PCI bus layer to put it into D3 in order to take the PCIe link
* down, so as to allow the platform to achieve its minimum low-power
* state (which may not be possible if the link is up).
+ *
+ * If a host memory buffer is enabled, shut down the device as the NVMe
+ * specification allows the device to access the host memory buffer in
+ * host DRAM from all power states, but hosts will fail access to DRAM
+ * during S3.
*/
if (pm_suspend_via_firmware() || !ctrl->npss ||
!pcie_aspm_enabled(pdev) ||
+ ndev->nr_host_mem_descs ||
(ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
return nvme_disable_prepare_reset(ndev, true);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 1843110ec34f..3345ec7efaff 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -131,8 +131,8 @@ struct nvme_tcp_ctrl {
static LIST_HEAD(nvme_tcp_ctrl_list);
static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
static struct workqueue_struct *nvme_tcp_wq;
-static struct blk_mq_ops nvme_tcp_mq_ops;
-static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+static const struct blk_mq_ops nvme_tcp_mq_ops;
+static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
@@ -2301,7 +2301,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
return queue->nr_cqe;
}
-static struct blk_mq_ops nvme_tcp_mq_ops = {
+static const struct blk_mq_ops nvme_tcp_mq_ops = {
.queue_rq = nvme_tcp_queue_rq,
.complete = nvme_complete_rq,
.init_request = nvme_tcp_init_request,
@@ -2312,7 +2312,7 @@ static struct blk_mq_ops nvme_tcp_mq_ops = {
.poll = nvme_tcp_poll,
};
-static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
+static const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
.queue_rq = nvme_tcp_queue_rq,
.complete = nvme_complete_rq,
.init_request = nvme_tcp_init_request,
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 6392bcd30bd7..6e2f623e472e 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -129,7 +129,22 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}
-static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
+static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
+{
+ u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
+ struct nvmet_req *req;
+
+ mutex_lock(&ctrl->lock);
+ while (ctrl->nr_async_event_cmds) {
+ req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
+ mutex_unlock(&ctrl->lock);
+ nvmet_req_complete(req, status);
+ mutex_lock(&ctrl->lock);
+ }
+ mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
{
struct nvmet_async_event *aen;
struct nvmet_req *req;
@@ -139,15 +154,14 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
aen = list_first_entry(&ctrl->async_events,
struct nvmet_async_event, entry);
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
- if (status == 0)
- nvmet_set_result(req, nvmet_async_event_result(aen));
+ nvmet_set_result(req, nvmet_async_event_result(aen));
list_del(&aen->entry);
kfree(aen);
mutex_unlock(&ctrl->lock);
trace_nvmet_async_event(ctrl, req->cqe->result.u32);
- nvmet_req_complete(req, status);
+ nvmet_req_complete(req, 0);
mutex_lock(&ctrl->lock);
}
mutex_unlock(&ctrl->lock);
@@ -170,7 +184,7 @@ static void nvmet_async_event_work(struct work_struct *work)
struct nvmet_ctrl *ctrl =
container_of(work, struct nvmet_ctrl, async_event_work);
- nvmet_async_events_process(ctrl, 0);
+ nvmet_async_events_process(ctrl);
}
void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
@@ -779,7 +793,6 @@ static void nvmet_confirm_sq(struct percpu_ref *ref)
void nvmet_sq_destroy(struct nvmet_sq *sq)
{
- u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
struct nvmet_ctrl *ctrl = sq->ctrl;
/*
@@ -787,7 +800,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
* queue doesn't have outstanding requests on it.
*/
if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
- nvmet_async_events_process(ctrl, status);
+ nvmet_async_events_failall(ctrl);
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 1669177cd26c..de9217cfd22d 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -153,7 +153,7 @@ static LIST_HEAD(nvmet_tcp_queue_list);
static DEFINE_MUTEX(nvmet_tcp_queue_mutex);
static struct workqueue_struct *nvmet_tcp_wq;
-static struct nvmet_fabrics_ops nvmet_tcp_ops;
+static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
@@ -1713,7 +1713,7 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
}
}
-static struct nvmet_fabrics_ops nvmet_tcp_ops = {
+static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
.owner = THIS_MODULE,
.type = NVMF_TRTYPE_TCP,
.msdbd = 1,
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 97bccde3298b..768497f82aee 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -447,6 +447,7 @@ static int afs_store_data(struct address_space *mapping,
op->store.last = last;
op->store.first_offset = offset;
op->store.last_to = to;
+ op->mtime = vnode->vfs_inode.i_mtime;
op->ops = &afs_store_data_operation;
try_next_key:
diff --git a/fs/io-wq.c b/fs/io-wq.c
index a5e90ac39e4d..0b65a912b036 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -111,6 +111,7 @@ struct io_wq {
unsigned long state;
free_work_fn *free_work;
+ io_wq_work_fn *do_work;
struct task_struct *manager;
struct user_struct *user;
@@ -523,7 +524,7 @@ get_next:
hash = io_get_work_hash(work);
linked = old_work = work;
- linked->func(&linked);
+ wq->do_work(&linked);
linked = (old_work == linked) ? NULL : linked;
work = next_hashed;
@@ -780,7 +781,7 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
struct io_wq_work *old_work = work;
work->flags |= IO_WQ_WORK_CANCEL;
- work->func(&work);
+ wq->do_work(&work);
work = (work == old_work) ? NULL : work;
wq->free_work(old_work);
} while (work);
@@ -1018,7 +1019,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
int ret = -ENOMEM, node;
struct io_wq *wq;
- if (WARN_ON_ONCE(!data->free_work))
+ if (WARN_ON_ONCE(!data->free_work || !data->do_work))
return ERR_PTR(-EINVAL);
wq = kzalloc(sizeof(*wq), GFP_KERNEL);
@@ -1032,6 +1033,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
}
wq->free_work = data->free_work;
+ wq->do_work = data->do_work;
/* caller must already hold a reference to this */
wq->user = data->user;
@@ -1088,7 +1090,7 @@ err:
bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
{
- if (data->free_work != wq->free_work)
+ if (data->free_work != wq->free_work || data->do_work != wq->do_work)
return false;
return refcount_inc_not_zero(&wq->use_refs);
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 5ba12de7572f..8e138fa88b9f 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -85,7 +85,6 @@ static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work {
struct io_wq_work_node list;
- void (*func)(struct io_wq_work **);
struct files_struct *files;
struct mm_struct *mm;
const struct cred *creds;
@@ -94,11 +93,6 @@ struct io_wq_work {
pid_t task_pid;
};
-#define INIT_IO_WORK(work, _func) \
- do { \
- *(work) = (struct io_wq_work){ .func = _func }; \
- } while (0) \
-
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
{
if (!work->list.next)
@@ -108,10 +102,12 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
}
typedef void (free_work_fn)(struct io_wq_work *);
+typedef void (io_wq_work_fn)(struct io_wq_work **);
struct io_wq_data {
struct user_struct *user;
+ io_wq_work_fn *do_work;
free_work_fn *free_work;
};
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 26f7bc941d01..155f3d830ddb 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -528,7 +528,6 @@ enum {
REQ_F_INFLIGHT_BIT,
REQ_F_CUR_POS_BIT,
REQ_F_NOWAIT_BIT,
- REQ_F_IOPOLL_COMPLETED_BIT,
REQ_F_LINK_TIMEOUT_BIT,
REQ_F_TIMEOUT_BIT,
REQ_F_ISREG_BIT,
@@ -540,6 +539,8 @@ enum {
REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
+ REQ_F_QUEUE_TIMEOUT_BIT,
+ REQ_F_WORK_INITIALIZED_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -571,8 +572,6 @@ enum {
REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
/* must not punt to workers */
REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT),
- /* polled IO has completed */
- REQ_F_IOPOLL_COMPLETED = BIT(REQ_F_IOPOLL_COMPLETED_BIT),
/* has linked timeout */
REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT),
/* timeout request */
@@ -595,6 +594,10 @@ enum {
REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
/* doesn't need file table for this request */
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
+ /* needs to queue linked timeout */
+ REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
+ /* io_wq_work is initialized */
+ REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
};
struct async_poll {
@@ -633,6 +636,8 @@ struct io_kiocb {
struct io_async_ctx *io;
int cflags;
u8 opcode;
+ /* polled IO has completed */
+ u8 iopoll_completed;
u16 buf_index;
@@ -697,6 +702,8 @@ struct io_op_def {
unsigned needs_mm : 1;
/* needs req->file assigned */
unsigned needs_file : 1;
+ /* don't fail if file grab fails */
+ unsigned needs_file_no_error : 1;
/* hash wq insertion if file is a regular file */
unsigned hash_reg_file : 1;
/* unbound wq insertion if file is a non-regular file */
@@ -803,6 +810,8 @@ static const struct io_op_def io_op_defs[] = {
.needs_fs = 1,
},
[IORING_OP_CLOSE] = {
+ .needs_file = 1,
+ .needs_file_no_error = 1,
.file_table = 1,
},
[IORING_OP_FILES_UPDATE] = {
@@ -903,6 +912,19 @@ EXPORT_SYMBOL(io_uring_get_socket);
static void io_file_put_work(struct work_struct *work);
+/*
+ * Note: must call io_req_init_async() for the first time you
+ * touch any members of io_wq_work.
+ */
+static inline void io_req_init_async(struct io_kiocb *req)
+{
+ if (req->flags & REQ_F_WORK_INITIALIZED)
+ return;
+
+ memset(&req->work, 0, sizeof(req->work));
+ req->flags |= REQ_F_WORK_INITIALIZED;
+}
+
static inline bool io_async_submit(struct io_ring_ctx *ctx)
{
return ctx->flags & IORING_SETUP_SQPOLL;
@@ -1029,6 +1051,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
static inline void io_req_work_drop_env(struct io_kiocb *req)
{
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ return;
+
if (req->work.mm) {
mmdrop(req->work.mm);
req->work.mm = NULL;
@@ -1575,16 +1600,6 @@ static void io_free_req(struct io_kiocb *req)
io_queue_async_work(nxt);
}
-static void io_link_work_cb(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
- struct io_kiocb *link;
-
- link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
- io_queue_linked_timeout(link);
- io_wq_submit_work(workptr);
-}
-
static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
{
struct io_kiocb *link;
@@ -1596,7 +1611,7 @@ static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
*workptr = &nxt->work;
link = io_prep_linked_timeout(nxt);
if (link)
- nxt->work.func = io_link_work_cb;
+ nxt->flags |= REQ_F_QUEUE_TIMEOUT;
}
/*
@@ -1781,7 +1796,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
* If we find a request that requires polling, break out
* and complete those lists first, if we have entries there.
*/
- if (req->flags & REQ_F_IOPOLL_COMPLETED) {
+ if (READ_ONCE(req->iopoll_completed)) {
list_move_tail(&req->list, &done);
continue;
}
@@ -1962,7 +1977,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
req_set_fail_links(req);
req->result = res;
if (res != -EAGAIN)
- req->flags |= REQ_F_IOPOLL_COMPLETED;
+ WRITE_ONCE(req->iopoll_completed, 1);
}
/*
@@ -1995,7 +2010,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
* For fast devices, IO may have already completed. If it has, add
* it to the front so we find it first.
*/
- if (req->flags & REQ_F_IOPOLL_COMPLETED)
+ if (READ_ONCE(req->iopoll_completed))
list_add(&req->list, &ctx->poll_list);
else
list_add_tail(&req->list, &ctx->poll_list);
@@ -2063,6 +2078,10 @@ static bool io_file_supports_async(struct file *file, int rw)
if (S_ISREG(mode) && file->f_op != &io_uring_fops)
return true;
+ /* any ->read/write should understand O_NONBLOCK */
+ if (file->f_flags & O_NONBLOCK)
+ return true;
+
if (!(file->f_mode & FMODE_NOWAIT))
return false;
@@ -2105,8 +2124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
kiocb->ki_ioprio = get_current_ioprio();
/* don't allow async punt if RWF_NOWAIT was requested */
- if ((kiocb->ki_flags & IOCB_NOWAIT) ||
- (req->file->f_flags & O_NONBLOCK))
+ if (kiocb->ki_flags & IOCB_NOWAIT)
req->flags |= REQ_F_NOWAIT;
if (force_nonblock)
@@ -2120,6 +2138,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
req->result = 0;
+ req->iopoll_completed = 0;
} else {
if (kiocb->ki_flags & IOCB_HIPRI)
return -EINVAL;
@@ -2358,8 +2377,14 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
bool needs_lock)
{
- if (req->flags & REQ_F_BUFFER_SELECTED)
+ if (req->flags & REQ_F_BUFFER_SELECTED) {
+ struct io_buffer *kbuf;
+
+ kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+ iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
+ iov[0].iov_len = kbuf->len;
return 0;
+ }
if (!req->rw.len)
return 0;
else if (req->rw.len > 1)
@@ -2741,7 +2766,8 @@ copy_iov:
if (ret)
goto out_free;
/* any defer here is final, must blocking retry */
- if (!file_can_poll(req->file))
+ if (!(req->flags & REQ_F_NOWAIT) &&
+ !file_can_poll(req->file))
req->flags |= REQ_F_MUST_PUNT;
return -EAGAIN;
}
@@ -2761,6 +2787,8 @@ static int __io_splice_prep(struct io_kiocb *req,
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
sp->file_in = NULL;
sp->len = READ_ONCE(sqe->len);
@@ -2775,8 +2803,14 @@ static int __io_splice_prep(struct io_kiocb *req,
return ret;
req->flags |= REQ_F_NEED_CLEANUP;
- if (!S_ISREG(file_inode(sp->file_in)->i_mode))
+ if (!S_ISREG(file_inode(sp->file_in)->i_mode)) {
+ /*
+ * Splice operation will be punted aync, and here need to
+ * modify io_wq_work.flags, so initialize io_wq_work firstly.
+ */
+ io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_UNBOUND;
+ }
return 0;
}
@@ -2885,23 +2919,15 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static bool io_req_cancelled(struct io_kiocb *req)
-{
- if (req->work.flags & IO_WQ_WORK_CANCEL) {
- req_set_fail_links(req);
- io_cqring_add_event(req, -ECANCELED);
- io_put_req(req);
- return true;
- }
-
- return false;
-}
-
-static void __io_fsync(struct io_kiocb *req)
+static int io_fsync(struct io_kiocb *req, bool force_nonblock)
{
loff_t end = req->sync.off + req->sync.len;
int ret;
+ /* fsync always requires a blocking context */
+ if (force_nonblock)
+ return -EAGAIN;
+
ret = vfs_fsync_range(req->file, req->sync.off,
end > 0 ? end : LLONG_MAX,
req->sync.flags & IORING_FSYNC_DATASYNC);
@@ -2909,58 +2935,16 @@ static void __io_fsync(struct io_kiocb *req)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
io_put_req(req);
-}
-
-static void io_fsync_finish(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
- if (io_req_cancelled(req))
- return;
- __io_fsync(req);
- io_steal_work(req, workptr);
-}
-
-static int io_fsync(struct io_kiocb *req, bool force_nonblock)
-{
- /* fsync always requires a blocking context */
- if (force_nonblock) {
- req->work.func = io_fsync_finish;
- return -EAGAIN;
- }
- __io_fsync(req);
return 0;
}
-static void __io_fallocate(struct io_kiocb *req)
-{
- int ret;
-
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
- ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
- req->sync.len);
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
- if (ret < 0)
- req_set_fail_links(req);
- io_cqring_add_event(req, ret);
- io_put_req(req);
-}
-
-static void io_fallocate_finish(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
- if (io_req_cancelled(req))
- return;
- __io_fallocate(req);
- io_steal_work(req, workptr);
-}
-
static int io_fallocate_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
if (sqe->ioprio || sqe->buf_index || sqe->rw_flags)
return -EINVAL;
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->addr);
@@ -2971,66 +2955,74 @@ static int io_fallocate_prep(struct io_kiocb *req,
static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
{
+ int ret;
+
/* fallocate always requiring blocking context */
- if (force_nonblock) {
- req->work.func = io_fallocate_finish;
+ if (force_nonblock)
return -EAGAIN;
- }
- __io_fallocate(req);
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
+ ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
+ req->sync.len);
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req(req);
return 0;
}
-static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
const char __user *fname;
int ret;
- if (sqe->ioprio || sqe->buf_index)
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
- if (req->flags & REQ_F_FIXED_FILE)
+ if (unlikely(sqe->ioprio || sqe->buf_index))
+ return -EINVAL;
+ if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
- if (req->flags & REQ_F_NEED_CLEANUP)
- return 0;
- req->open.dfd = READ_ONCE(sqe->fd);
- req->open.how.mode = READ_ONCE(sqe->len);
- fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
- req->open.how.flags = READ_ONCE(sqe->open_flags);
- if (force_o_largefile())
+ /* open.how should be already initialised */
+ if (!(req->open.how.flags & O_PATH) && force_o_largefile())
req->open.how.flags |= O_LARGEFILE;
+ req->open.dfd = READ_ONCE(sqe->fd);
+ fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
req->open.filename = getname(fname);
if (IS_ERR(req->open.filename)) {
ret = PTR_ERR(req->open.filename);
req->open.filename = NULL;
return ret;
}
-
req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
+static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ u64 flags, mode;
+
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ return 0;
+ mode = READ_ONCE(sqe->len);
+ flags = READ_ONCE(sqe->open_flags);
+ req->open.how = build_open_how(flags, mode);
+ return __io_openat_prep(req, sqe);
+}
+
static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct open_how __user *how;
- const char __user *fname;
size_t len;
int ret;
- if (sqe->ioprio || sqe->buf_index)
- return -EINVAL;
- if (req->flags & REQ_F_FIXED_FILE)
- return -EBADF;
if (req->flags & REQ_F_NEED_CLEANUP)
return 0;
-
- req->open.dfd = READ_ONCE(sqe->fd);
- fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
len = READ_ONCE(sqe->len);
-
if (len < OPEN_HOW_SIZE_VER0)
return -EINVAL;
@@ -3039,19 +3031,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (ret)
return ret;
- if (!(req->open.how.flags & O_PATH) && force_o_largefile())
- req->open.how.flags |= O_LARGEFILE;
-
- req->open.filename = getname(fname);
- if (IS_ERR(req->open.filename)) {
- ret = PTR_ERR(req->open.filename);
- req->open.filename = NULL;
- return ret;
- }
-
- req->open.nofile = rlimit(RLIMIT_NOFILE);
- req->flags |= REQ_F_NEED_CLEANUP;
- return 0;
+ return __io_openat_prep(req, sqe);
}
static int io_openat2(struct io_kiocb *req, bool force_nonblock)
@@ -3091,7 +3071,6 @@ err:
static int io_openat(struct io_kiocb *req, bool force_nonblock)
{
- req->open.how = build_open_how(req->open.how.flags, req->open.how.mode);
return io_openat2(req, force_nonblock);
}
@@ -3180,7 +3159,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
p->addr = READ_ONCE(sqe->addr);
p->len = READ_ONCE(sqe->len);
- if (!access_ok(u64_to_user_ptr(p->addr), p->len))
+ if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
return -EFAULT;
p->bgid = READ_ONCE(sqe->buf_group);
@@ -3258,6 +3237,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
#if defined(CONFIG_EPOLL)
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
req->epoll.epfd = READ_ONCE(sqe->fd);
req->epoll.op = READ_ONCE(sqe->len);
@@ -3302,6 +3283,8 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
if (sqe->ioprio || sqe->buf_index || sqe->off)
return -EINVAL;
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
req->madvise.addr = READ_ONCE(sqe->addr);
req->madvise.len = READ_ONCE(sqe->len);
@@ -3336,6 +3319,8 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
if (sqe->ioprio || sqe->buf_index || sqe->addr)
return -EINVAL;
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
req->fadvise.offset = READ_ONCE(sqe->off);
req->fadvise.len = READ_ONCE(sqe->len);
@@ -3369,6 +3354,8 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
if (sqe->ioprio || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
@@ -3409,10 +3396,14 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
/*
* If we queue this for async, it must not be cancellable. That would
- * leave the 'file' in an undeterminate state.
+ * leave the 'file' in an undeterminate state, and here need to modify
+ * io_wq_work.flags, so initialize io_wq_work firstly.
*/
+ io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_NO_CANCEL;
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+ return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
sqe->rw_flags || sqe->buf_index)
return -EINVAL;
@@ -3420,53 +3411,41 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
- return 0;
-}
-
-/* only called when __close_fd_get_file() is done */
-static void __io_close_finish(struct io_kiocb *req)
-{
- int ret;
-
- ret = filp_close(req->close.put_file, req->work.files);
- if (ret < 0)
- req_set_fail_links(req);
- io_cqring_add_event(req, ret);
- fput(req->close.put_file);
- io_put_req(req);
-}
-
-static void io_close_finish(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ if ((req->file && req->file->f_op == &io_uring_fops) ||
+ req->close.fd == req->ctx->ring_fd)
+ return -EBADF;
- /* not cancellable, don't do io_req_cancelled() */
- __io_close_finish(req);
- io_steal_work(req, workptr);
+ req->close.put_file = NULL;
+ return 0;
}
static int io_close(struct io_kiocb *req, bool force_nonblock)
{
+ struct io_close *close = &req->close;
int ret;
- req->close.put_file = NULL;
- ret = __close_fd_get_file(req->close.fd, &req->close.put_file);
- if (ret < 0)
- return (ret == -ENOENT) ? -EBADF : ret;
+ /* might be already done during nonblock submission */
+ if (!close->put_file) {
+ ret = __close_fd_get_file(close->fd, &close->put_file);
+ if (ret < 0)
+ return (ret == -ENOENT) ? -EBADF : ret;
+ }
/* if the file has a flush method, be safe and punt to async */
- if (req->close.put_file->f_op->flush && force_nonblock) {
+ if (close->put_file->f_op->flush && force_nonblock) {
/* avoid grabbing files - we don't need the files */
req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT;
- req->work.func = io_close_finish;
return -EAGAIN;
}
- /*
- * No ->flush(), safely close from here and just punt the
- * fput() to async context.
- */
- __io_close_finish(req);
+ /* No ->flush() or already async, safely close from here */
+ ret = filp_close(close->put_file, req->work.files);
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ fput(close->put_file);
+ close->put_file = NULL;
+ io_put_req(req);
return 0;
}
@@ -3488,38 +3467,20 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static void __io_sync_file_range(struct io_kiocb *req)
+static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
{
int ret;
+ /* sync_file_range always requires a blocking context */
+ if (force_nonblock)
+ return -EAGAIN;
+
ret = sync_file_range(req->file, req->sync.off, req->sync.len,
req->sync.flags);
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
io_put_req(req);
-}
-
-
-static void io_sync_file_range_finish(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
- if (io_req_cancelled(req))
- return;
- __io_sync_file_range(req);
- io_steal_work(req, workptr);
-}
-
-static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
-{
- /* sync_file_range always requires a blocking context */
- if (force_nonblock) {
- req->work.func = io_sync_file_range_finish;
- return -EAGAIN;
- }
-
- __io_sync_file_range(req);
return 0;
}
@@ -3545,6 +3506,9 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
struct io_async_ctx *io = req->io;
int ret;
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+
sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
@@ -3574,9 +3538,6 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
struct socket *sock;
int ret;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
-
sock = sock_from_file(req->file, &ret);
if (sock) {
struct io_async_ctx io;
@@ -3630,9 +3591,6 @@ static int io_send(struct io_kiocb *req, bool force_nonblock)
struct socket *sock;
int ret;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
-
sock = sock_from_file(req->file, &ret);
if (sock) {
struct io_sr_msg *sr = &req->sr_msg;
@@ -3785,6 +3743,9 @@ static int io_recvmsg_prep(struct io_kiocb *req,
struct io_async_ctx *io = req->io;
int ret;
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+
sr->msg_flags = READ_ONCE(sqe->msg_flags);
sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
@@ -3813,9 +3774,6 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
struct socket *sock;
int ret, cflags = 0;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
-
sock = sock_from_file(req->file, &ret);
if (sock) {
struct io_buffer *kbuf;
@@ -3877,9 +3835,6 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock)
struct socket *sock;
int ret, cflags = 0;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
-
sock = sock_from_file(req->file, &ret);
if (sock) {
struct io_sr_msg *sr = &req->sr_msg;
@@ -3947,49 +3902,30 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return 0;
}
-static int __io_accept(struct io_kiocb *req, bool force_nonblock)
+static int io_accept(struct io_kiocb *req, bool force_nonblock)
{
struct io_accept *accept = &req->accept;
- unsigned file_flags;
+ unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
int ret;
- file_flags = force_nonblock ? O_NONBLOCK : 0;
+ if (req->file->f_flags & O_NONBLOCK)
+ req->flags |= REQ_F_NOWAIT;
+
ret = __sys_accept4_file(req->file, file_flags, accept->addr,
accept->addr_len, accept->flags,
accept->nofile);
if (ret == -EAGAIN && force_nonblock)
return -EAGAIN;
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
- if (ret < 0)
+ if (ret < 0) {
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
req_set_fail_links(req);
+ }
io_cqring_add_event(req, ret);
io_put_req(req);
return 0;
}
-static void io_accept_finish(struct io_wq_work **workptr)
-{
- struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
- if (io_req_cancelled(req))
- return;
- __io_accept(req, false);
- io_steal_work(req, workptr);
-}
-
-static int io_accept(struct io_kiocb *req, bool force_nonblock)
-{
- int ret;
-
- ret = __io_accept(req, force_nonblock);
- if (ret == -EAGAIN && force_nonblock) {
- req->work.func = io_accept_finish;
- return -EAGAIN;
- }
- return 0;
-}
-
static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_connect *conn = &req->connect;
@@ -4328,7 +4264,8 @@ static void io_async_task_func(struct callback_head *cb)
spin_unlock_irq(&ctx->completion_lock);
/* restore ->work in case we need to retry again */
- memcpy(&req->work, &apoll->work, sizeof(req->work));
+ if (req->flags & REQ_F_WORK_INITIALIZED)
+ memcpy(&req->work, &apoll->work, sizeof(req->work));
kfree(apoll);
if (!canceled) {
@@ -4425,7 +4362,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
return false;
req->flags |= REQ_F_POLLED;
- memcpy(&apoll->work, &req->work, sizeof(req->work));
+ if (req->flags & REQ_F_WORK_INITIALIZED)
+ memcpy(&apoll->work, &req->work, sizeof(req->work));
had_io = req->io != NULL;
get_task_struct(current);
@@ -4450,7 +4388,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
if (!had_io)
io_poll_remove_double(req);
spin_unlock_irq(&ctx->completion_lock);
- memcpy(&req->work, &apoll->work, sizeof(req->work));
+ if (req->flags & REQ_F_WORK_INITIALIZED)
+ memcpy(&req->work, &apoll->work, sizeof(req->work));
kfree(apoll);
return false;
}
@@ -4495,7 +4434,9 @@ static bool io_poll_remove_one(struct io_kiocb *req)
* io_req_work_drop_env below when dropping the
* final reference.
*/
- memcpy(&req->work, &apoll->work, sizeof(req->work));
+ if (req->flags & REQ_F_WORK_INITIALIZED)
+ memcpy(&req->work, &apoll->work,
+ sizeof(req->work));
kfree(apoll);
}
}
@@ -4944,6 +4885,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
if (!sqe)
return 0;
+ io_req_init_async(req);
+
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (unlikely(ret))
@@ -5381,12 +5324,26 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return 0;
}
+static void io_arm_async_linked_timeout(struct io_kiocb *req)
+{
+ struct io_kiocb *link;
+
+ /* link head's timeout is queued in io_queue_async_work() */
+ if (!(req->flags & REQ_F_QUEUE_TIMEOUT))
+ return;
+
+ link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
+ io_queue_linked_timeout(link);
+}
+
static void io_wq_submit_work(struct io_wq_work **workptr)
{
struct io_wq_work *work = *workptr;
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
int ret = 0;
+ io_arm_async_linked_timeout(req);
+
/* if NO_CANCEL is set, we must still run the work */
if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) ==
IO_WQ_WORK_CANCEL) {
@@ -5437,19 +5394,20 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
return -EBADF;
fd = array_index_nospec(fd, ctx->nr_user_files);
file = io_file_from_index(ctx, fd);
- if (!file)
- return -EBADF;
- req->fixed_file_refs = ctx->file_data->cur_refs;
- percpu_ref_get(req->fixed_file_refs);
+ if (file) {
+ req->fixed_file_refs = ctx->file_data->cur_refs;
+ percpu_ref_get(req->fixed_file_refs);
+ }
} else {
trace_io_uring_file_get(ctx, fd);
file = __io_file_get(state, fd);
- if (unlikely(!file))
- return -EBADF;
}
- *out_file = file;
- return 0;
+ if (file || io_op_defs[req->opcode].needs_file_no_error) {
+ *out_file = file;
+ return 0;
+ }
+ return -EBADF;
}
static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
@@ -5583,7 +5541,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
again:
linked_timeout = io_prep_linked_timeout(req);
- if (req->work.creds && req->work.creds != current_cred()) {
+ if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
+ req->work.creds != current_cred()) {
if (old_creds)
revert_creds(old_creds);
if (old_creds == req->work.creds)
@@ -5606,6 +5565,8 @@ again:
goto exit;
}
punt:
+ io_req_init_async(req);
+
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (ret)
@@ -5858,7 +5819,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
refcount_set(&req->refs, 2);
req->task = NULL;
req->result = 0;
- INIT_IO_WORK(&req->work, io_wq_submit_work);
if (unlikely(req->opcode >= IORING_OP_LAST))
return -EINVAL;
@@ -5880,6 +5840,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
id = READ_ONCE(sqe->personality);
if (id) {
+ io_req_init_async(req);
req->work.creds = idr_find(&ctx->personality_idr, id);
if (unlikely(!req->work.creds))
return -EINVAL;
@@ -6874,6 +6835,7 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx,
data.user = ctx->user;
data.free_work = io_free_work;
+ data.do_work = io_wq_submit_work;
if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
/* Do QD, or 4 * CPUS, whatever is smallest */
@@ -7155,8 +7117,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
ret = 0;
if (!pages || nr_pages > got_pages) {
- kfree(vmas);
- kfree(pages);
+ kvfree(vmas);
+ kvfree(pages);
pages = kvmalloc_array(nr_pages, sizeof(struct page *),
GFP_KERNEL);
vmas = kvmalloc_array(nr_pages,
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 81b43f5bdf23..1257f26bb887 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -261,9 +261,9 @@ TRACE_EVENT(block_bio_bounce,
*/
TRACE_EVENT(block_bio_complete,
- TP_PROTO(struct request_queue *q, struct bio *bio, int error),
+ TP_PROTO(struct request_queue *q, struct bio *bio),
- TP_ARGS(q, bio, error),
+ TP_ARGS(q, bio),
TP_STRUCT__entry(
__field( dev_t, dev )
@@ -277,7 +277,7 @@ TRACE_EVENT(block_bio_complete,
__entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio_sectors(bio);
- __entry->error = error;
+ __entry->error = blk_status_to_errno(bio->bi_status);
blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
),
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 7cb09c4cf21c..02441ead3c3b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -928,14 +928,12 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
clocksource_arch_init(cs);
-#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE
if (cs->vdso_clock_mode < 0 ||
cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) {
pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n",
cs->name, cs->vdso_clock_mode);
cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
}
-#endif
/* Initialize mult/shift and max_idle_ns */
__clocksource_update_freq_scale(cs, scale, freq);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index ea47f2084087..5773f0ba7e76 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -885,10 +885,10 @@ static void blk_add_trace_bio_bounce(void *ignore,
}
static void blk_add_trace_bio_complete(void *ignore,
- struct request_queue *q, struct bio *bio,
- int error)
+ struct request_queue *q, struct bio *bio)
{
- blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
+ blk_add_trace_bio(q, bio, BLK_TA_COMPLETE,
+ blk_status_to_errno(bio->bi_status));
}
static void blk_add_trace_bio_backmerge(void *ignore,
@@ -995,8 +995,10 @@ static void blk_add_trace_split(void *ignore,
__blk_add_trace(bt, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
- BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
- &rpdu, blk_trace_bio_get_cgid(q, bio));
+ BLK_TA_SPLIT,
+ blk_status_to_errno(bio->bi_status),
+ sizeof(rpdu), &rpdu,
+ blk_trace_bio_get_cgid(q, bio));
}
rcu_read_unlock();
}
@@ -1033,7 +1035,8 @@ static void blk_add_trace_bio_remap(void *ignore,
r.sector_from = cpu_to_be64(from);
__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
- bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
+ bio_op(bio), bio->bi_opf, BLK_TA_REMAP,
+ blk_status_to_errno(bio->bi_status),
sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
rcu_read_unlock();
}
@@ -1253,21 +1256,10 @@ static inline __u16 t_error(const struct trace_entry *ent)
static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg)
{
- const __u64 *val = pdu_start(ent, has_cg);
+ const __be64 *val = pdu_start(ent, has_cg);
return be64_to_cpu(*val);
}
-static void get_pdu_remap(const struct trace_entry *ent,
- struct blk_io_trace_remap *r, bool has_cg)
-{
- const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg);
- __u64 sector_from = __r->sector_from;
-
- r->device_from = be32_to_cpu(__r->device_from);
- r->device_to = be32_to_cpu(__r->device_to);
- r->sector_from = be64_to_cpu(sector_from);
-}
-
typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act,
bool has_cg);
@@ -1407,13 +1399,13 @@ static void blk_log_with_error(struct trace_seq *s,
static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
{
- struct blk_io_trace_remap r = { .device_from = 0, };
+ const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg);
- get_pdu_remap(ent, &r, has_cg);
trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
t_sector(ent), t_sec(ent),
- MAJOR(r.device_from), MINOR(r.device_from),
- (unsigned long long)r.sector_from);
+ MAJOR(be32_to_cpu(__r->device_from)),
+ MINOR(be32_to_cpu(__r->device_from)),
+ be64_to_cpu(__r->sector_from));
}
static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index a2909af4b924..bcc9a98a0524 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -38,6 +38,13 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
}
#endif
+#ifndef vdso_cycles_ok
+static inline bool vdso_cycles_ok(u64 cycles)
+{
+ return true;
+}
+#endif
+
#ifdef CONFIG_TIME_NS
static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
struct __kernel_timespec *ts)
@@ -62,6 +69,8 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
return -1;
cycles = __arch_get_hw_counter(vd->clock_mode);
+ if (unlikely(!vdso_cycles_ok(cycles)))
+ return -1;
ns = vdso_ts->nsec;
last = vd->cycle_last;
ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
@@ -130,6 +139,8 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
return -1;
cycles = __arch_get_hw_counter(vd->clock_mode);
+ if (unlikely(!vdso_cycles_ok(cycles)))
+ return -1;
ns = vdso_ts->nsec;
last = vd->cycle_last;
ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
@@ -210,7 +221,7 @@ static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk,
return 0;
}
-static __maybe_unused int
+static __always_inline int
__cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock,
struct __kernel_timespec *ts)
{