34 files changed, 416 insertions, 391 deletions
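One of the smaller fixes in this series, in the drivers/block/loop.c hunk further down, inverts a flag mask in loop_set_status(): the old code cleared the settable flags from the requested value instead of keeping only them. Below is a minimal standalone sketch of the before/after masking; the flag names and values are hypothetical stand-ins, not the real LO_FLAGS_* constants, and this is an illustration rather than driver code.

/*
 * Illustration only -- not driver code. Flag values are made up;
 * the real constants live in <linux/loop.h>.
 */
#include <stdio.h>

#define FLAG_READ_ONLY  0x01    /* hypothetical: not settable via LOOP_SET_STATUS */
#define FLAG_AUTOCLEAR  0x04    /* hypothetical: settable */
#define FLAG_PARTSCAN   0x08    /* hypothetical: settable */
#define SETTABLE_FLAGS  (FLAG_AUTOCLEAR | FLAG_PARTSCAN)

int main(void)
{
        unsigned int prev = FLAG_READ_ONLY | FLAG_AUTOCLEAR;  /* current device flags */
        unsigned int req  = FLAG_PARTSCAN;                     /* flags requested by userspace */

        /* old code: "&= ~SETTABLE_FLAGS" throws away the requested settable
         * bits and keeps only the non-settable ones from the request */
        unsigned int buggy = (req & ~SETTABLE_FLAGS) | (prev & ~SETTABLE_FLAGS);

        /* fixed code: keep only the settable bits of the request, then carry
         * the non-settable bits over from the previous state */
        unsigned int fixed = (req & SETTABLE_FLAGS) | (prev & ~SETTABLE_FLAGS);

        printf("buggy=0x%x fixed=0x%x\n", buggy, fixed);  /* prints: buggy=0x1 fixed=0x9 */
        return 0;
}

With the inverted mask, flags that userspace is allowed to set were silently dropped while the non-settable bits were carried forward, which is exactly what the one-character change in the hunk corrects.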
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 8f1e94f29a16..a338a6deb950 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -89,6 +89,8 @@ #define INTEL_FAM6_COMETLAKE 0xA5 #define INTEL_FAM6_COMETLAKE_L 0xA6 +#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 9a6dc9b4ec99..fb81fea99093 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -271,6 +271,24 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void) return __vdso_data; } +static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd) +{ + return true; +} +#define vdso_clocksource_ok arch_vdso_clocksource_ok + +/* + * Clocksource read value validation to handle PV and HyperV clocksources + * which can be invalidated asynchronously and indicate invalidation by + * returning U64_MAX, which can be effectively tested by checking for a + * negative value after casting it to s64. + */ +static inline bool arch_vdso_cycles_ok(u64 cycles) +{ + return (s64)cycles >= 0; +} +#define vdso_cycles_ok arch_vdso_cycles_ok + /* * x86 specific delta calculation. * diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4b1d31be50b4..bf4acb0b5365 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2060,7 +2060,7 @@ void __init init_apic_mappings(void) unsigned int new_apicid; if (apic_validate_deadline_timer()) - pr_debug("TSC deadline timer available\n"); + pr_info("TSC deadline timer available\n"); if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b6f887be440c..0b71970d2d3d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -588,7 +588,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline); static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; -static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = +static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = SPECTRE_V2_USER_NONE; #ifdef CONFIG_RETPOLINE @@ -734,15 +736,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) break; } - /* - * At this point, an STIBP mode other than "off" has been set. - * If STIBP support is not being forced, check if STIBP always-on - * is preferred. - */ - if (mode != SPECTRE_V2_USER_STRICT && - boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) - mode = SPECTRE_V2_USER_STRICT_PREFERRED; - /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); @@ -765,23 +758,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); + + spectre_v2_user_ibpb = mode; } - /* If enhanced IBRS is enabled no STIBP required */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + /* + * If enhanced IBRS is enabled or SMT impossible, STIBP is not + * required. 
+ */ + if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; /* - * If SMT is not possible or STIBP is not available clear the STIBP - * mode. + * At this point, an STIBP mode other than "off" has been set. + * If STIBP support is not being forced, check if STIBP always-on + * is preferred. */ - if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + if (mode != SPECTRE_V2_USER_STRICT && + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + + /* + * If STIBP is not available, clear the STIBP mode. + */ + if (!boot_cpu_has(X86_FEATURE_STIBP)) mode = SPECTRE_V2_USER_NONE; + + spectre_v2_user_stibp = mode; + set_mode: - spectre_v2_user = mode; - /* Only print the STIBP mode when SMT possible */ - if (smt_possible) - pr_info("%s\n", spectre_v2_user_strings[mode]); + pr_info("%s\n", spectre_v2_user_strings[mode]); } static const char * const spectre_v2_strings[] = { @@ -1014,7 +1020,7 @@ void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; case SPECTRE_V2_USER_STRICT: @@ -1257,14 +1263,19 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { case PR_SPEC_ENABLE: - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; /* * Indirect branch speculation is always disabled in strict - * mode. + * mode. It can neither be enabled if it was force-disabled + * by a previous prctl call. + */ - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + task_spec_ib_force_disable(task)) return -EPERM; task_clear_spec_ib_disable(task); task_update_spec_tif(task); @@ -1275,10 +1286,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) * Indirect branch speculation is always allowed when * mitigation is force disabled. 
*/ - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) return 0; task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) @@ -1309,7 +1322,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); - if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -1340,22 +1354,24 @@ static int ib_prctl_get(struct task_struct *task) if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return PR_SPEC_NOT_AFFECTED; - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - case SPECTRE_V2_USER_STRICT: - case SPECTRE_V2_USER_STRICT_PREFERRED: - return PR_SPEC_DISABLE; - default: + } else return PR_SPEC_NOT_AFFECTED; - } } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) @@ -1594,7 +1610,7 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 63926c94eb5f..c25a67a34bd3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1142,9 +1142,12 @@ void switch_to_sld(unsigned long tifn) static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), {} }; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8e3d0347b664..f362ce0d5ac0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -545,28 +545,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, lockdep_assert_irqs_disabled(); - /* - * If TIF_SSBD is different, select the proper mitigation - * method. 
Note that if SSBD mitigation is disabled or permanentely - * enabled this branch can't be taken because nothing can set - * TIF_SSBD. - */ - if (tif_diff & _TIF_SSBD) { - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + /* Handle change of TIF_SSBD depending on the mitigation method. */ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_ssb_virt_state(tifn); - } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_core_ssb_state(tifn); - } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - msr |= ssbd_tif_to_spec_ctrl(tifn); - updmsr = true; - } + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + updmsr |= !!(tif_diff & _TIF_SSBD); + msr |= ssbd_tif_to_spec_ctrl(tifn); } - /* - * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, - * otherwise avoid the MSR write. - */ + /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */ if (IS_ENABLED(CONFIG_SMP) && static_branch_unlikely(&switch_to_cond_stibp)) { updmsr |= !!(tif_diff & _TIF_SPEC_IB); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e040ba6be27b..0ec7ced727fe 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -197,6 +197,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), }, }, + { /* Handle problems with rebooting on Apple MacBook6,1 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook6,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"), + }, + }, { /* Handle problems with rebooting on Apple MacBookPro5 */ .callback = set_pci_reboot, .ident = "Apple MacBookPro5", diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 371a6b348e44..e42faa792c07 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,10 +25,6 @@ #include <asm/hpet.h> #include <asm/time.h> -#ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; -#endif - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1bf7e312361f..7c35556c7827 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -40,13 +40,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT) #ifdef CONFIG_X86_32 OUTPUT_ARCH(i386) ENTRY(phys_startup_32) -jiffies = jiffies_64; #else OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) -jiffies_64 = jiffies; #endif +jiffies = jiffies_64; + #if defined(CONFIG_X86_64) /* * On 64-bit, align RODATA to 2MB so we retain large page mappings for diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 3579ac0f6ec1..23632a33ed39 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -281,7 +281,6 @@ bool bio_integrity_prep(struct bio *bio) if (ret == 0) { printk(KERN_ERR "could not attach integrity payload\n"); - kfree(buf); status = BLK_STS_RESOURCE; goto err_end_io; } diff --git a/block/bio.c b/block/bio.c index 5235da6434aa..a7366c02c9b5 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1434,8 +1434,7 @@ again: } if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_complete(bio->bi_disk->queue, bio, - blk_status_to_errno(bio->bi_status)); + trace_block_bio_complete(bio->bi_disk->queue, bio); bio_clear_flag(bio, 
BIO_TRACE_COMPLETION); } diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 96a39d0724a2..44f3d0967cb4 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -191,6 +191,33 @@ found_tag: return tag + tag_offset; } +bool __blk_mq_get_driver_tag(struct request *rq) +{ + struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags; + unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags; + bool shared = blk_mq_tag_busy(rq->mq_hctx); + int tag; + + if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) { + bt = &rq->mq_hctx->tags->breserved_tags; + tag_offset = 0; + } + + if (!hctx_may_queue(rq->mq_hctx, bt)) + return false; + tag = __sbitmap_queue_get(bt); + if (tag == BLK_MQ_NO_TAG) + return false; + + rq->tag = tag + tag_offset; + if (shared) { + rq->rq_flags |= RQF_MQ_INFLIGHT; + atomic_inc(&rq->mq_hctx->nr_active); + } + rq->mq_hctx->tags->rqs[rq->tag] = rq; + return true; +} + void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx, unsigned int tag) { @@ -269,6 +296,7 @@ struct bt_tags_iter_data { #define BT_TAG_ITER_RESERVED (1 << 0) #define BT_TAG_ITER_STARTED (1 << 1) +#define BT_TAG_ITER_STATIC_RQS (1 << 2) static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) { @@ -282,9 +310,12 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) /* * We can hit rq == NULL here, because the tagging functions - * test and set the bit before assining ->rqs[]. + * test and set the bit before assigning ->rqs[]. */ - rq = tags->rqs[bitnr]; + if (iter_data->flags & BT_TAG_ITER_STATIC_RQS) + rq = tags->static_rqs[bitnr]; + else + rq = tags->rqs[bitnr]; if (!rq) return true; if ((iter_data->flags & BT_TAG_ITER_STARTED) && @@ -339,11 +370,13 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags, * indicates whether or not @rq is a reserved request. Return * true to continue iterating tags, false to stop. * @priv: Will be passed as second argument to @fn. + * + * Caller has to pass the tag map from which requests are allocated. 
*/ void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv) { - return __blk_mq_all_tag_iter(tags, fn, priv, 0); + return __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS); } /** diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index d38e48f2a0a4..2e4ef51cdb32 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -51,6 +51,14 @@ enum { BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1, }; +bool __blk_mq_get_driver_tag(struct request *rq); +static inline bool blk_mq_get_driver_tag(struct request *rq) +{ + if (rq->tag != BLK_MQ_NO_TAG) + return true; + return __blk_mq_get_driver_tag(rq); +} + extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *); extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); diff --git a/block/blk-mq.c b/block/blk-mq.c index 9a36ac1c1fa1..4f57d27bfa73 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1052,35 +1052,6 @@ static inline unsigned int queued_to_index(unsigned int queued) return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1); } -bool blk_mq_get_driver_tag(struct request *rq) -{ - struct blk_mq_alloc_data data = { - .q = rq->q, - .hctx = rq->mq_hctx, - .flags = BLK_MQ_REQ_NOWAIT, - .cmd_flags = rq->cmd_flags, - }; - bool shared; - - if (rq->tag != BLK_MQ_NO_TAG) - return true; - - if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) - data.flags |= BLK_MQ_REQ_RESERVED; - - shared = blk_mq_tag_busy(data.hctx); - rq->tag = blk_mq_get_tag(&data); - if (rq->tag >= 0) { - if (shared) { - rq->rq_flags |= RQF_MQ_INFLIGHT; - atomic_inc(&data.hctx->nr_active); - } - data.hctx->tags->rqs[rq->tag] = rq; - } - - return rq->tag != BLK_MQ_NO_TAG; -} - static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags, void *key) { diff --git a/block/blk-mq.h b/block/blk-mq.h index a139b0631817..b3ce0f3a2ad2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -44,7 +44,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); -bool blk_mq_get_driver_tag(struct request *rq); struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *start); diff --git a/block/blk.h b/block/blk.h index aa16e524dc35..b5d1f0fc6547 100644 --- a/block/blk.h +++ b/block/blk.h @@ -420,9 +420,11 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part) static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) + preempt_disable(); write_seqcount_begin(&part->nr_sects_seq); part->nr_sects = size; write_seqcount_end(&part->nr_sects_seq); + preempt_enable(); #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) preempt_disable(); part->nr_sects = size; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 2e96d8b8758b..c33bbbfd1bd9 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1390,7 +1390,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unfreeze; /* Mask out flags that can't be set using LOOP_SET_STATUS. 
*/ - lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS; + lo->lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS; /* For those flags, use the previous values instead */ lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS; /* For flags that can't be cleared, use previous values too */ diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 0b944ac96d6b..27a33adc41e4 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1613,7 +1613,7 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd, disc_information di; track_information ti; __u32 last_track; - int ret = -1; + int ret; ret = pkt_get_disc_info(pd, &di); if (ret) diff --git a/drivers/block/umem.c b/drivers/block/umem.c index d84e8a878df2..1e2aa5ae2796 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -784,7 +784,7 @@ static const struct block_device_operations mm_fops = { static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { - int ret = -ENODEV; + int ret; struct cardinfo *card = &cards[num_cards]; unsigned char mem_present; unsigned char batt_status; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0585efa47d8f..c2c5bc4fb702 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3669,7 +3669,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->disk = disk; if (__nvme_revalidate_disk(disk, id)) - goto out_free_disk; + goto out_put_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { ret = nvme_nvm_register(ns, disk_name, node); @@ -3696,8 +3696,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) /* prevent double queue cleanup */ ns->disk->queue = NULL; put_disk(ns->disk); - out_free_disk: - del_gendisk(ns->disk); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index cb0007592c12..e999a8c4b7e8 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2634,10 +2634,11 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); - if (!(op->flags & FCOP_FLAGS_AEN)) + if (!(op->flags & FCOP_FLAGS_AEN)) { nvme_fc_unmap_data(ctrl, op->rq, op); + nvme_cleanup_cmd(op->rq); + } - nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index fa5c75501049..c0f4226d3299 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -599,8 +599,7 @@ static inline void nvme_trace_bio_complete(struct request *req, struct nvme_ns *ns = req->q->queuedata; if (req->cmd_flags & REQ_NVME_MPATH) - trace_block_bio_complete(ns->head->disk->queue, - req->bio, status); + trace_block_bio_complete(ns->head->disk->queue, req->bio); } extern struct device_attribute dev_attr_ana_grpid; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d690d5593a80..e2bacd369a88 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2950,9 +2950,15 @@ static int nvme_suspend(struct device *dev) * the PCI bus layer to put it into D3 in order to take the PCIe link * down, so as to allow the platform to achieve its minimum low-power * state (which may not be possible if the link is up). 
+ * + * If a host memory buffer is enabled, shut down the device as the NVMe + * specification allows the device to access the host memory buffer in + * host DRAM from all power states, but hosts will fail access to DRAM + * during S3. */ if (pm_suspend_via_firmware() || !ctrl->npss || !pcie_aspm_enabled(pdev) || + ndev->nr_host_mem_descs || (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) return nvme_disable_prepare_reset(ndev, true); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1843110ec34f..3345ec7efaff 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -131,8 +131,8 @@ struct nvme_tcp_ctrl { static LIST_HEAD(nvme_tcp_ctrl_list); static DEFINE_MUTEX(nvme_tcp_ctrl_mutex); static struct workqueue_struct *nvme_tcp_wq; -static struct blk_mq_ops nvme_tcp_mq_ops; -static struct blk_mq_ops nvme_tcp_admin_mq_ops; +static const struct blk_mq_ops nvme_tcp_mq_ops; +static const struct blk_mq_ops nvme_tcp_admin_mq_ops; static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) @@ -2301,7 +2301,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) return queue->nr_cqe; } -static struct blk_mq_ops nvme_tcp_mq_ops = { +static const struct blk_mq_ops nvme_tcp_mq_ops = { .queue_rq = nvme_tcp_queue_rq, .complete = nvme_complete_rq, .init_request = nvme_tcp_init_request, @@ -2312,7 +2312,7 @@ static struct blk_mq_ops nvme_tcp_mq_ops = { .poll = nvme_tcp_poll, }; -static struct blk_mq_ops nvme_tcp_admin_mq_ops = { +static const struct blk_mq_ops nvme_tcp_admin_mq_ops = { .queue_rq = nvme_tcp_queue_rq, .complete = nvme_complete_rq, .init_request = nvme_tcp_init_request, diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6392bcd30bd7..6e2f623e472e 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -129,7 +129,22 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen) return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); } -static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) +static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl) +{ + u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; + struct nvmet_req *req; + + mutex_lock(&ctrl->lock); + while (ctrl->nr_async_event_cmds) { + req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; + mutex_unlock(&ctrl->lock); + nvmet_req_complete(req, status); + mutex_lock(&ctrl->lock); + } + mutex_unlock(&ctrl->lock); +} + +static void nvmet_async_events_process(struct nvmet_ctrl *ctrl) { struct nvmet_async_event *aen; struct nvmet_req *req; @@ -139,15 +154,14 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) aen = list_first_entry(&ctrl->async_events, struct nvmet_async_event, entry); req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - if (status == 0) - nvmet_set_result(req, nvmet_async_event_result(aen)); + nvmet_set_result(req, nvmet_async_event_result(aen)); list_del(&aen->entry); kfree(aen); mutex_unlock(&ctrl->lock); trace_nvmet_async_event(ctrl, req->cqe->result.u32); - nvmet_req_complete(req, status); + nvmet_req_complete(req, 0); mutex_lock(&ctrl->lock); } mutex_unlock(&ctrl->lock); @@ -170,7 +184,7 @@ static void nvmet_async_event_work(struct work_struct *work) struct nvmet_ctrl *ctrl = container_of(work, struct nvmet_ctrl, async_event_work); - nvmet_async_events_process(ctrl, 0); + nvmet_async_events_process(ctrl); } void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, @@ -779,7 
+793,6 @@ static void nvmet_confirm_sq(struct percpu_ref *ref) void nvmet_sq_destroy(struct nvmet_sq *sq) { - u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; struct nvmet_ctrl *ctrl = sq->ctrl; /* @@ -787,7 +800,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) * queue doesn't have outstanding requests on it. */ if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) - nvmet_async_events_process(ctrl, status); + nvmet_async_events_failall(ctrl); percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 1669177cd26c..de9217cfd22d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -153,7 +153,7 @@ static LIST_HEAD(nvmet_tcp_queue_list); static DEFINE_MUTEX(nvmet_tcp_queue_mutex); static struct workqueue_struct *nvmet_tcp_wq; -static struct nvmet_fabrics_ops nvmet_tcp_ops; +static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); @@ -1713,7 +1713,7 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req, } } -static struct nvmet_fabrics_ops nvmet_tcp_ops = { +static const struct nvmet_fabrics_ops nvmet_tcp_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_TCP, .msdbd = 1, diff --git a/fs/afs/write.c b/fs/afs/write.c index 97bccde3298b..768497f82aee 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -447,6 +447,7 @@ static int afs_store_data(struct address_space *mapping, op->store.last = last; op->store.first_offset = offset; op->store.last_to = to; + op->mtime = vnode->vfs_inode.i_mtime; op->ops = &afs_store_data_operation; try_next_key: diff --git a/fs/io-wq.c b/fs/io-wq.c index a5e90ac39e4d..0b65a912b036 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -111,6 +111,7 @@ struct io_wq { unsigned long state; free_work_fn *free_work; + io_wq_work_fn *do_work; struct task_struct *manager; struct user_struct *user; @@ -523,7 +524,7 @@ get_next: hash = io_get_work_hash(work); linked = old_work = work; - linked->func(&linked); + wq->do_work(&linked); linked = (old_work == linked) ? NULL : linked; work = next_hashed; @@ -780,7 +781,7 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) struct io_wq_work *old_work = work; work->flags |= IO_WQ_WORK_CANCEL; - work->func(&work); + wq->do_work(&work); work = (work == old_work) ? 
NULL : work; wq->free_work(old_work); } while (work); @@ -1018,7 +1019,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) int ret = -ENOMEM, node; struct io_wq *wq; - if (WARN_ON_ONCE(!data->free_work)) + if (WARN_ON_ONCE(!data->free_work || !data->do_work)) return ERR_PTR(-EINVAL); wq = kzalloc(sizeof(*wq), GFP_KERNEL); @@ -1032,6 +1033,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) } wq->free_work = data->free_work; + wq->do_work = data->do_work; /* caller must already hold a reference to this */ wq->user = data->user; @@ -1088,7 +1090,7 @@ err: bool io_wq_get(struct io_wq *wq, struct io_wq_data *data) { - if (data->free_work != wq->free_work) + if (data->free_work != wq->free_work || data->do_work != wq->do_work) return false; return refcount_inc_not_zero(&wq->use_refs); diff --git a/fs/io-wq.h b/fs/io-wq.h index 5ba12de7572f..8e138fa88b9f 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -85,7 +85,6 @@ static inline void wq_list_del(struct io_wq_work_list *list, struct io_wq_work { struct io_wq_work_node list; - void (*func)(struct io_wq_work **); struct files_struct *files; struct mm_struct *mm; const struct cred *creds; @@ -94,11 +93,6 @@ struct io_wq_work { pid_t task_pid; }; -#define INIT_IO_WORK(work, _func) \ - do { \ - *(work) = (struct io_wq_work){ .func = _func }; \ - } while (0) \ - static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) { if (!work->list.next) @@ -108,10 +102,12 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) } typedef void (free_work_fn)(struct io_wq_work *); +typedef void (io_wq_work_fn)(struct io_wq_work **); struct io_wq_data { struct user_struct *user; + io_wq_work_fn *do_work; free_work_fn *free_work; }; diff --git a/fs/io_uring.c b/fs/io_uring.c index 26f7bc941d01..155f3d830ddb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -528,7 +528,6 @@ enum { REQ_F_INFLIGHT_BIT, REQ_F_CUR_POS_BIT, REQ_F_NOWAIT_BIT, - REQ_F_IOPOLL_COMPLETED_BIT, REQ_F_LINK_TIMEOUT_BIT, REQ_F_TIMEOUT_BIT, REQ_F_ISREG_BIT, @@ -540,6 +539,8 @@ enum { REQ_F_POLLED_BIT, REQ_F_BUFFER_SELECTED_BIT, REQ_F_NO_FILE_TABLE_BIT, + REQ_F_QUEUE_TIMEOUT_BIT, + REQ_F_WORK_INITIALIZED_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -571,8 +572,6 @@ enum { REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), /* must not punt to workers */ REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), - /* polled IO has completed */ - REQ_F_IOPOLL_COMPLETED = BIT(REQ_F_IOPOLL_COMPLETED_BIT), /* has linked timeout */ REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), /* timeout request */ @@ -595,6 +594,10 @@ enum { REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), /* doesn't need file table for this request */ REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT), + /* needs to queue linked timeout */ + REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT), + /* io_wq_work is initialized */ + REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT), }; struct async_poll { @@ -633,6 +636,8 @@ struct io_kiocb { struct io_async_ctx *io; int cflags; u8 opcode; + /* polled IO has completed */ + u8 iopoll_completed; u16 buf_index; @@ -697,6 +702,8 @@ struct io_op_def { unsigned needs_mm : 1; /* needs req->file assigned */ unsigned needs_file : 1; + /* don't fail if file grab fails */ + unsigned needs_file_no_error : 1; /* hash wq insertion if file is a regular file */ unsigned hash_reg_file : 1; /* unbound wq insertion if file is a non-regular file */ @@ -803,6 +810,8 @@ static const struct io_op_def 
io_op_defs[] = { .needs_fs = 1, }, [IORING_OP_CLOSE] = { + .needs_file = 1, + .needs_file_no_error = 1, .file_table = 1, }, [IORING_OP_FILES_UPDATE] = { @@ -903,6 +912,19 @@ EXPORT_SYMBOL(io_uring_get_socket); static void io_file_put_work(struct work_struct *work); +/* + * Note: must call io_req_init_async() for the first time you + * touch any members of io_wq_work. + */ +static inline void io_req_init_async(struct io_kiocb *req) +{ + if (req->flags & REQ_F_WORK_INITIALIZED) + return; + + memset(&req->work, 0, sizeof(req->work)); + req->flags |= REQ_F_WORK_INITIALIZED; +} + static inline bool io_async_submit(struct io_ring_ctx *ctx) { return ctx->flags & IORING_SETUP_SQPOLL; @@ -1029,6 +1051,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req, static inline void io_req_work_drop_env(struct io_kiocb *req) { + if (!(req->flags & REQ_F_WORK_INITIALIZED)) + return; + if (req->work.mm) { mmdrop(req->work.mm); req->work.mm = NULL; @@ -1575,16 +1600,6 @@ static void io_free_req(struct io_kiocb *req) io_queue_async_work(nxt); } -static void io_link_work_cb(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - struct io_kiocb *link; - - link = list_first_entry(&req->link_list, struct io_kiocb, link_list); - io_queue_linked_timeout(link); - io_wq_submit_work(workptr); -} - static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt) { struct io_kiocb *link; @@ -1596,7 +1611,7 @@ static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt) *workptr = &nxt->work; link = io_prep_linked_timeout(nxt); if (link) - nxt->work.func = io_link_work_cb; + nxt->flags |= REQ_F_QUEUE_TIMEOUT; } /* @@ -1781,7 +1796,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, * If we find a request that requires polling, break out * and complete those lists first, if we have entries there. */ - if (req->flags & REQ_F_IOPOLL_COMPLETED) { + if (READ_ONCE(req->iopoll_completed)) { list_move_tail(&req->list, &done); continue; } @@ -1962,7 +1977,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) req_set_fail_links(req); req->result = res; if (res != -EAGAIN) - req->flags |= REQ_F_IOPOLL_COMPLETED; + WRITE_ONCE(req->iopoll_completed, 1); } /* @@ -1995,7 +2010,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req) * For fast devices, IO may have already completed. If it has, add * it to the front so we find it first. 
*/ - if (req->flags & REQ_F_IOPOLL_COMPLETED) + if (READ_ONCE(req->iopoll_completed)) list_add(&req->list, &ctx->poll_list); else list_add_tail(&req->list, &ctx->poll_list); @@ -2063,6 +2078,10 @@ static bool io_file_supports_async(struct file *file, int rw) if (S_ISREG(mode) && file->f_op != &io_uring_fops) return true; + /* any ->read/write should understand O_NONBLOCK */ + if (file->f_flags & O_NONBLOCK) + return true; + if (!(file->f_mode & FMODE_NOWAIT)) return false; @@ -2105,8 +2124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, kiocb->ki_ioprio = get_current_ioprio(); /* don't allow async punt if RWF_NOWAIT was requested */ - if ((kiocb->ki_flags & IOCB_NOWAIT) || - (req->file->f_flags & O_NONBLOCK)) + if (kiocb->ki_flags & IOCB_NOWAIT) req->flags |= REQ_F_NOWAIT; if (force_nonblock) @@ -2120,6 +2138,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; req->result = 0; + req->iopoll_completed = 0; } else { if (kiocb->ki_flags & IOCB_HIPRI) return -EINVAL; @@ -2358,8 +2377,14 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, bool needs_lock) { - if (req->flags & REQ_F_BUFFER_SELECTED) + if (req->flags & REQ_F_BUFFER_SELECTED) { + struct io_buffer *kbuf; + + kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; + iov[0].iov_base = u64_to_user_ptr(kbuf->addr); + iov[0].iov_len = kbuf->len; return 0; + } if (!req->rw.len) return 0; else if (req->rw.len > 1) @@ -2741,7 +2766,8 @@ copy_iov: if (ret) goto out_free; /* any defer here is final, must blocking retry */ - if (!file_can_poll(req->file)) + if (!(req->flags & REQ_F_NOWAIT) && + !file_can_poll(req->file)) req->flags |= REQ_F_MUST_PUNT; return -EAGAIN; } @@ -2761,6 +2787,8 @@ static int __io_splice_prep(struct io_kiocb *req, if (req->flags & REQ_F_NEED_CLEANUP) return 0; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; sp->file_in = NULL; sp->len = READ_ONCE(sqe->len); @@ -2775,8 +2803,14 @@ static int __io_splice_prep(struct io_kiocb *req, return ret; req->flags |= REQ_F_NEED_CLEANUP; - if (!S_ISREG(file_inode(sp->file_in)->i_mode)) + if (!S_ISREG(file_inode(sp->file_in)->i_mode)) { + /* + * Splice operation will be punted aync, and here need to + * modify io_wq_work.flags, so initialize io_wq_work firstly. + */ + io_req_init_async(req); req->work.flags |= IO_WQ_WORK_UNBOUND; + } return 0; } @@ -2885,23 +2919,15 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static bool io_req_cancelled(struct io_kiocb *req) -{ - if (req->work.flags & IO_WQ_WORK_CANCEL) { - req_set_fail_links(req); - io_cqring_add_event(req, -ECANCELED); - io_put_req(req); - return true; - } - - return false; -} - -static void __io_fsync(struct io_kiocb *req) +static int io_fsync(struct io_kiocb *req, bool force_nonblock) { loff_t end = req->sync.off + req->sync.len; int ret; + /* fsync always requires a blocking context */ + if (force_nonblock) + return -EAGAIN; + ret = vfs_fsync_range(req->file, req->sync.off, end > 0 ? 
end : LLONG_MAX, req->sync.flags & IORING_FSYNC_DATASYNC); @@ -2909,58 +2935,16 @@ static void __io_fsync(struct io_kiocb *req) req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req(req); -} - -static void io_fsync_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - - if (io_req_cancelled(req)) - return; - __io_fsync(req); - io_steal_work(req, workptr); -} - -static int io_fsync(struct io_kiocb *req, bool force_nonblock) -{ - /* fsync always requires a blocking context */ - if (force_nonblock) { - req->work.func = io_fsync_finish; - return -EAGAIN; - } - __io_fsync(req); return 0; } -static void __io_fallocate(struct io_kiocb *req) -{ - int ret; - - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize; - ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, - req->sync.len); - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; - if (ret < 0) - req_set_fail_links(req); - io_cqring_add_event(req, ret); - io_put_req(req); -} - -static void io_fallocate_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - - if (io_req_cancelled(req)) - return; - __io_fallocate(req); - io_steal_work(req, workptr); -} - static int io_fallocate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (sqe->ioprio || sqe->buf_index || sqe->rw_flags) return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; req->sync.off = READ_ONCE(sqe->off); req->sync.len = READ_ONCE(sqe->addr); @@ -2971,66 +2955,74 @@ static int io_fallocate_prep(struct io_kiocb *req, static int io_fallocate(struct io_kiocb *req, bool force_nonblock) { + int ret; + /* fallocate always requiring blocking context */ - if (force_nonblock) { - req->work.func = io_fallocate_finish; + if (force_nonblock) return -EAGAIN; - } - __io_fallocate(req); + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize; + ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, + req->sync.len); + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; + if (ret < 0) + req_set_fail_links(req); + io_cqring_add_event(req, ret); + io_put_req(req); return 0; } -static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { const char __user *fname; int ret; - if (sqe->ioprio || sqe->buf_index) + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) return -EINVAL; - if (req->flags & REQ_F_FIXED_FILE) + if (unlikely(sqe->ioprio || sqe->buf_index)) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; - if (req->flags & REQ_F_NEED_CLEANUP) - return 0; - req->open.dfd = READ_ONCE(sqe->fd); - req->open.how.mode = READ_ONCE(sqe->len); - fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); - req->open.how.flags = READ_ONCE(sqe->open_flags); - if (force_o_largefile()) + /* open.how should be already initialised */ + if (!(req->open.how.flags & O_PATH) && force_o_largefile()) req->open.how.flags |= O_LARGEFILE; + req->open.dfd = READ_ONCE(sqe->fd); + fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); req->open.filename = getname(fname); if (IS_ERR(req->open.filename)) { ret = PTR_ERR(req->open.filename); req->open.filename = NULL; return ret; } - req->open.nofile = rlimit(RLIMIT_NOFILE); req->flags |= REQ_F_NEED_CLEANUP; return 0; } +static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + u64 flags, mode; + + 
if (req->flags & REQ_F_NEED_CLEANUP) + return 0; + mode = READ_ONCE(sqe->len); + flags = READ_ONCE(sqe->open_flags); + req->open.how = build_open_how(flags, mode); + return __io_openat_prep(req, sqe); +} + static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct open_how __user *how; - const char __user *fname; size_t len; int ret; - if (sqe->ioprio || sqe->buf_index) - return -EINVAL; - if (req->flags & REQ_F_FIXED_FILE) - return -EBADF; if (req->flags & REQ_F_NEED_CLEANUP) return 0; - - req->open.dfd = READ_ONCE(sqe->fd); - fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); len = READ_ONCE(sqe->len); - if (len < OPEN_HOW_SIZE_VER0) return -EINVAL; @@ -3039,19 +3031,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (ret) return ret; - if (!(req->open.how.flags & O_PATH) && force_o_largefile()) - req->open.how.flags |= O_LARGEFILE; - - req->open.filename = getname(fname); - if (IS_ERR(req->open.filename)) { - ret = PTR_ERR(req->open.filename); - req->open.filename = NULL; - return ret; - } - - req->open.nofile = rlimit(RLIMIT_NOFILE); - req->flags |= REQ_F_NEED_CLEANUP; - return 0; + return __io_openat_prep(req, sqe); } static int io_openat2(struct io_kiocb *req, bool force_nonblock) @@ -3091,7 +3071,6 @@ err: static int io_openat(struct io_kiocb *req, bool force_nonblock) { - req->open.how = build_open_how(req->open.how.flags, req->open.how.mode); return io_openat2(req, force_nonblock); } @@ -3180,7 +3159,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req, p->addr = READ_ONCE(sqe->addr); p->len = READ_ONCE(sqe->len); - if (!access_ok(u64_to_user_ptr(p->addr), p->len)) + if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs))) return -EFAULT; p->bgid = READ_ONCE(sqe->buf_group); @@ -3258,6 +3237,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req, #if defined(CONFIG_EPOLL) if (sqe->ioprio || sqe->buf_index) return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; req->epoll.epfd = READ_ONCE(sqe->fd); req->epoll.op = READ_ONCE(sqe->len); @@ -3302,6 +3283,8 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) if (sqe->ioprio || sqe->buf_index || sqe->off) return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; req->madvise.addr = READ_ONCE(sqe->addr); req->madvise.len = READ_ONCE(sqe->len); @@ -3336,6 +3319,8 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (sqe->ioprio || sqe->buf_index || sqe->addr) return -EINVAL; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; req->fadvise.offset = READ_ONCE(sqe->off); req->fadvise.len = READ_ONCE(sqe->len); @@ -3369,6 +3354,8 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock) static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; if (sqe->ioprio || sqe->buf_index) return -EINVAL; if (req->flags & REQ_F_FIXED_FILE) @@ -3409,10 +3396,14 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { /* * If we queue this for async, it must not be cancellable. That would - * leave the 'file' in an undeterminate state. + * leave the 'file' in an undeterminate state, and here need to modify + * io_wq_work.flags, so initialize io_wq_work firstly. 
*/ + io_req_init_async(req); req->work.flags |= IO_WQ_WORK_NO_CANCEL; + if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + return -EINVAL; if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) return -EINVAL; @@ -3420,53 +3411,41 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EBADF; req->close.fd = READ_ONCE(sqe->fd); - return 0; -} - -/* only called when __close_fd_get_file() is done */ -static void __io_close_finish(struct io_kiocb *req) -{ - int ret; - - ret = filp_close(req->close.put_file, req->work.files); - if (ret < 0) - req_set_fail_links(req); - io_cqring_add_event(req, ret); - fput(req->close.put_file); - io_put_req(req); -} - -static void io_close_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); + if ((req->file && req->file->f_op == &io_uring_fops) || + req->close.fd == req->ctx->ring_fd) + return -EBADF; - /* not cancellable, don't do io_req_cancelled() */ - __io_close_finish(req); - io_steal_work(req, workptr); + req->close.put_file = NULL; + return 0; } static int io_close(struct io_kiocb *req, bool force_nonblock) { + struct io_close *close = &req->close; int ret; - req->close.put_file = NULL; - ret = __close_fd_get_file(req->close.fd, &req->close.put_file); - if (ret < 0) - return (ret == -ENOENT) ? -EBADF : ret; + /* might be already done during nonblock submission */ + if (!close->put_file) { + ret = __close_fd_get_file(close->fd, &close->put_file); + if (ret < 0) + return (ret == -ENOENT) ? -EBADF : ret; + } /* if the file has a flush method, be safe and punt to async */ - if (req->close.put_file->f_op->flush && force_nonblock) { + if (close->put_file->f_op->flush && force_nonblock) { /* avoid grabbing files - we don't need the files */ req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT; - req->work.func = io_close_finish; return -EAGAIN; } - /* - * No ->flush(), safely close from here and just punt the - * fput() to async context. 
- */ - __io_close_finish(req); + /* No ->flush() or already async, safely close from here */ + ret = filp_close(close->put_file, req->work.files); + if (ret < 0) + req_set_fail_links(req); + io_cqring_add_event(req, ret); + fput(close->put_file); + close->put_file = NULL; + io_put_req(req); return 0; } @@ -3488,38 +3467,20 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static void __io_sync_file_range(struct io_kiocb *req) +static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) { int ret; + /* sync_file_range always requires a blocking context */ + if (force_nonblock) + return -EAGAIN; + ret = sync_file_range(req->file, req->sync.off, req->sync.len, req->sync.flags); if (ret < 0) req_set_fail_links(req); io_cqring_add_event(req, ret); io_put_req(req); -} - - -static void io_sync_file_range_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - - if (io_req_cancelled(req)) - return; - __io_sync_file_range(req); - io_steal_work(req, workptr); -} - -static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock) -{ - /* sync_file_range always requires a blocking context */ - if (force_nonblock) { - req->work.func = io_sync_file_range_finish; - return -EAGAIN; - } - - __io_sync_file_range(req); return 0; } @@ -3545,6 +3506,9 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_async_ctx *io = req->io; int ret; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); @@ -3574,9 +3538,6 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock) struct socket *sock; int ret; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - sock = sock_from_file(req->file, &ret); if (sock) { struct io_async_ctx io; @@ -3630,9 +3591,6 @@ static int io_send(struct io_kiocb *req, bool force_nonblock) struct socket *sock; int ret; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - sock = sock_from_file(req->file, &ret); if (sock) { struct io_sr_msg *sr = &req->sr_msg; @@ -3785,6 +3743,9 @@ static int io_recvmsg_prep(struct io_kiocb *req, struct io_async_ctx *io = req->io; int ret; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); @@ -3813,9 +3774,6 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock) struct socket *sock; int ret, cflags = 0; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - sock = sock_from_file(req->file, &ret); if (sock) { struct io_buffer *kbuf; @@ -3877,9 +3835,6 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock) struct socket *sock; int ret, cflags = 0; - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - sock = sock_from_file(req->file, &ret); if (sock) { struct io_sr_msg *sr = &req->sr_msg; @@ -3947,49 +3902,30 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static int __io_accept(struct io_kiocb *req, bool force_nonblock) +static int io_accept(struct io_kiocb *req, bool force_nonblock) { struct io_accept *accept = &req->accept; - unsigned file_flags; + unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0; int ret; - file_flags = force_nonblock ? 
O_NONBLOCK : 0; + if (req->file->f_flags & O_NONBLOCK) + req->flags |= REQ_F_NOWAIT; + ret = __sys_accept4_file(req->file, file_flags, accept->addr, accept->addr_len, accept->flags, accept->nofile); if (ret == -EAGAIN && force_nonblock) return -EAGAIN; - if (ret == -ERESTARTSYS) - ret = -EINTR; - if (ret < 0) + if (ret < 0) { + if (ret == -ERESTARTSYS) + ret = -EINTR; req_set_fail_links(req); + } io_cqring_add_event(req, ret); io_put_req(req); return 0; } -static void io_accept_finish(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - - if (io_req_cancelled(req)) - return; - __io_accept(req, false); - io_steal_work(req, workptr); -} - -static int io_accept(struct io_kiocb *req, bool force_nonblock) -{ - int ret; - - ret = __io_accept(req, force_nonblock); - if (ret == -EAGAIN && force_nonblock) { - req->work.func = io_accept_finish; - return -EAGAIN; - } - return 0; -} - static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_connect *conn = &req->connect; @@ -4328,7 +4264,8 @@ static void io_async_task_func(struct callback_head *cb) spin_unlock_irq(&ctx->completion_lock); /* restore ->work in case we need to retry again */ - memcpy(&req->work, &apoll->work, sizeof(req->work)); + if (req->flags & REQ_F_WORK_INITIALIZED) + memcpy(&req->work, &apoll->work, sizeof(req->work)); kfree(apoll); if (!canceled) { @@ -4425,7 +4362,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req) return false; req->flags |= REQ_F_POLLED; - memcpy(&apoll->work, &req->work, sizeof(req->work)); + if (req->flags & REQ_F_WORK_INITIALIZED) + memcpy(&apoll->work, &req->work, sizeof(req->work)); had_io = req->io != NULL; get_task_struct(current); @@ -4450,7 +4388,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req) if (!had_io) io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); - memcpy(&req->work, &apoll->work, sizeof(req->work)); + if (req->flags & REQ_F_WORK_INITIALIZED) + memcpy(&req->work, &apoll->work, sizeof(req->work)); kfree(apoll); return false; } @@ -4495,7 +4434,9 @@ static bool io_poll_remove_one(struct io_kiocb *req) * io_req_work_drop_env below when dropping the * final reference. 
*/ - memcpy(&req->work, &apoll->work, sizeof(req->work)); + if (req->flags & REQ_F_WORK_INITIALIZED) + memcpy(&req->work, &apoll->work, + sizeof(req->work)); kfree(apoll); } } @@ -4944,6 +4885,8 @@ static int io_req_defer_prep(struct io_kiocb *req, if (!sqe) return 0; + io_req_init_async(req); + if (io_op_defs[req->opcode].file_table) { ret = io_grab_files(req); if (unlikely(ret)) @@ -5381,12 +5324,26 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, return 0; } +static void io_arm_async_linked_timeout(struct io_kiocb *req) +{ + struct io_kiocb *link; + + /* link head's timeout is queued in io_queue_async_work() */ + if (!(req->flags & REQ_F_QUEUE_TIMEOUT)) + return; + + link = list_first_entry(&req->link_list, struct io_kiocb, link_list); + io_queue_linked_timeout(link); +} + static void io_wq_submit_work(struct io_wq_work **workptr) { struct io_wq_work *work = *workptr; struct io_kiocb *req = container_of(work, struct io_kiocb, work); int ret = 0; + io_arm_async_linked_timeout(req); + /* if NO_CANCEL is set, we must still run the work */ if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) == IO_WQ_WORK_CANCEL) { @@ -5437,19 +5394,20 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, return -EBADF; fd = array_index_nospec(fd, ctx->nr_user_files); file = io_file_from_index(ctx, fd); - if (!file) - return -EBADF; - req->fixed_file_refs = ctx->file_data->cur_refs; - percpu_ref_get(req->fixed_file_refs); + if (file) { + req->fixed_file_refs = ctx->file_data->cur_refs; + percpu_ref_get(req->fixed_file_refs); + } } else { trace_io_uring_file_get(ctx, fd); file = __io_file_get(state, fd); - if (unlikely(!file)) - return -EBADF; } - *out_file = file; - return 0; + if (file || io_op_defs[req->opcode].needs_file_no_error) { + *out_file = file; + return 0; + } + return -EBADF; } static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, @@ -5583,7 +5541,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) again: linked_timeout = io_prep_linked_timeout(req); - if (req->work.creds && req->work.creds != current_cred()) { + if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds && + req->work.creds != current_cred()) { if (old_creds) revert_creds(old_creds); if (old_creds == req->work.creds) @@ -5606,6 +5565,8 @@ again: goto exit; } punt: + io_req_init_async(req); + if (io_op_defs[req->opcode].file_table) { ret = io_grab_files(req); if (ret) @@ -5858,7 +5819,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, refcount_set(&req->refs, 2); req->task = NULL; req->result = 0; - INIT_IO_WORK(&req->work, io_wq_submit_work); if (unlikely(req->opcode >= IORING_OP_LAST)) return -EINVAL; @@ -5880,6 +5840,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, id = READ_ONCE(sqe->personality); if (id) { + io_req_init_async(req); req->work.creds = idr_find(&ctx->personality_idr, id); if (unlikely(!req->work.creds)) return -EINVAL; @@ -6874,6 +6835,7 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, data.user = ctx->user; data.free_work = io_free_work; + data.do_work = io_wq_submit_work; if (!(p->flags & IORING_SETUP_ATTACH_WQ)) { /* Do QD, or 4 * CPUS, whatever is smallest */ @@ -7155,8 +7117,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, ret = 0; if (!pages || nr_pages > got_pages) { - kfree(vmas); - kfree(pages); + kvfree(vmas); + kvfree(pages); pages = kvmalloc_array(nr_pages, sizeof(struct page *), 
GFP_KERNEL); vmas = kvmalloc_array(nr_pages, diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 81b43f5bdf23..1257f26bb887 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -261,9 +261,9 @@ TRACE_EVENT(block_bio_bounce, */ TRACE_EVENT(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio, int error), + TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio, error), + TP_ARGS(q, bio), TP_STRUCT__entry( __field( dev_t, dev ) @@ -277,7 +277,7 @@ TRACE_EVENT(block_bio_complete, __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); - __entry->error = error; + __entry->error = blk_status_to_errno(bio->bi_status); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); ), diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7cb09c4cf21c..02441ead3c3b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -928,14 +928,12 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) clocksource_arch_init(cs); -#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE if (cs->vdso_clock_mode < 0 || cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) { pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n", cs->name, cs->vdso_clock_mode); cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE; } -#endif /* Initialize mult/shift and max_idle_ns */ __clocksource_update_freq_scale(cs, scale, freq); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index ea47f2084087..5773f0ba7e76 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -885,10 +885,10 @@ static void blk_add_trace_bio_bounce(void *ignore, } static void blk_add_trace_bio_complete(void *ignore, - struct request_queue *q, struct bio *bio, - int error) + struct request_queue *q, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, + blk_status_to_errno(bio->bi_status)); } static void blk_add_trace_bio_backmerge(void *ignore, @@ -995,8 +995,10 @@ static void blk_add_trace_split(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, - BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu), - &rpdu, blk_trace_bio_get_cgid(q, bio)); + BLK_TA_SPLIT, + blk_status_to_errno(bio->bi_status), + sizeof(rpdu), &rpdu, + blk_trace_bio_get_cgid(q, bio)); } rcu_read_unlock(); } @@ -1033,7 +1035,8 @@ static void blk_add_trace_bio_remap(void *ignore, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status, + bio_op(bio), bio->bi_opf, BLK_TA_REMAP, + blk_status_to_errno(bio->bi_status), sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); } @@ -1253,21 +1256,10 @@ static inline __u16 t_error(const struct trace_entry *ent) static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg) { - const __u64 *val = pdu_start(ent, has_cg); + const __be64 *val = pdu_start(ent, has_cg); return be64_to_cpu(*val); } -static void get_pdu_remap(const struct trace_entry *ent, - struct blk_io_trace_remap *r, bool has_cg) -{ - const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg); - __u64 sector_from = __r->sector_from; - - r->device_from = be32_to_cpu(__r->device_from); - r->device_to = be32_to_cpu(__r->device_to); - r->sector_from = be64_to_cpu(sector_from); -} - typedef void (blk_log_action_t) (struct trace_iterator 
*iter, const char *act, bool has_cg); @@ -1407,13 +1399,13 @@ static void blk_log_with_error(struct trace_seq *s, static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { - struct blk_io_trace_remap r = { .device_from = 0, }; + const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg); - get_pdu_remap(ent, &r, has_cg); trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", t_sector(ent), t_sec(ent), - MAJOR(r.device_from), MINOR(r.device_from), - (unsigned long long)r.sector_from); + MAJOR(be32_to_cpu(__r->device_from)), + MINOR(be32_to_cpu(__r->device_from)), + be64_to_cpu(__r->sector_from)); } static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index a2909af4b924..bcc9a98a0524 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -38,6 +38,13 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd) } #endif +#ifndef vdso_cycles_ok +static inline bool vdso_cycles_ok(u64 cycles) +{ + return true; +} +#endif + #ifdef CONFIG_TIME_NS static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, struct __kernel_timespec *ts) @@ -62,6 +69,8 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, return -1; cycles = __arch_get_hw_counter(vd->clock_mode); + if (unlikely(!vdso_cycles_ok(cycles))) + return -1; ns = vdso_ts->nsec; last = vd->cycle_last; ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); @@ -130,6 +139,8 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, return -1; cycles = __arch_get_hw_counter(vd->clock_mode); + if (unlikely(!vdso_cycles_ok(cycles))) + return -1; ns = vdso_ts->nsec; last = vd->cycle_last; ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult); @@ -210,7 +221,7 @@ static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk, return 0; } -static __maybe_unused int +static __always_inline int __cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock, struct __kernel_timespec *ts) { |
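The closing lib/vdso/gettimeofday.c hunks, together with the x86 arch_vdso_cycles_ok() hook near the top of the diff, rely on a single signed comparison to reject the U64_MAX value that paravirt and Hyper-V clocksources return when they invalidate themselves asynchronously. The following is a small userspace sketch of that check, with names chosen to mirror the patch; it is not the kernel implementation.

/*
 * Sketch only: why "(s64)cycles >= 0" is enough to catch U64_MAX.
 * U64_MAX reinterpreted as a signed 64-bit value is -1, so an
 * invalidated clocksource read fails the single comparison below.
 */
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

static int vdso_cycles_ok(uint64_t cycles)
{
        return (int64_t)cycles >= 0;    /* U64_MAX -> -1 -> rejected */
}

int main(void)
{
        uint64_t good = 123456789ULL;   /* a plausible counter read */
        uint64_t bad  = UINT64_MAX;     /* "clocksource invalidated" marker */

        printf("%" PRIu64 " -> %d\n", good, vdso_cycles_ok(good));  /* -> 1 */
        printf("U64_MAX   -> %d\n", vdso_cycles_ok(bad));           /* -> 0 */
        return 0;
}

In the patch, do_hres() and do_hres_timens() call this hook right after __arch_get_hw_counter() and bail out with -1 on failure, so the generic code keeps a no-op default while only x86 pays for the extra compare.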