diff options
Diffstat (limited to 'drivers/clocksource')
-rw-r--r-- | drivers/clocksource/hyperv_timer.c | 249 |
1 files changed, 171 insertions, 78 deletions
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index a02b0a224807..977fd05ac35f 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -18,6 +18,9 @@ #include <linux/sched_clock.h> #include <linux/mm.h> #include <linux/cpuhotplug.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/acpi.h> #include <clocksource/hyperv_timer.h> #include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> @@ -43,14 +46,13 @@ static u64 hv_sched_clock_offset __ro_after_init; */ static bool direct_mode_enabled; -static int stimer0_irq; -static int stimer0_vector; +static int stimer0_irq = -1; static int stimer0_message_sint; +static DEFINE_PER_CPU(long, stimer0_evt); /* - * ISR for when stimer0 is operating in Direct Mode. Direct Mode - * does not use VMbus or any VMbus messages, so process here and not - * in the VMbus driver code. + * Common code for stimer0 interrupts coming via Direct Mode or + * as a VMbus message. */ void hv_stimer0_isr(void) { @@ -61,6 +63,16 @@ void hv_stimer0_isr(void) } EXPORT_SYMBOL_GPL(hv_stimer0_isr); +/* + * stimer0 interrupt handler for architectures that support + * per-cpu interrupts, which also implies Direct Mode. + */ +static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id) +{ + hv_stimer0_isr(); + return IRQ_HANDLED; +} + static int hv_ce_set_next_event(unsigned long delta, struct clock_event_device *evt) { @@ -68,16 +80,16 @@ static int hv_ce_set_next_event(unsigned long delta, current_tick = hv_read_reference_counter(); current_tick += delta; - hv_init_timer(0, current_tick); + hv_set_register(HV_REGISTER_STIMER0_COUNT, current_tick); return 0; } static int hv_ce_shutdown(struct clock_event_device *evt) { - hv_init_timer(0, 0); - hv_init_timer_config(0, 0); - if (direct_mode_enabled) - hv_disable_stimer0_percpu_irq(stimer0_irq); + hv_set_register(HV_REGISTER_STIMER0_COUNT, 0); + hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0); + if (direct_mode_enabled && stimer0_irq >= 0) + disable_percpu_irq(stimer0_irq); return 0; } @@ -95,8 +107,9 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) * on the specified hardware vector/IRQ. */ timer_cfg.direct_mode = 1; - timer_cfg.apic_vector = stimer0_vector; - hv_enable_stimer0_percpu_irq(stimer0_irq); + timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR; + if (stimer0_irq >= 0) + enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE); } else { /* * When it expires, the timer will generate a VMbus message, @@ -105,7 +118,7 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) timer_cfg.direct_mode = 0; timer_cfg.sintx = stimer0_message_sint; } - hv_init_timer_config(0, timer_cfg.as_uint64); + hv_set_register(HV_REGISTER_STIMER0_CONFIG, timer_cfg.as_uint64); return 0; } @@ -169,10 +182,58 @@ int hv_stimer_cleanup(unsigned int cpu) } EXPORT_SYMBOL_GPL(hv_stimer_cleanup); +/* + * These placeholders are overridden by arch specific code on + * architectures that need special setup of the stimer0 IRQ because + * they don't support per-cpu IRQs (such as x86/x64). + */ +void __weak hv_setup_stimer0_handler(void (*handler)(void)) +{ +}; + +void __weak hv_remove_stimer0_handler(void) +{ +}; + +/* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */ +static int hv_setup_stimer0_irq(void) +{ + int ret; + + ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR, + ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH); + if (ret < 0) { + pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret); + return ret; + } + stimer0_irq = ret; + + ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr, + "Hyper-V stimer0", &stimer0_evt); + if (ret) { + pr_err("Can't request Hyper-V stimer0 IRQ %d. Error %d", + stimer0_irq, ret); + acpi_unregister_gsi(stimer0_irq); + stimer0_irq = -1; + } + return ret; +} + +static void hv_remove_stimer0_irq(void) +{ + if (stimer0_irq == -1) { + hv_remove_stimer0_handler(); + } else { + free_percpu_irq(stimer0_irq, &stimer0_evt); + acpi_unregister_gsi(stimer0_irq); + stimer0_irq = -1; + } +} + /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ -int hv_stimer_alloc(void) +int hv_stimer_alloc(bool have_percpu_irqs) { - int ret = 0; + int ret; /* * Synthetic timers are always available except on old versions of @@ -188,29 +249,37 @@ int hv_stimer_alloc(void) direct_mode_enabled = ms_hyperv.misc_features & HV_STIMER_DIRECT_MODE_AVAILABLE; - if (direct_mode_enabled) { - ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector, - hv_stimer0_isr); + + /* + * If Direct Mode isn't enabled, the remainder of the initialization + * is done later by hv_stimer_legacy_init() + */ + if (!direct_mode_enabled) + return 0; + + if (have_percpu_irqs) { + ret = hv_setup_stimer0_irq(); if (ret) - goto free_percpu; + goto free_clock_event; + } else { + hv_setup_stimer0_handler(hv_stimer0_isr); + } - /* - * Since we are in Direct Mode, stimer initialization - * can be done now with a CPUHP value in the same range - * as other clockevent devices. - */ - ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, - "clockevents/hyperv/stimer:starting", - hv_stimer_init, hv_stimer_cleanup); - if (ret < 0) - goto free_stimer0_irq; + /* + * Since we are in Direct Mode, stimer initialization + * can be done now with a CPUHP value in the same range + * as other clockevent devices. + */ + ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING, + "clockevents/hyperv/stimer:starting", + hv_stimer_init, hv_stimer_cleanup); + if (ret < 0) { + hv_remove_stimer0_irq(); + goto free_clock_event; } return ret; -free_stimer0_irq: - hv_remove_stimer0_irq(stimer0_irq); - stimer0_irq = 0; -free_percpu: +free_clock_event: free_percpu(hv_clock_event); hv_clock_event = NULL; return ret; @@ -254,23 +323,6 @@ void hv_stimer_legacy_cleanup(unsigned int cpu) } EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup); - -/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */ -void hv_stimer_free(void) -{ - if (!hv_clock_event) - return; - - if (direct_mode_enabled) { - cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); - hv_remove_stimer0_irq(stimer0_irq); - stimer0_irq = 0; - } - free_percpu(hv_clock_event); - hv_clock_event = NULL; -} -EXPORT_SYMBOL_GPL(hv_stimer_free); - /* * Do a global cleanup of clockevents for the cases of kexec and * vmbus exit @@ -287,12 +339,17 @@ void hv_stimer_global_cleanup(void) hv_stimer_legacy_cleanup(cpu); } - /* - * If Direct Mode is enabled, the cpuhp teardown callback - * (hv_stimer_cleanup) will be run on all CPUs to stop the - * stimers. - */ - hv_stimer_free(); + if (!hv_clock_event) + return; + + if (direct_mode_enabled) { + cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING); + hv_remove_stimer0_irq(); + stimer0_irq = -1; + } + free_percpu(hv_clock_event); + hv_clock_event = NULL; + } EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); @@ -302,14 +359,6 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); * the other that uses the TSC reference page feature as defined in the * TLFS. The MSR version is for compatibility with old versions of * Hyper-V and 32-bit x86. The TSC reference page version is preferred. - * - * The Hyper-V clocksource ratings of 250 are chosen to be below the - * TSC clocksource rating of 300. In configurations where Hyper-V offers - * an InvariantTSC, the TSC is not marked "unstable", so the TSC clocksource - * is available and preferred. With the higher rating, it will be the - * default. On older hardware and Hyper-V versions, the TSC is marked - * "unstable", so no TSC clocksource is created and the selected Hyper-V - * clocksource will be the default. */ u64 (*hv_read_reference_counter)(void); @@ -331,7 +380,7 @@ static u64 notrace read_hv_clock_tsc(void) u64 current_tick = hv_read_tsc_page(hv_get_tsc_page()); if (current_tick == U64_MAX) - hv_get_time_ref_count(current_tick); + current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT); return current_tick; } @@ -352,9 +401,9 @@ static void suspend_hv_clock_tsc(struct clocksource *arg) u64 tsc_msr; /* Disable the TSC page */ - hv_get_reference_tsc(tsc_msr); + tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); tsc_msr &= ~BIT_ULL(0); - hv_set_reference_tsc(tsc_msr); + hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); } @@ -364,39 +413,44 @@ static void resume_hv_clock_tsc(struct clocksource *arg) u64 tsc_msr; /* Re-enable the TSC page */ - hv_get_reference_tsc(tsc_msr); + tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); tsc_msr &= GENMASK_ULL(11, 0); tsc_msr |= BIT_ULL(0) | (u64)phys_addr; - hv_set_reference_tsc(tsc_msr); + hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); } +#ifdef VDSO_CLOCKMODE_HVCLOCK static int hv_cs_enable(struct clocksource *cs) { - hv_enable_vdso_clocksource(); + vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK); return 0; } +#endif static struct clocksource hyperv_cs_tsc = { .name = "hyperv_clocksource_tsc_page", - .rating = 250, + .rating = 500, .read = read_hv_clock_tsc_cs, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, .suspend= suspend_hv_clock_tsc, .resume = resume_hv_clock_tsc, +#ifdef VDSO_CLOCKMODE_HVCLOCK .enable = hv_cs_enable, + .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK, +#else + .vdso_clock_mode = VDSO_CLOCKMODE_NONE, +#endif }; static u64 notrace read_hv_clock_msr(void) { - u64 current_tick; /* * Read the partition counter to get the current tick count. This count * is set to 0 when the partition is created and is incremented in * 100 nanosecond units. */ - hv_get_time_ref_count(current_tick); - return current_tick; + return hv_get_register(HV_REGISTER_TIME_REF_COUNT); } static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) @@ -412,12 +466,36 @@ static u64 notrace read_hv_sched_clock_msr(void) static struct clocksource hyperv_cs_msr = { .name = "hyperv_clocksource_msr", - .rating = 250, + .rating = 500, .read = read_hv_clock_msr_cs, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +/* + * Reference to pv_ops must be inline so objtool + * detection of noinstr violations can work correctly. + */ +#ifdef CONFIG_GENERIC_SCHED_CLOCK +static __always_inline void hv_setup_sched_clock(void *sched_clock) +{ + /* + * We're on an architecture with generic sched clock (not x86/x64). + * The Hyper-V sched clock read function returns nanoseconds, not + * the normal 100ns units of the Hyper-V synthetic clock. + */ + sched_clock_register(sched_clock, 64, NSEC_PER_SEC); +} +#elif defined CONFIG_PARAVIRT +static __always_inline void hv_setup_sched_clock(void *sched_clock) +{ + /* We're on x86/x64 *and* using PV ops */ + paravirt_set_sched_clock(sched_clock); +} +#else /* !CONFIG_GENERIC_SCHED_CLOCK && !CONFIG_PARAVIRT */ +static __always_inline void hv_setup_sched_clock(void *sched_clock) {} +#endif /* CONFIG_GENERIC_SCHED_CLOCK */ + static bool __init hv_init_tsc_clocksource(void) { u64 tsc_msr; @@ -429,6 +507,22 @@ static bool __init hv_init_tsc_clocksource(void) if (hv_root_partition) return false; + /* + * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly + * handles frequency and offset changes due to live migration, + * pause/resume, and other VM management operations. So lower the + * Hyper-V Reference TSC rating, causing the generic TSC to be used. + * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference + * TSC will be preferred over the virtualized ARM64 arch counter. + * While the Hyper-V MSR clocksource won't be used since the + * Reference TSC clocksource is present, change its rating as + * well for consistency. + */ + if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { + hyperv_cs_tsc.rating = 250; + hyperv_cs_msr.rating = 250; + } + hv_read_reference_counter = read_hv_clock_tsc; phys_addr = virt_to_phys(hv_get_tsc_page()); @@ -439,12 +533,11 @@ static bool __init hv_init_tsc_clocksource(void) * (which already has at least the low 12 bits set to zero since * it is page aligned). Also set the "enable" bit, which is bit 0. */ - hv_get_reference_tsc(tsc_msr); + tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC); tsc_msr &= GENMASK_ULL(11, 0); tsc_msr = tsc_msr | 0x1 | (u64)phys_addr; - hv_set_reference_tsc(tsc_msr); + hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr); - hv_set_clocksource_vdso(hyperv_cs_tsc); clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); hv_sched_clock_offset = hv_read_reference_counter(); |