diff options
author | Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> | 2019-03-05 01:12:19 +0530 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2019-05-03 02:54:57 +1000 |
commit | de269129a48a2d590ba1d20c719e19d86e3ddb3f (patch) | |
tree | f24b4a3afde2d90ce614a347129acf6324315a1a | |
parent | 0acb5f64560a052fd66ab37b212a72964847160f (diff) |
powerpc/hmi: Fix kernel hang when TB is in error state.
On TOD/TB errors timebase register stops/freezes until HMI error recovery
gets TOD/TB back into running state. On successful recovery, TB starts
running again and udelay() that relies on TB value continues to function
properly. But in case when HMI fails to recover from TOD/TB errors, the
TB register stay freezed. With TB not running the __delay() function
keeps looping and never return. If __delay() is called while in panic
path then system hangs and never reboots after panic.
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r-- | arch/powerpc/include/asm/opal-api.h | 10 | ||||
-rw-r--r-- | arch/powerpc/include/asm/opal.h | 2 | ||||
-rw-r--r-- | arch/powerpc/include/asm/time.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 9 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-call.c | 1 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal.c | 21 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/setup.c | 5 |
7 files changed, 49 insertions, 1 deletions
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index e1d118ac61dc..234fde15b37c 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -209,6 +209,7 @@ #define OPAL_SENSOR_GROUP_ENABLE 163 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 +#define OPAL_HANDLE_HMI2 166 #define OPAL_NX_COPROC_INIT 167 #define OPAL_XIVE_GET_VP_STATE 170 #define OPAL_LAST 170 @@ -635,6 +636,15 @@ struct OpalHMIEvent { } u; }; +/* OPAL_HANDLE_HMI2 out_flags */ +enum { + OPAL_HMI_FLAGS_TB_RESYNC = (1ull << 0), /* Timebase has been resynced */ + OPAL_HMI_FLAGS_DEC_LOST = (1ull << 1), /* DEC lost, needs to be reprogrammed */ + OPAL_HMI_FLAGS_HDEC_LOST = (1ull << 2), /* HDEC lost, needs to be reprogrammed */ + OPAL_HMI_FLAGS_TOD_TB_FAIL = (1ull << 3), /* TOD/TB recovery failed. */ + OPAL_HMI_FLAGS_NEW_EVENT = (1ull << 63), /* An event has been created */ +}; + enum { OPAL_P7IOC_DIAG_TYPE_NONE = 0, OPAL_P7IOC_DIAG_TYPE_RGC = 1, diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 4e978d4dea5c..4cc37e708bc7 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -203,6 +203,7 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data); int64_t opal_handle_hmi(void); +int64_t opal_handle_hmi2(__be64 *out_flags); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); @@ -359,6 +360,7 @@ int opal_power_control_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); extern int opal_hmi_exception_early(struct pt_regs *regs); +extern int opal_hmi_exception_early2(struct pt_regs *regs); extern int opal_handle_hmi_exception(struct pt_regs *regs); extern void opal_shutdown(void); diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 54bf7e68a7e1..57e968413d1e 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -36,6 +36,8 @@ extern unsigned long ppc_proc_freq; extern unsigned long ppc_tb_freq; #define DEFAULT_TB_FREQ 125000000UL +extern bool tb_invalid; + struct div_result { u64 result_high; u64 result_low; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 6ef32472ee1d..325d60633dfa 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -150,6 +150,8 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; EXPORT_SYMBOL_GPL(ppc_tb_freq); +bool tb_invalid; + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Factor for converting from cputime_t (timebase ticks) to @@ -459,6 +461,13 @@ void __delay(unsigned long loops) diff += 1000000000; spin_cpu_relax(); } while (diff < loops); + } else if (tb_invalid) { + /* + * TB is in error state and isn't ticking anymore. + * HMI handler was unable to recover from TB error. + * Return immediately, so that kernel won't get stuck here. + */ + spin_cpu_relax(); } else { start = get_tbl(); while (get_tbl() - start < loops) diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index 7cba0d5da3ff..36c8fa3647a2 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -220,6 +220,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2); OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 737c51d63480..f2b063b027f0 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -614,6 +614,27 @@ int opal_hmi_exception_early(struct pt_regs *regs) return 0; } +int opal_hmi_exception_early2(struct pt_regs *regs) +{ + s64 rc; + __be64 out_flags; + + /* + * call opal hmi handler. + * Check 64-bit flag mask to find out if an event was generated, + * and whether TB is still valid or not etc. + */ + rc = opal_handle_hmi2(&out_flags); + if (rc != OPAL_SUCCESS) + return 0; + + if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT) + local_paca->hmi_event_available = 1; + if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL) + tb_invalid = true; + return 1; +} + /* HMI exception handler called in virtual mode during check_irq_replay. */ int opal_handle_hmi_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 14befee4b3f1..3cf40f689aac 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -401,7 +401,10 @@ static void __init pnv_setup_machdep_opal(void) /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */ ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; - ppc_md.hmi_exception_early = opal_hmi_exception_early; + if (opal_check_token(OPAL_HANDLE_HMI2)) + ppc_md.hmi_exception_early = opal_hmi_exception_early2; + else + ppc_md.hmi_exception_early = opal_hmi_exception_early; ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } |