summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJiri Pirko <jiri@nvidia.com>2020-09-15 11:40:58 +0300
committerDavid S. Miller <davem@davemloft.net>2020-09-15 15:57:16 -0700
commit7d83ee11100812138a9cc642d5341d7770cf6584 (patch)
tree266fba7445fda8c530ae3b74236d1fec39d3a556
parente2ce94dc1d89e0f76ddd202cea72e0f505083d0a (diff)
mlxsw: core: Introduce fw_fatal health reporter
Introduce devlink health reporter to report FW fatal events. Implement the event listener using MFDE trap and enable the events to be propagated using MFGD register configuration. Signed-off-by: Jiri Pirko <jiri@nvidia.com> Signed-off-by: Ido Schimmel <idosch@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c240
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/trap.h2
5 files changed, 256 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 7b5939e068d1..1bb21fe295b9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -84,6 +84,9 @@ struct mlxsw_core {
struct mlxsw_core_port *ports;
unsigned int max_ports;
bool fw_flash_in_progress;
+ struct {
+ struct devlink_health_reporter *fw_fatal;
+ } health;
unsigned long driver_priv[];
/* driver_priv has to be always the last item */
};
@@ -1612,6 +1615,236 @@ static void mlxsw_core_params_unregister(struct mlxsw_core *mlxsw_core)
mlxsw_core->driver->params_unregister(mlxsw_core);
}
+struct mlxsw_core_health_event {
+ struct mlxsw_core *mlxsw_core;
+ char mfde_pl[MLXSW_REG_MFDE_LEN];
+ struct work_struct work;
+};
+
+static void mlxsw_core_health_event_work(struct work_struct *work)
+{
+ struct mlxsw_core_health_event *event;
+ struct mlxsw_core *mlxsw_core;
+
+ event = container_of(work, struct mlxsw_core_health_event, work);
+ mlxsw_core = event->mlxsw_core;
+ devlink_health_report(mlxsw_core->health.fw_fatal, "FW fatal event occurred",
+ event->mfde_pl);
+ kfree(event);
+}
+
+static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg,
+ char *mfde_pl, void *priv)
+{
+ struct mlxsw_core_health_event *event;
+ struct mlxsw_core *mlxsw_core = priv;
+
+ event = kmalloc(sizeof(*event), GFP_ATOMIC);
+ if (!event)
+ return;
+ event->mlxsw_core = mlxsw_core;
+ memcpy(event->mfde_pl, mfde_pl, sizeof(event->mfde_pl));
+ INIT_WORK(&event->work, mlxsw_core_health_event_work);
+ mlxsw_core_schedule_work(&event->work);
+}
+
+static const struct mlxsw_listener mlxsw_core_health_listener =
+ MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE);
+
+static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *priv_ctx,
+ struct netlink_ext_ack *extack)
+{
+ char *mfde_pl = priv_ctx;
+ char *val_str;
+ u8 event_id;
+ u32 val;
+ int err;
+
+ if (!priv_ctx)
+ /* User-triggered dumps are not possible */
+ return -EOPNOTSUPP;
+
+ val = mlxsw_reg_mfde_irisc_id_get(mfde_pl);
+ err = devlink_fmsg_u8_pair_put(fmsg, "irisc_id", val);
+ if (err)
+ return err;
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "event");
+ if (err)
+ return err;
+
+ event_id = mlxsw_reg_mfde_event_id_get(mfde_pl);
+ err = devlink_fmsg_u8_pair_put(fmsg, "id", event_id);
+ if (err)
+ return err;
+ switch (event_id) {
+ case MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO:
+ val_str = "CR space timeout";
+ break;
+ case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP:
+ val_str = "KVD insertion machine stopped";
+ break;
+ default:
+ val_str = NULL;
+ }
+ if (val_str) {
+ err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str);
+ if (err)
+ return err;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ val = mlxsw_reg_mfde_method_get(mfde_pl);
+ switch (val) {
+ case MLXSW_REG_MFDE_METHOD_QUERY:
+ val_str = "query";
+ break;
+ case MLXSW_REG_MFDE_METHOD_WRITE:
+ val_str = "write";
+ break;
+ default:
+ val_str = NULL;
+ }
+ if (val_str) {
+ err = devlink_fmsg_string_pair_put(fmsg, "method", val_str);
+ if (err)
+ return err;
+ }
+
+ val = mlxsw_reg_mfde_long_process_get(mfde_pl);
+ err = devlink_fmsg_bool_pair_put(fmsg, "long_process", val);
+ if (err)
+ return err;
+
+ val = mlxsw_reg_mfde_command_type_get(mfde_pl);
+ switch (val) {
+ case MLXSW_REG_MFDE_COMMAND_TYPE_MAD:
+ val_str = "mad";
+ break;
+ case MLXSW_REG_MFDE_COMMAND_TYPE_EMAD:
+ val_str = "emad";
+ break;
+ case MLXSW_REG_MFDE_COMMAND_TYPE_CMDIF:
+ val_str = "cmdif";
+ break;
+ default:
+ val_str = NULL;
+ }
+ if (val_str) {
+ err = devlink_fmsg_string_pair_put(fmsg, "command_type", val_str);
+ if (err)
+ return err;
+ }
+
+ val = mlxsw_reg_mfde_reg_attr_id_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "reg_attr_id", val);
+ if (err)
+ return err;
+
+ if (event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO) {
+ val = mlxsw_reg_mfde_log_address_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_log_id_get(mfde_pl);
+ err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val);
+ if (err)
+ return err;
+ } else if (event_id == MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP) {
+ val = mlxsw_reg_mfde_pipes_mask_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+mlxsw_core_health_fw_fatal_test(struct devlink_health_reporter *reporter,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_core *mlxsw_core = devlink_health_reporter_priv(reporter);
+ char mfgd_pl[MLXSW_REG_MFGD_LEN];
+ int err;
+
+ /* Read the register first to make sure no other bits are changed. */
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mfgd_trigger_test_set(mfgd_pl, true);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl);
+}
+
+static const struct devlink_health_reporter_ops
+mlxsw_core_health_fw_fatal_ops = {
+ .name = "fw_fatal",
+ .dump = mlxsw_core_health_fw_fatal_dump,
+ .test = mlxsw_core_health_fw_fatal_test,
+};
+
+static int mlxsw_core_health_fw_fatal_config(struct mlxsw_core *mlxsw_core,
+ bool enable)
+{
+ char mfgd_pl[MLXSW_REG_MFGD_LEN];
+ int err;
+
+ /* Read the register first to make sure no other bits are changed. */
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mfgd_fatal_event_mode_set(mfgd_pl, enable);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl);
+}
+
+static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ struct devlink_health_reporter *fw_fatal;
+ int err;
+
+ if (!mlxsw_core->driver->fw_fatal_enabled)
+ return 0;
+
+ fw_fatal = devlink_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops,
+ 0, mlxsw_core);
+ if (IS_ERR(fw_fatal)) {
+ dev_err(mlxsw_core->bus_info->dev, "Failed to create fw fatal reporter");
+ return PTR_ERR(fw_fatal);
+ }
+ mlxsw_core->health.fw_fatal = fw_fatal;
+
+ err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core);
+ if (err)
+ goto err_trap_register;
+
+ err = mlxsw_core_health_fw_fatal_config(mlxsw_core, true);
+ if (err)
+ goto err_fw_fatal_config;
+
+ return 0;
+
+err_fw_fatal_config:
+ mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core);
+err_trap_register:
+ devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal);
+ return err;
+}
+
+static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core)
+{
+ if (!mlxsw_core->driver->fw_fatal_enabled)
+ return;
+
+ mlxsw_core_health_fw_fatal_config(mlxsw_core, false);
+ mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core);
+ /* Make sure there is no more event work scheduled */
+ mlxsw_core_flush_owq();
+ devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal);
+}
+
static int
__mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
const struct mlxsw_bus *mlxsw_bus,
@@ -1695,6 +1928,10 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (err)
goto err_fw_rev_validate;
+ err = mlxsw_core_health_init(mlxsw_core);
+ if (err)
+ goto err_health_init;
+
if (mlxsw_driver->init) {
err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack);
if (err)
@@ -1723,6 +1960,8 @@ err_hwmon_init:
if (mlxsw_core->driver->fini)
mlxsw_core->driver->fini(mlxsw_core);
err_driver_init:
+ mlxsw_core_health_fini(mlxsw_core);
+err_health_init:
err_fw_rev_validate:
if (!reload)
mlxsw_core_params_unregister(mlxsw_core);
@@ -1795,6 +2034,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
mlxsw_hwmon_fini(mlxsw_core->hwmon);
if (mlxsw_core->driver->fini)
mlxsw_core->driver->fini(mlxsw_core);
+ mlxsw_core_health_fini(mlxsw_core);
if (!reload)
mlxsw_core_params_unregister(mlxsw_core);
if (!reload)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 6ec769906637..2ca085a44774 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -370,6 +370,7 @@ struct mlxsw_driver {
u8 txhdr_len;
const struct mlxsw_config_profile *profile;
bool res_query_enabled;
+ bool fw_fatal_enabled;
};
int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 421f02eac20f..6e3d55006089 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5579,6 +5579,7 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4);
enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+ MLXSW_REG_HTGT_TRAP_GROUP_MFDE,
MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 18d2eacfae83..351d385158e6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2529,11 +2529,20 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
{
char htgt_pl[MLXSW_REG_HTGT_LEN];
+ int err;
mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
MLXSW_REG_HTGT_INVALID_POLICER,
MLXSW_REG_HTGT_DEFAULT_PRIORITY,
MLXSW_REG_HTGT_DEFAULT_TC);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MFDE,
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
}
@@ -3287,6 +3296,7 @@ static struct mlxsw_driver mlxsw_sp1_driver = {
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp1_config_profile,
.res_query_enabled = true,
+ .fw_fatal_enabled = true,
};
static struct mlxsw_driver mlxsw_sp2_driver = {
@@ -3326,6 +3336,7 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp2_config_profile,
.res_query_enabled = true,
+ .fw_fatal_enabled = true,
};
static struct mlxsw_driver mlxsw_sp3_driver = {
@@ -3365,6 +3376,7 @@ static struct mlxsw_driver mlxsw_sp3_driver = {
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp2_config_profile,
.res_query_enabled = true,
+ .fw_fatal_enabled = true,
};
bool mlxsw_sp_port_dev_check(const struct net_device *dev)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 33909887d0ac..fe0b8af287a7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -120,6 +120,8 @@ enum {
};
enum mlxsw_event_trap_id {
+ /* Fatal Event generated by FW */
+ MLXSW_TRAP_ID_MFDE = 0x3,
/* Port Up/Down event generated by hardware */
MLXSW_TRAP_ID_PUDE = 0x8,
/* PTP Ingress FIFO has a new entry */