diff options
author | David S. Miller <davem@davemloft.net> | 2016-07-05 09:06:31 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-07-05 09:06:31 -0700 |
commit | 684a95c064fc63e48c9936fe3d9dfd5ed1ea3b95 (patch) | |
tree | ba05255580a6015aedcaeaff05ba70d8413cdc5d | |
parent | 9046a745e29aa4c7e4f1dda3a50199dbe62fc9e8 (diff) | |
parent | 0b2361d9d946558c90f0ae8b21725022bea9f2cb (diff) |
Merge branch 'mlxsw-ipv4-unicast-routing'
Jiri Pirko says:
====================
mlxsw: Implement IPV4 unicast routing
This patchset enables IPv4 unicast routing in the Mellanox Spectrum ASIC
switch driver. This builds upon the work that was done by a couple of
previous patchsets.
Patches 1,2,6 add a couple of dependencies outside the driver. Namely, the
ability to propagate ndo_neigh_construct()/destroy() through stacked devices and
a notification whenever DELAY_PROBE_TIME changes. When propagated down, the
ndos allow drivers to add and remove neighbour entries from their private
neighbour table. The DELAY_PROBE_TIME notification gives drivers the ability to
correctly configure their polling interval for neighbour activity, so that
active neighbour won't be marked as STALE.
Patches 3-5,7-8 add the neighbour offloading infrastructure, where patch 7 uses
the DELAY_PROBE_TIME notification in order to correctly configure the device's
polling interval. Patch 8 finally programs neighbours to the device's table
based on NEIGH_UPDATE notifications, so that directly connected routes can
be used.
Patches 9-16 build upon the previous patches and extend the router with
remote routes (nexthop) support.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/bonding/bond_main.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/Makefile | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/cmd.h | 43 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/core.h | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/pci.c | 14 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/reg.h | 490 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 24 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c | 91 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 1006 | ||||
-rw-r--r-- | drivers/net/ethernet/rocker/rocker_main.c | 3 | ||||
-rw-r--r-- | drivers/net/team/team.c | 2 | ||||
-rw-r--r-- | include/linux/netdevice.h | 10 | ||||
-rw-r--r-- | include/net/netevent.h | 1 | ||||
-rw-r--r-- | net/8021q/vlan_dev.c | 2 | ||||
-rw-r--r-- | net/atm/clip.c | 2 | ||||
-rw-r--r-- | net/bridge/br_device.c | 2 | ||||
-rw-r--r-- | net/core/dev.c | 44 | ||||
-rw-r--r-- | net/core/neighbour.c | 6 | ||||
-rw-r--r-- | net/ieee802154/6lowpan/core.c | 2 |
20 files changed, 1747 insertions, 12 deletions
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 90157e20357e..480d73ac7d1b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4137,6 +4137,8 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_add_slave = bond_enslave, .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index ea05f8a10e8c..d20ae1838a64 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o mlxsw_switchx2-objs := switchx2.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ - spectrum_switchdev.o spectrum_router.o + spectrum_switchdev.o spectrum_router.o \ + spectrum_kvdl.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index cd63b8263688..f9cd6e3f7709 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -607,6 +607,24 @@ MLXSW_ITEM32(cmd_mbox, config_profile, */ MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1); +/* cmd_mbox_config_set_kvd_linear_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1); + +/* cmd_mbox_config_set_kvd_hash_single_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1); + +/* cmd_mbox_config_set_kvd_hash_double_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1); + /* cmd_mbox_config_profile_max_vepa_channels * Maximum number of VEPA channels per port (0 through 16) * 0 - multi-channel VEPA is disabled @@ -733,6 +751,31 @@ MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16); */ MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1); +/* cmd_mbox_config_kvd_linear_size + * KVD Linear Size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24); + +/* cmd_mbox_config_kvd_hash_single_size + * KVD Hash single-entries size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be greater or equal to cap_min_kvd_hash_single_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24); + +/* cmd_mbox_config_kvd_hash_double_size + * KVD Hash double-entries size (units of single-size entries) + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be either 0 or greater or equal to cap_min_kvd_hash_double_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_double_size, 0x5C, 0, 24); + /* cmd_mbox_config_profile_swid_config_mask * Modify Switch Partition Configuration mask. When set, the configu- * ration value for the Switch Partition are taken from the mailbox. diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 436bc49df6ab..2fe385cce203 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -190,7 +190,8 @@ struct mlxsw_config_profile { used_max_ib_mc:1, used_max_pkey:1, used_ar_sec:1, - used_adaptive_routing_group_cap:1; + used_adaptive_routing_group_cap:1, + used_kvd_sizes:1; u8 max_vepa_channels; u16 max_lag; u16 max_port_per_lag; @@ -211,6 +212,9 @@ struct mlxsw_config_profile { u8 ar_sec; u16 adaptive_routing_group_cap; u8 arn; + u32 kvd_linear_size; + u32 kvd_hash_single_size; + u32 kvd_hash_double_size; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 7f4173c8eda3..ddbc9f22278d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1255,6 +1255,20 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( mbox, profile->adaptive_routing_group_cap); } + if (profile->used_kvd_sizes) { + mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_linear_size_set( + mbox, profile->kvd_linear_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set( + mbox, profile->kvd_hash_single_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set( + mbox, profile->kvd_hash_double_size); + } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i, diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 9280d96bb291..0cc148566677 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4,6 +4,7 @@ * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> * Copyright (c) 2015-2016 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -3454,6 +3455,137 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); } +/* RATR - Router Adjacency Table Register + * -------------------------------------- + * The RATR register is used to configure the Router Adjacency (next-hop) + * Table. + */ +#define MLXSW_REG_RATR_ID 0x8008 +#define MLXSW_REG_RATR_LEN 0x2C + +static const struct mlxsw_reg_info mlxsw_reg_ratr = { + .id = MLXSW_REG_RATR_ID, + .len = MLXSW_REG_RATR_LEN, +}; + +enum mlxsw_reg_ratr_op { + /* Read */ + MLXSW_REG_RATR_OP_QUERY_READ = 0, + /* Read and clear activity */ + MLXSW_REG_RATR_OP_QUERY_READ_CLEAR = 2, + /* Write Adjacency entry */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY = 1, + /* Write Adjacency entry only if the activity is cleared. + * The write may not succeed if the activity is set. There is not + * direct feedback if the write has succeeded or not, however + * the get will reveal the actual entry (SW can compare the get + * response to the set command). + */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY = 3, +}; + +/* reg_ratr_op + * Note that Write operation may also be used for updating + * counter_set_type and counter_index. In this case all other + * fields must not be updated. + * Access: OP + */ +MLXSW_ITEM32(reg, ratr, op, 0x00, 28, 4); + +/* reg_ratr_v + * Valid bit. Indicates if the adjacency entry is valid. + * Note: the device may need some time before reusing an invalidated + * entry. During this time the entry can not be reused. It is + * recommended to use another entry before reusing an invalidated + * entry (e.g. software can put it at the end of the list for + * reusing). Trying to access an invalidated entry not yet cleared + * by the device results with failure indicating "Try Again" status. + * When valid is '0' then egress_router_interface,trap_action, + * adjacency_parameters and counters are reserved + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1); + +/* reg_ratr_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. To clear the a bit, use "clear activity". + * Access: RO + */ +MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1); + +/* reg_ratr_adjacency_index_low + * Bits 15:0 of index into the adjacency table. + * For SwitchX and SwitchX-2, the adjacency table is linear and + * used for adjacency entries only. + * For Spectrum, the index is to the KVD linear. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_low, 0x04, 0, 16); + +/* reg_ratr_egress_router_interface + * Range is 0 .. cap_max_router_interfaces - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, egress_router_interface, 0x08, 0, 16); + +enum mlxsw_reg_ratr_trap_action { + MLXSW_REG_RATR_TRAP_ACTION_NOP, + MLXSW_REG_RATR_TRAP_ACTION_TRAP, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR, + MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_ratr_trap_action + * see mlxsw_reg_ratr_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4); + +enum mlxsw_reg_ratr_trap_id { + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0, + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1, +}; + +/* reg_ratr_adjacency_index_high + * Bits 23:16 of the adjacency_index. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8); + +/* reg_ratr_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8); + +/* reg_ratr_eth_destination_mac + * MAC address of the destination next-hop. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6); + +static inline void +mlxsw_reg_ratr_pack(char *payload, + enum mlxsw_reg_ratr_op op, bool valid, + u32 adjacency_index, u16 egress_rif) +{ + MLXSW_REG_ZERO(ratr, payload); + mlxsw_reg_ratr_op_set(payload, op); + mlxsw_reg_ratr_v_set(payload, valid); + mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index); + mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16); + mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif); +} + +static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, + const char *dest_mac) +{ + mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); +} + /* RALTA - Router Algorithmic LPM Tree Allocation Register * ------------------------------------------------------- * RALTA is used to allocate the LPM trees of the SHSPM method. @@ -3884,6 +4016,356 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload) MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); } +/* RAUHT - Router Algorithmic LPM Unicast Host Table Register + * ---------------------------------------------------------- + * The RAUHT register is used to configure and query the Unicast Host table in + * devices that implement the Algorithmic LPM. + */ +#define MLXSW_REG_RAUHT_ID 0x8014 +#define MLXSW_REG_RAUHT_LEN 0x74 + +static const struct mlxsw_reg_info mlxsw_reg_rauht = { + .id = MLXSW_REG_RAUHT_ID, + .len = MLXSW_REG_RAUHT_LEN, +}; + +enum mlxsw_reg_rauht_type { + MLXSW_REG_RAUHT_TYPE_IPV4, + MLXSW_REG_RAUHT_TYPE_IPV6, +}; + +/* reg_rauht_type + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, type, 0x00, 24, 2); + +enum mlxsw_reg_rauht_op { + MLXSW_REG_RAUHT_OP_QUERY_READ = 0, + /* Read operation */ + MLXSW_REG_RAUHT_OP_QUERY_CLEAR_ON_READ = 1, + /* Clear on read operation. Used to read entry and clear + * activity bit. + */ + MLXSW_REG_RAUHT_OP_WRITE_ADD = 0, + /* Add. Used to write a new entry to the table. All R/W fields are + * relevant for new entry. Activity bit is set for new entries. + */ + MLXSW_REG_RAUHT_OP_WRITE_UPDATE = 1, + /* Update action. Used to update an existing route entry and + * only update the following fields: + * trap_action, trap_id, mac, counter_set_type, counter_index + */ + MLXSW_REG_RAUHT_OP_WRITE_CLEAR_ACTIVITY = 2, + /* Clear activity. A bit is cleared for the entry. */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE = 3, + /* Delete entry */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL = 4, + /* Delete all host entries on a RIF. In this command, dip + * field is reserved. + */ +}; + +/* reg_rauht_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauht, op, 0x00, 20, 3); + +/* reg_rauht_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. + * To clear the a bit, use "clear activity" op. + * Enabled by activity_dis in RGCR + * Access: RO + */ +MLXSW_ITEM32(reg, rauht, a, 0x00, 16, 1); + +/* reg_rauht_rif + * Router Interface + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); + +/* reg_rauht_dip* + * Destination address. + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); + +enum mlxsw_reg_rauht_trap_action { + MLXSW_REG_RAUHT_TRAP_ACTION_NOP, + MLXSW_REG_RAUHT_TRAP_ACTION_TRAP, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR, + MLXSW_REG_RAUHT_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_rauht_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_action, 0x60, 28, 4); + +enum mlxsw_reg_rauht_trap_id { + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS0, + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS1, +}; + +/* reg_rauht_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR, + * trap_id is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); + +/* reg_rauht_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_set_type, 0x68, 24, 8); + +/* reg_rauht_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_index, 0x68, 0, 24); + +/* reg_rauht_mac + * MAC address. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rauht, mac, 0x6E, 6); + +static inline void mlxsw_reg_rauht_pack(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac) +{ + MLXSW_REG_ZERO(rauht, payload); + mlxsw_reg_rauht_op_set(payload, op); + mlxsw_reg_rauht_rif_set(payload, rif); + mlxsw_reg_rauht_mac_memcpy_to(payload, mac); +} + +static inline void mlxsw_reg_rauht_pack4(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, u32 dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_dip4_set(payload, dip); +} + +/* RALEU - Router Algorithmic LPM ECMP Update Register + * --------------------------------------------------- + * The register enables updating the ECMP section in the action for multiple + * LPM Unicast entries in a single operation. The update is executed to + * all entries of a {virtual router, protocol} tuple using the same ECMP group. + */ +#define MLXSW_REG_RALEU_ID 0x8015 +#define MLXSW_REG_RALEU_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_raleu = { + .id = MLXSW_REG_RALEU_ID, + .len = MLXSW_REG_RALEU_LEN, +}; + +/* reg_raleu_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, protocol, 0x00, 24, 4); + +/* reg_raleu_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, virtual_router, 0x00, 0, 16); + +/* reg_raleu_adjacency_index + * Adjacency Index used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, adjacency_index, 0x10, 0, 24); + +/* reg_raleu_ecmp_size + * ECMP Size used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, ecmp_size, 0x14, 0, 13); + +/* reg_raleu_new_adjacency_index + * New Adjacency Index. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_adjacency_index, 0x20, 0, 24); + +/* reg_raleu_new_ecmp_size + * New ECMP Size. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_ecmp_size, 0x24, 0, 13); + +static inline void mlxsw_reg_raleu_pack(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + u16 virtual_router, + u32 adjacency_index, u16 ecmp_size, + u32 new_adjacency_index, + u16 new_ecmp_size) +{ + MLXSW_REG_ZERO(raleu, payload); + mlxsw_reg_raleu_protocol_set(payload, protocol); + mlxsw_reg_raleu_virtual_router_set(payload, virtual_router); + mlxsw_reg_raleu_adjacency_index_set(payload, adjacency_index); + mlxsw_reg_raleu_ecmp_size_set(payload, ecmp_size); + mlxsw_reg_raleu_new_adjacency_index_set(payload, new_adjacency_index); + mlxsw_reg_raleu_new_ecmp_size_set(payload, new_ecmp_size); +} + +/* RAUHTD - Router Algorithmic LPM Unicast Host Table Dump Register + * ---------------------------------------------------------------- + * The RAUHTD register allows dumping entries from the Router Unicast Host + * Table. For a given session an entry is dumped no more than one time. The + * first RAUHTD access after reset is a new session. A session ends when the + * num_rec response is smaller than num_rec request or for IPv4 when the + * num_entries is smaller than 4. The clear activity affect the current session + * or the last session if a new session has not started. + */ +#define MLXSW_REG_RAUHTD_ID 0x8018 +#define MLXSW_REG_RAUHTD_BASE_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_MAX_NUM 32 +#define MLXSW_REG_RAUHTD_LEN (MLXSW_REG_RAUHTD_BASE_LEN + \ + MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN) +#define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4 + +static const struct mlxsw_reg_info mlxsw_reg_rauhtd = { + .id = MLXSW_REG_RAUHTD_ID, + .len = MLXSW_REG_RAUHTD_LEN, +}; + +#define MLXSW_REG_RAUHTD_FILTER_A BIT(0) +#define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3) + +/* reg_rauhtd_filter_fields + * if a bit is '0' then the relevant field is ignored and dump is done + * regardless of the field value + * Bit0 - filter by activity: entry_a + * Bit3 - filter by entry rip: entry_rif + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, filter_fields, 0x00, 0, 8); + +enum mlxsw_reg_rauhtd_op { + MLXSW_REG_RAUHTD_OP_DUMP, + MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR, +}; + +/* reg_rauhtd_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauhtd, op, 0x04, 24, 2); + +/* reg_rauhtd_num_rec + * At request: number of records requested + * At response: number of records dumped + * For IPv4, each record has 4 entries at request and up to 4 entries + * at response + * Range is 0..MLXSW_REG_RAUHTD_REC_MAX_NUM + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, num_rec, 0x04, 0, 8); + +/* reg_rauhtd_entry_a + * Dump only if activity has value of entry_a + * Reserved if filter_fields bit0 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_a, 0x08, 16, 1); + +enum mlxsw_reg_rauhtd_type { + MLXSW_REG_RAUHTD_TYPE_IPV4, + MLXSW_REG_RAUHTD_TYPE_IPV6, +}; + +/* reg_rauhtd_type + * Dump only if record type is: + * 0 - IPv4 + * 1 - IPv6 + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, type, 0x08, 0, 4); + +/* reg_rauhtd_entry_rif + * Dump only if RIF has value of entry_rif + * Reserved if filter_fields bit3 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_rif, 0x0C, 0, 16); + +static inline void mlxsw_reg_rauhtd_pack(char *payload, + enum mlxsw_reg_rauhtd_type type) +{ + MLXSW_REG_ZERO(rauhtd, payload); + mlxsw_reg_rauhtd_filter_fields_set(payload, MLXSW_REG_RAUHTD_FILTER_A); + mlxsw_reg_rauhtd_op_set(payload, MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR); + mlxsw_reg_rauhtd_num_rec_set(payload, MLXSW_REG_RAUHTD_REC_MAX_NUM); + mlxsw_reg_rauhtd_entry_a_set(payload, 1); + mlxsw_reg_rauhtd_type_set(payload, type); +} + +/* reg_rauhtd_ipv4_rec_num_entries + * Number of valid entries in this record: + * 0 - 1 valid entry + * 1 - 2 valid entries + * 2 - 3 valid entries + * 3 - 4 valid entries + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_rec_num_entries, + MLXSW_REG_RAUHTD_BASE_LEN, 28, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +/* reg_rauhtd_rec_type + * Record type. + * 0 - IPv4 + * 1 - IPv6 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, rec_type, MLXSW_REG_RAUHTD_BASE_LEN, 24, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +#define MLXSW_REG_RAUHTD_IPV4_ENT_LEN 0x8 + +/* reg_rauhtd_ipv4_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_dip + * Destination IPv4 address. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); + +static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, + int ent_index, u16 *p_rif, + u32 *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv4_ent_rif_get(payload, ent_index); + *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -4626,6 +5108,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "RGCR"; case MLXSW_REG_RITR_ID: return "RITR"; + case MLXSW_REG_RATR_ID: + return "RATR"; case MLXSW_REG_RALTA_ID: return "RALTA"; case MLXSW_REG_RALST_ID: @@ -4634,6 +5118,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "RALTB"; case MLXSW_REG_RALUE_ID: return "RALUE"; + case MLXSW_REG_RAUHT_ID: + return "RAUHT"; + case MLXSW_REG_RALEU_ID: + return "RALEU"; + case MLXSW_REG_RAUHTD_ID: + return "RAUHTD"; case MLXSW_REG_MFCR_ID: return "MFCR"; case MLXSW_REG_MFSC_ID: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 7b2b741b2a23..c812513e079d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -803,6 +803,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_get_stats64 = mlxsw_sp_port_get_stats64, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, + .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, + .ndo_neigh_destroy = mlxsw_sp_router_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, @@ -2354,6 +2356,10 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_ib_mc = 0, .used_max_pkey = 1, .max_pkey = 0, + .used_kvd_sizes = 1, + .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, + .kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE, + .kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE, .swid_config = { { .used_type = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 958e821ce845..ef4ac8987a2a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -39,6 +39,7 @@ #include <linux/types.h> #include <linux/netdevice.h> +#include <linux/rhashtable.h> #include <linux/bitops.h> #include <linux/if_vlan.h> #include <linux/list.h> @@ -75,6 +76,10 @@ #define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL) #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) +#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ +#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */ +#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */ + /* Maximum delay buffer needed in case of PAUSE frames, in cells. * Assumes 100m cable and maximum MTU. */ @@ -212,6 +217,15 @@ struct mlxsw_sp_vr { struct mlxsw_sp_router { struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX]; + struct rhashtable neigh_ht; + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ + } neighs_update; + struct delayed_work nexthop_probe_dw; +#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ + struct list_head nexthop_group_list; + struct list_head nexthop_neighs_list; }; struct mlxsw_sp { @@ -243,6 +257,9 @@ struct mlxsw_sp { u8 port_to_module[MLXSW_PORT_MAX_PORTS]; struct mlxsw_sp_sb sb; struct mlxsw_sp_router router; + struct { + DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); + } kvdl; }; static inline struct mlxsw_sp_upper * @@ -524,5 +541,12 @@ int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans); int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_ipv4_fib *fib4); +int mlxsw_sp_router_neigh_construct(struct net_device *dev, + struct neighbour *n); +void mlxsw_sp_router_neigh_destroy(struct net_device *dev, + struct neighbour *n); + +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c new file mode 100644 index 000000000000..ac321e8e5c1a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -0,0 +1,91 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include "spectrum.h" + +#define MLXSW_SP_KVDL_SINGLE_BASE 0 +#define MLXSW_SP_KVDL_SINGLE_SIZE 16384 +#define MLXSW_SP_KVDL_CHUNKS_BASE \ + (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE) +#define MLXSW_SP_KVDL_CHUNKS_SIZE \ + (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE) +#define MLXSW_SP_CHUNK_MAX 32 + +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count) +{ + int entry_index; + int size; + int type_base; + int type_size; + int type_entries; + + if (entry_count == 0 || entry_count > MLXSW_SP_CHUNK_MAX) { + return -EINVAL; + } else if (entry_count == 1) { + type_base = MLXSW_SP_KVDL_SINGLE_BASE; + type_size = MLXSW_SP_KVDL_SINGLE_SIZE; + type_entries = 1; + } else { + type_base = MLXSW_SP_KVDL_CHUNKS_BASE; + type_size = MLXSW_SP_KVDL_CHUNKS_SIZE; + type_entries = MLXSW_SP_CHUNK_MAX; + } + + entry_index = type_base; + size = type_base + type_size; + for_each_clear_bit_from(entry_index, mlxsw_sp->kvdl.usage, size) { + int i; + + for (i = 0; i < type_entries; i++) + set_bit(entry_index + i, mlxsw_sp->kvdl.usage); + return entry_index; + } + return -ENOBUFS; +} + +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index) +{ + int type_entries; + int i; + + if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE) + type_entries = 1; + else + type_entries = MLXSW_SP_CHUNK_MAX; + for (i = 0; i < type_entries; i++) + clear_bit(entry_index + i, mlxsw_sp->kvdl.usage); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7e3992a681b3..e084ea5448ac 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3,6 +3,7 @@ * Copyright (c) 2016 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> + * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +39,10 @@ #include <linux/rhashtable.h> #include <linux/bitops.h> #include <linux/in6.h> +#include <linux/notifier.h> +#include <net/netevent.h> +#include <net/neighbour.h> +#include <net/arp.h> #include "spectrum.h" #include "core.h" @@ -112,6 +117,8 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_TRAP, }; +struct mlxsw_sp_nexthop_group; + struct mlxsw_sp_fib_entry { struct rhash_head ht_node; struct mlxsw_sp_fib_key key; @@ -119,6 +126,8 @@ struct mlxsw_sp_fib_entry { u8 added:1; u16 rif; /* used for action local */ struct mlxsw_sp_vr *vr; + struct list_head nexthop_group_node; + struct mlxsw_sp_nexthop_group *nh_group; }; struct mlxsw_sp_fib { @@ -544,6 +553,949 @@ static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) } } +struct mlxsw_sp_neigh_key { + unsigned char addr[sizeof(struct in6_addr)]; + struct net_device *dev; +}; + +struct mlxsw_sp_neigh_entry { + struct rhash_head ht_node; + struct mlxsw_sp_neigh_key key; + u16 rif; + struct neighbour *n; + bool offloaded; + struct delayed_work dw; + struct mlxsw_sp_port *mlxsw_sp_port; + unsigned char ha[ETH_ALEN]; + struct list_head nexthop_list; /* list of nexthops using + * this neigh entry + */ + struct list_head nexthop_neighs_list_node; +}; + +static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key), + .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_neigh_key), +}; + +static int +mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static void +mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work); + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len, + struct net_device *dev, u16 rif, + struct neighbour *n) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC); + if (!neigh_entry) + return NULL; + memcpy(neigh_entry->key.addr, addr, addr_len); + neigh_entry->key.dev = dev; + neigh_entry->rif = rif; + neigh_entry->n = n; + INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw); + INIT_LIST_HEAD(&neigh_entry->nexthop_list); + return neigh_entry; +} + +static void +mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + kfree(neigh_entry); +} + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr, + size_t addr_len, struct net_device *dev) +{ + struct mlxsw_sp_neigh_key key = {{ 0 } }; + + memcpy(key.addr, addr, addr_len); + key.dev = dev; + return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, + &key, mlxsw_sp_neigh_ht_params); +} + +int mlxsw_sp_router_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_rif *r; + u32 dip; + int err; + + if (n->tbl != &arp_tbl) + return 0; + + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), + n->dev); + if (neigh_entry) { + WARN_ON(neigh_entry->n != n); + return 0; + } + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (WARN_ON(!r)) + return -EINVAL; + + neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev, + r->rif, n); + if (!neigh_entry) + return -ENOMEM; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + return 0; + +err_neigh_entry_insert: + mlxsw_sp_neigh_entry_destroy(neigh_entry); + return err; +} + +void mlxsw_sp_router_neigh_destroy(struct net_device *dev, + struct neighbour *n) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + u32 dip; + + if (n->tbl != &arp_tbl) + return; + + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), + n->dev); + if (!neigh_entry) + return; + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_destroy(neigh_entry); +} + +static void +mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + + mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval); +} + +static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int ent_index) +{ + struct net_device *dev; + struct neighbour *n; + __be32 dipn; + u32 dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip); + + if (!mlxsw_sp->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dipn = htonl(dip); + dev = mlxsw_sp->rifs[rif]->dev; + n = neigh_lookup(&arp_tbl, &dipn, dev); + if (!n) { + netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", + &dip); + return; + } + + netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} + +static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + u8 num_entries; + int i; + + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, + rec_index); + /* Hardware starts counting at 0, so add 1. */ + num_entries++; + + /* Each record consists of several neighbour entries. */ + for (i = 0; i < num_entries; i++) { + int ent_index; + + ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i; + mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl, + ent_index); + } + +} + +static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, int rec_index) +{ + switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) { + case MLXSW_REG_RAUHTD_TYPE_IPV4: + mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl, + rec_index); + break; + case MLXSW_REG_RAUHTD_TYPE_IPV6: + WARN_ON_ONCE(1); + break; + } +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + char *rauhtd_pl; + u8 num_rec; + int i, err; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + /* Make sure the neighbour's netdev isn't removed in the + * process. + */ + rtnl_lock(); + do { + mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), + rauhtd_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n"); + break; + } + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, + i); + } while (num_rec); + rtnl_unlock(); + + kfree(rauhtd_pl); + return err; +} + +static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + /* Take RTNL mutex here to prevent lists from changes */ + rtnl_lock(); + list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + nexthop_neighs_list_node) { + /* If this neigh have nexthops, make the kernel think this neigh + * is active regardless of the traffic. + */ + if (!list_empty(&neigh_entry->nexthop_list)) + neigh_event_send(neigh_entry->n, NULL); + } + rtnl_unlock(); +} + +static void +mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval = mlxsw_sp->router.neighs_update.interval; + + mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_router_neighs_update_work(struct work_struct *work) +{ + struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, + router.neighs_update.dw.work); + int err; + + err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity"); + + mlxsw_sp_router_neighs_update_nh(mlxsw_sp); + + mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp); +} + +static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, + router.nexthop_probe_dw.work); + + /* Iterate over nexthop neighbours, find those who are unresolved and + * send arp on them. This solves the chicken-egg problem when + * the nexthop wouldn't get offloaded until the neighbor is resolved + * but it wouldn't get resolved ever in case traffic is flowing in HW + * using different nexthop. + * + * Take RTNL mutex here to prevent lists from changes. + */ + rtnl_lock(); + list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + nexthop_neighs_list_node) { + if (!(neigh_entry->n->nud_state & NUD_VALID) && + !list_empty(&neigh_entry->nexthop_list)) + neigh_event_send(neigh_entry->n, NULL); + } + rtnl_unlock(); + + mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, + MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL); +} + +static void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing); + +static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = + container_of(work, struct mlxsw_sp_neigh_entry, dw.work); + struct neighbour *n = neigh_entry->n; + struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + struct net_device *dev; + bool entry_connected; + u8 nud_state; + bool updating; + bool removing; + bool adding; + u32 dip; + int err; + + read_lock_bh(&n->lock); + dip = ntohl(*((__be32 *) n->primary_key)); + memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha)); + nud_state = n->nud_state; + dev = n->dev; + read_unlock_bh(&n->lock); + + entry_connected = nud_state & NUD_VALID; + adding = (!neigh_entry->offloaded) && entry_connected; + updating = neigh_entry->offloaded && entry_connected; + removing = neigh_entry->offloaded && !entry_connected; + + if (adding || updating) { + mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD, + neigh_entry->rif, + neigh_entry->ha, dip); + err = mlxsw_reg_write(mlxsw_sp->core, + MLXSW_REG(rauht), rauht_pl); + if (err) { + netdev_err(dev, "Could not add neigh %pI4h\n", &dip); + neigh_entry->offloaded = false; + } else { + neigh_entry->offloaded = true; + } + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false); + } else if (removing) { + mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE, + neigh_entry->rif, + neigh_entry->ha, dip); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), + rauht_pl); + if (err) { + netdev_err(dev, "Could not delete neigh %pI4h\n", &dip); + neigh_entry->offloaded = true; + } else { + neigh_entry->offloaded = false; + } + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true); + } + + neigh_release(n); + mlxsw_sp_port_dev_put(mlxsw_sp_port); +} + +static int mlxsw_sp_router_netevent_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + unsigned long interval; + struct net_device *dev; + struct neigh_parms *p; + struct neighbour *n; + u32 dip; + + switch (event) { + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We don't care about changes in the default table. */ + if (!p->dev || p->tbl != &arp_tbl) + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use RCU variant to walk the device chain. + */ + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); + mlxsw_sp->router.neighs_update.interval = interval; + + mlxsw_sp_port_dev_put(mlxsw_sp_port); + break; + case NETEVENT_NEIGH_UPDATE: + n = ptr; + dev = n->dev; + + if (n->tbl != &arp_tbl) + return NOTIFY_DONE; + + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, + &dip, + sizeof(__be32), + dev); + if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) { + mlxsw_sp_port_dev_put(mlxsw_sp_port); + return NOTIFY_DONE; + } + neigh_entry->mlxsw_sp_port = mlxsw_sp_port; + + /* Take a reference to ensure the neighbour won't be + * destructed until we drop the reference in delayed + * work. + */ + neigh_clone(n); + if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) { + neigh_release(n); + mlxsw_sp_port_dev_put(mlxsw_sp_port); + } + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { + .notifier_call = mlxsw_sp_router_netevent_event, +}; + +static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = rhashtable_init(&mlxsw_sp->router.neigh_ht, + &mlxsw_sp_neigh_ht_params); + if (err) + return err; + + /* Initialize the polling interval according to the default + * table. + */ + mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp); + + err = register_netevent_notifier(&mlxsw_sp_router_netevent_nb); + if (err) + goto err_register_netevent_notifier; + + /* Create the delayed works for the activity_update */ + INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw, + mlxsw_sp_router_neighs_update_work); + INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw, + mlxsw_sp_router_probe_unresolved_nexthops); + mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0); + return 0; + +err_register_netevent_notifier: + rhashtable_destroy(&mlxsw_sp->router.neigh_ht); + return err; +} + +static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) +{ + cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw); + cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw); + unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + rhashtable_destroy(&mlxsw_sp->router.neigh_ht); +} + +struct mlxsw_sp_nexthop { + struct list_head neigh_list_node; /* member of neigh entry list */ + struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group + * this belongs to + */ + u8 should_offload:1, /* set indicates this neigh is connected and + * should be put to KVD linear area of this group. + */ + offloaded:1, /* set in case the neigh is actually put into + * KVD linear area of this group. + */ + update:1; /* set indicates that MAC of this neigh should be + * updated in HW + */ + struct mlxsw_sp_neigh_entry *neigh_entry; +}; + +struct mlxsw_sp_nexthop_group { + struct list_head list; /* node in mlxsw->router.nexthop_group_list */ + struct list_head fib_list; /* list of fib entries that use this group */ + u8 adj_index_valid:1; + u32 adj_index; + u16 ecmp_size; + u16 count; + struct mlxsw_sp_nexthop nexthops[0]; +}; + +static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr, + u32 adj_index, u16 ecmp_size, + u32 new_adj_index, + u16 new_ecmp_size) +{ + char raleu_pl[MLXSW_REG_RALEU_LEN]; + + mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id, + adj_index, ecmp_size, + new_adj_index, new_ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); +} + +static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + u32 old_adj_index, u16 old_ecmp_size) +{ + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr = NULL; + int err; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + if (vr == fib_entry->vr) + continue; + vr = fib_entry->vr; + err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr, + old_adj_index, + old_ecmp_size, + nh_grp->adj_index, + nh_grp->ecmp_size); + if (err) + return err; + } + return 0; +} + +static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + char ratr_pl[MLXSW_REG_RATR_LEN]; + + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, + true, adj_index, neigh_entry->rif); + mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int +mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + u32 adj_index = nh_grp->adj_index; /* base */ + struct mlxsw_sp_nexthop *nh; + int i; + int err; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (!nh->should_offload) { + nh->offloaded = 0; + continue; + } + + if (nh->update) { + err = mlxsw_sp_nexthop_mac_update(mlxsw_sp, + adj_index, nh); + if (err) + return err; + nh->update = 0; + nh->offloaded = 1; + } + adj_index++; + } + return 0; +} + +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry); + +static int +mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + return err; + } + return 0; +} + +static void +mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + bool offload_change = false; + u32 adj_index; + u16 ecmp_size = 0; + bool old_adj_index_valid; + u32 old_adj_index; + u16 old_ecmp_size; + int ret; + int i; + int err; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (nh->should_offload ^ nh->offloaded) { + offload_change = true; + if (nh->should_offload) + nh->update = 1; + } + if (nh->should_offload) + ecmp_size++; + } + if (!offload_change) { + /* Nothing was added or removed, so no need to reallocate. Just + * update MAC on existing adjacency indexes. + */ + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + return; + } + if (!ecmp_size) + /* No neigh of this group is connected so we just set + * the trap and let everthing flow through kernel. + */ + goto set_trap; + + ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size); + if (ret < 0) { + /* We ran out of KVD linear space, just set the + * trap and let everything flow through kernel. + */ + dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n"); + goto set_trap; + } + adj_index = ret; + old_adj_index_valid = nh_grp->adj_index_valid; + old_adj_index = nh_grp->adj_index; + old_ecmp_size = nh_grp->ecmp_size; + nh_grp->adj_index_valid = 1; + nh_grp->adj_index = adj_index; + nh_grp->ecmp_size = ecmp_size; + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + + if (!old_adj_index_valid) { + /* The trap was set for fib entries, so we have to call + * fib entry update to unset it and use adjacency index. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n"); + goto set_trap; + } + return; + } + + err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp, + old_adj_index, old_ecmp_size); + mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n"); + goto set_trap; + } + return; + +set_trap: + old_adj_index_valid = nh_grp->adj_index_valid; + nh_grp->adj_index_valid = 0; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + nh->offloaded = 0; + } + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err) + dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); + if (old_adj_index_valid) + mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index); +} + +static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, + bool removing) +{ + if (!removing && !nh->should_offload) + nh->should_offload = 1; + else if (removing && nh->offloaded) + nh->should_offload = 0; + nh->update = 1; +} + +static void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing) +{ + struct mlxsw_sp_nexthop *nh; + + /* Take RTNL mutex here to prevent lists from changes */ + rtnl_lock(); + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + __mlxsw_sp_nexthop_neigh_update(nh, removing); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } + rtnl_unlock(); +} + +static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + u32 gwip = ntohl(fib_nh->nh_gw); + struct net_device *dev = fib_nh->nh_dev; + struct neighbour *n; + u8 nud_state; + + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, + sizeof(gwip), dev); + if (!neigh_entry) { + __be32 gwipn = htonl(gwip); + + n = neigh_create(&arp_tbl, &gwipn, dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, + sizeof(gwip), dev); + if (!neigh_entry) { + neigh_release(n); + return -EINVAL; + } + } else { + /* Take a reference of neigh here ensuring that neigh would + * not be detructed before the nexthop entry is finished. + * The second branch takes the reference in neith_create() + */ + n = neigh_entry->n; + neigh_clone(n); + } + + /* If that is the first nexthop connected to that neigh, add to + * nexthop_neighs_list + */ + if (list_empty(&neigh_entry->nexthop_list)) + list_add_tail(&neigh_entry->nexthop_neighs_list_node, + &mlxsw_sp->router.nexthop_neighs_list); + + nh->nh_grp = nh_grp; + nh->neigh_entry = neigh_entry; + list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + read_unlock_bh(&n->lock); + __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID)); + + return 0; +} + +static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + + list_del(&nh->neigh_list_node); + + /* If that is the last nexthop connected to that neigh, remove from + * nexthop_neighs_list + */ + if (list_empty(&nh->neigh_entry->nexthop_list)) + list_del(&nh->neigh_entry->nexthop_neighs_list_node); + + neigh_release(neigh_entry->n); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop *nh; + struct fib_nh *fib_nh; + size_t alloc_size; + int i; + int err; + + alloc_size = sizeof(*nh_grp) + + fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop); + nh_grp = kzalloc(alloc_size, GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->count = fi->fib_nhs; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + fib_nh = &fi->fib_nh[i]; + err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh); + if (err) + goto err_nexthop_init; + } + list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + return nh_grp; + +err_nexthop_init: + for (i--; i >= 0; i--) + mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + int i; + + list_del(&nh_grp->list); + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + } + kfree(nh_grp); +} + +static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh, + struct fib_info *fi) +{ + int i; + + for (i = 0; i < fi->fib_nhs; i++) { + struct fib_nh *fib_nh = &fi->fib_nh[i]; + u32 gwip = ntohl(fib_nh->nh_gw); + + if (memcmp(nh->neigh_entry->key.addr, + &gwip, sizeof(u32)) == 0 && + nh->neigh_entry->key.dev == fib_nh->nh_dev) + return true; + } + return false; +} + +static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp, + struct fib_info *fi) +{ + int i; + + if (nh_grp->count != fi->fib_nhs) + return false; + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (!mlxsw_sp_nexthop_match(nh, fi)) + return false; + } + return true; +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list, + list) { + if (mlxsw_sp_nexthop_group_match(nh_grp, fi)) + return nh_grp; + } + return NULL; +} + +static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi); + if (!nh_grp) { + nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + } + list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list); + fib_entry->nh_group = nh_grp; + return 0; +} + +static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); +} + static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; @@ -565,19 +1517,54 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { int err; + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) return err; mlxsw_sp_lpm_init(mlxsw_sp); mlxsw_sp_vrs_init(mlxsw_sp); - return 0; + return mlxsw_sp_neigh_init(mlxsw_sp); } void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { + mlxsw_sp_neigh_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); } +static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + enum mlxsw_reg_ralue_trap_action trap_action; + u16 trap_id = 0; + u32 adjacency_index = 0; + u16 ecmp_size = 0; + + /* In case the nexthop group adjacency index is valid, use it + * with provided ECMP size. Otherwise, setup trap and pass + * traffic to kernel. + */ + if (fib_entry->nh_group->adj_index_valid) { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + adjacency_index = fib_entry->nh_group->adj_index; + ecmp_size = fib_entry->nh_group->ecmp_size; + } else { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; + } + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, + adjacency_index, ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) @@ -614,7 +1601,7 @@ static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp, { switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return -EINVAL; + return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: @@ -694,7 +1681,17 @@ mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, fib_entry->rif = r->rif; return 0; } - return -EINVAL; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); +} + +static void +mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) + return; + mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); } static int @@ -738,6 +1735,7 @@ mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, return 0; err_alloc_info: + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); err_fib4_entry_init: mlxsw_sp_fib_entry_destroy(fib_entry); err_fib_entry_create: @@ -772,6 +1770,7 @@ mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port, err_fib_entry_add: mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); err_fib_entry_insert: + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); mlxsw_sp_fib_entry_destroy(fib_entry); mlxsw_sp_vr_put(mlxsw_sp, vr); return err; @@ -808,6 +1807,7 @@ int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, } mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry); mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); mlxsw_sp_fib_entry_destroy(fib_entry); mlxsw_sp_vr_put(mlxsw_sp, vr); return 0; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 28b775e5a9ad..f0b09b05ed3f 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1996,7 +1996,8 @@ static int rocker_port_change_proto_down(struct net_device *dev, return 0; } -static void rocker_port_neigh_destroy(struct neighbour *n) +static void rocker_port_neigh_destroy(struct net_device *dev, + struct neighbour *n) { struct rocker_port *rocker_port = netdev_priv(n->dev); int err; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f9eebea83516..a380649bf6b5 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2002,6 +2002,8 @@ static const struct net_device_ops team_netdev_ops = { .ndo_add_slave = team_add_slave, .ndo_del_slave = team_del_slave, .ndo_fix_features = team_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_change_carrier = team_change_carrier, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0c6ee2c5099f..49736a31acaa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1209,8 +1209,10 @@ struct net_device_ops { netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); - int (*ndo_neigh_construct)(struct neighbour *n); - void (*ndo_neigh_destroy)(struct neighbour *n); + int (*ndo_neigh_construct)(struct net_device *dev, + struct neighbour *n); + void (*ndo_neigh_destroy)(struct net_device *dev, + struct neighbour *n); int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], @@ -3843,6 +3845,10 @@ void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info); +int netdev_default_l2upper_neigh_construct(struct net_device *dev, + struct neighbour *n); +void netdev_default_l2upper_neigh_destroy(struct net_device *dev, + struct neighbour *n); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 diff --git a/include/net/netevent.h b/include/net/netevent.h index d8bbb38584b6..f440df172b56 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -24,6 +24,7 @@ struct netevent_redirect { enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ + NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 86ae75b77390..c8f422c90856 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -790,6 +790,8 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, diff --git a/net/atm/clip.c b/net/atm/clip.c index e07f551a863c..53b4ac09e7b7 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -286,7 +286,7 @@ static const struct neigh_ops clip_neigh_ops = { .connected_output = neigh_direct_output, }; -static int clip_constructor(struct neighbour *neigh) +static int clip_constructor(struct net_device *dev, struct neighbour *neigh) { struct atmarp_entry *entry = neighbour_priv(neigh); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0c39e0f6da09..8eecd0ec22f2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -349,6 +349,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_add_slave = br_add_slave, .ndo_del_slave = br_del_slave, .ndo_fix_features = br_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, .ndo_fdb_dump = br_fdb_dump, diff --git a/net/core/dev.c b/net/core/dev.c index a4f3b0a9aeaf..b92d63bfde7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6087,6 +6087,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev, } EXPORT_SYMBOL(netdev_lower_state_changed); +int netdev_default_l2upper_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + struct net_device *lower_dev, *stop_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (!lower_dev->netdev_ops->ndo_neigh_construct) + continue; + err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n); + if (err) { + stop_dev = lower_dev; + goto rollback; + } + } + return 0; + +rollback: + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (lower_dev == stop_dev) + break; + if (!lower_dev->netdev_ops->ndo_neigh_destroy) + continue; + lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); + } + return err; +} +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct); + +void netdev_default_l2upper_neigh_destroy(struct net_device *dev, + struct neighbour *n) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (!lower_dev->netdev_ops->ndo_neigh_destroy) + continue; + lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); + } +} +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 510cd62fcb99..5cdc62a8eb84 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -473,7 +473,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, } if (dev->netdev_ops->ndo_neigh_construct) { - error = dev->netdev_ops->ndo_neigh_construct(n); + error = dev->netdev_ops->ndo_neigh_construct(dev, n); if (error < 0) { rc = ERR_PTR(error); goto out_neigh_release; @@ -701,7 +701,7 @@ void neigh_destroy(struct neighbour *neigh) neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) - dev->netdev_ops->ndo_neigh_destroy(neigh); + dev->netdev_ops->ndo_neigh_destroy(dev, neigh); dev_put(dev); neigh_parms_put(neigh->parms); @@ -2047,6 +2047,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) case NDTPA_DELAY_PROBE_TIME: NEIGH_VAR_SET(p, DELAY_PROBE_TIME, nla_get_msecs(tbp[i])); + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); break; case NDTPA_RETRANS_TIME: NEIGH_VAR_SET(p, RETRANS_TIME, @@ -2930,6 +2931,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write) return; set_bit(index, p->data_state); + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); if (!dev) /* NULL dev means this is default value */ neigh_copy_dflt_parms(net, p, index); } diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 8c004a0c8d64..935ab932e841 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -81,7 +81,7 @@ static int lowpan_stop(struct net_device *dev) return 0; } -static int lowpan_neigh_construct(struct neighbour *n) +static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n) { struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n)); |