Currently, there are three eswitch modes: none, legacy and switchdev.
None is the default mode. Remove the redundant none mode, as the eswitch
mode should always be either legacy or switchdev.
With this patch, there are two behavior changes:
1. Legacy becomes the default mode. When querying the eswitch mode using
devlink, a valid mode is always returned.
2. When disabling SR-IOV, the eswitch mode does not change; only the VFs
are unloaded.
Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
{
const u32 *out;
- WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE);
-
if (num_vfs < 0)
return;
return 0;
abort:
- esw->mode = MLX5_ESWITCH_NONE;
+ esw->mode = MLX5_ESWITCH_LEGACY;
if (mode == MLX5_ESWITCH_OFFLOADS)
mlx5_rescan_drivers(esw->dev);
if (!mlx5_esw_allowed(esw))
return 0;
- toggle_lag = esw->mode == MLX5_ESWITCH_NONE;
+ toggle_lag = !mlx5_esw_is_fdb_created(esw);
if (toggle_lag)
mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
- if (esw->mode == MLX5_ESWITCH_NONE) {
+ if (!mlx5_esw_is_fdb_created(esw)) {
ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
} else {
enum mlx5_eswitch_vport_event vport_events;
return ret;
}
-void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
+/* When disabling SR-IOV, free driver-level resources. */
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf)
{
- struct devlink *devlink = priv_to_devlink(esw->dev);
- int old_mode;
-
- lockdep_assert_held_write(&esw->mode_lock);
-
- if (esw->mode == MLX5_ESWITCH_NONE)
+ if (!mlx5_esw_allowed(esw))
return;
- esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ down_write(&esw->mode_lock);
+ /* If the driver is unloaded, this function is called twice, by
+ * remove_one() and by mlx5_unload(). Make the second call a no-op.
+ */
+ if (!esw->esw_funcs.num_vfs && !clear_vf)
+ goto unlock;
+
+ esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n",
esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
esw->esw_funcs.num_vfs, esw->enabled_vports);
+ mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs);
+ if (clear_vf)
+ mlx5_eswitch_clear_vf_vports_info(esw);
+ /* If disabling SR-IOV in switchdev mode, free the meta rules here,
+ * because they depend on num_vfs.
+ */
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS) {
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+
+ esw_offloads_del_send_to_vport_meta_rules(esw);
+ devlink_rate_nodes_destroy(devlink);
+ }
+
+ esw->esw_funcs.num_vfs = 0;
+
+unlock:
+ up_write(&esw->mode_lock);
+}
+
+/* Free the resources for the current eswitch mode. It is called by devlink
+ * when changing the eswitch mode, and by modprobe when unloading the driver.
+ */
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
+{
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+
/* Notify eswitch users that it is exiting from current mode.
* So that it can do necessary cleanup before the eswitch is disabled.
*/
- mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_NONE);
+ mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY);
mlx5_eswitch_event_handlers_unregister(esw);
+ esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n",
+ esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS",
+ esw->esw_funcs.num_vfs, esw->enabled_vports);
+
esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED;
- if (esw->mode == MLX5_ESWITCH_LEGACY)
- esw_legacy_disable(esw);
- else if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
esw_offloads_disable(esw);
-
- old_mode = esw->mode;
- esw->mode = MLX5_ESWITCH_NONE;
-
- if (old_mode == MLX5_ESWITCH_OFFLOADS)
- mlx5_rescan_drivers(esw->dev);
-
- devlink_rate_nodes_destroy(devlink);
-
+ else if (esw->mode == MLX5_ESWITCH_LEGACY)
+ esw_legacy_disable(esw);
mlx5_esw_acls_ns_cleanup(esw);
- if (clear_vf)
- mlx5_eswitch_clear_vf_vports_info(esw);
+ if (esw->mode == MLX5_ESWITCH_OFFLOADS)
+ devlink_rate_nodes_destroy(devlink);
}
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
{
if (!mlx5_esw_allowed(esw))
return;
mlx5_lag_disable_change(esw->dev);
down_write(&esw->mode_lock);
- mlx5_eswitch_disable_locked(esw, clear_vf);
- esw->esw_funcs.num_vfs = 0;
+ mlx5_eswitch_disable_locked(esw);
up_write(&esw->mode_lock);
mlx5_lag_enable_change(esw->dev);
}
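For clarity, teardown is now split in two: mlx5_eswitch_disable_sriov()
above only unloads the VF vports and preserves the current mode, while
mlx5_eswitch_disable() frees the mode-level resources. An illustrative,
simplified sketch of the two call paths (the real call sites appear in
later hunks):

	/* "echo 0 > sriov_numvfs": VFs go away, the eswitch mode stays */
	mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf);

	/* driver unload or devlink mode change: leave the current mode */
	mlx5_eswitch_disable(dev->priv.eswitch);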
refcount_set(&esw->qos.refcnt, 0);
esw->enabled_vports = 0;
- esw->mode = MLX5_ESWITCH_NONE;
+ esw->mode = MLX5_ESWITCH_LEGACY;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
{
struct mlx5_eswitch *esw = dev->priv.eswitch;
- return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_NONE;
+ return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_LEGACY;
}
EXPORT_SYMBOL_GPL(mlx5_eswitch_mode);
int esw_offloads_enable(struct mlx5_eswitch *esw);
void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
+void esw_offloads_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw);
bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
#define MLX5_ESWITCH_IGNORE_NUM_VFS (-1)
int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs);
int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs);
-void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf);
-void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf);
+void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf);
+void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw);
+void mlx5_eswitch_disable(struct mlx5_eswitch *esw);
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
u16 vport, const u8 *mac);
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
-static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {}
+static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {}
+static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {}
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
static inline
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
mlx5_del_flow_rules(flows[i]);
kvfree(flows);
+ /* When changing the eswitch mode from switchdev to legacy while num_vfs
+ * is not 0, the meta rules could be freed twice. Set the pointer to NULL
+ * to prevent that.
+ */
+ esw->fdb_table.offloads.send_to_vport_meta_rules = NULL;
+}
+
+void esw_offloads_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw)
+{
+ mlx5_eswitch_del_send_to_vport_meta_rules(esw);
}
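The comment above assumes the delete helper returns early once the rules
pointer has been cleared. A hedged sketch of that guard at the top of
mlx5_eswitch_del_send_to_vport_meta_rules() (the early return is an
assumption; it is not visible in this hunk):

	struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules;

	/* A second call after the pointer was cleared becomes a no-op */
	if (!flows)
		return;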
static int
if (!MLX5_CAP_GEN(dev, vport_group_manager))
return -EOPNOTSUPP;
- if (esw->mode == MLX5_ESWITCH_NONE)
+ if (!mlx5_esw_is_fdb_created(esw))
return -EOPNOTSUPP;
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
{
int err, err1;
- mlx5_eswitch_disable_locked(esw, false);
err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_OFFLOADS,
esw->dev->priv.sriov.num_vfs);
if (err) {
int err = 0;
down_write(&esw->mode_lock);
- if (esw->mode != MLX5_ESWITCH_NONE) {
+ if (mlx5_esw_is_fdb_created(esw)) {
err = -EBUSY;
goto done;
}
{
int err, err1;
- mlx5_eswitch_disable_locked(esw, false);
err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY,
MLX5_ESWITCH_IGNORE_NUM_VFS);
if (err) {
return 0;
}
-static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw)
-{
- /* devlink commands in NONE eswitch mode are currently supported only
- * on ECPF.
- */
- return (esw->mode == MLX5_ESWITCH_NONE &&
- !mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0;
-}
-
/* FIXME: devl_unlock() followed by devl_lock() inside driver callback
* is never correct and prone to races. It's a transitional workaround,
* never repeat this pattern.
if (cur_mlx5_mode == mlx5_mode)
goto unlock;
+ mlx5_eswitch_disable_locked(esw);
if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) {
if (mlx5_devlink_trap_get_num_active(esw->dev)) {
NL_SET_ERR_MSG_MOD(extack,
err = esw_offloads_start(esw, extack);
} else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) {
err = esw_offloads_stop(esw, extack);
+ mlx5_rescan_drivers(esw->dev);
} else {
err = -EINVAL;
}
return PTR_ERR(esw);
mlx5_eswtich_mode_callback_enter(devlink, esw);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto unlock;
-
err = esw_mode_to_devlink(esw->mode, mode);
-unlock:
mlx5_eswtich_mode_callback_exit(devlink, esw);
return err;
}
return PTR_ERR(esw);
mlx5_eswtich_mode_callback_enter(devlink, esw);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto out;
switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
return PTR_ERR(esw);
mlx5_eswtich_mode_callback_enter(devlink, esw);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto unlock;
-
err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
-unlock:
mlx5_eswtich_mode_callback_exit(devlink, esw);
return err;
}
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw;
- int err;
+ int err = 0;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
return PTR_ERR(esw);
mlx5_eswtich_mode_callback_enter(devlink, esw);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto unlock;
if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
(!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
enum devlink_eswitch_encap_mode *encap)
{
struct mlx5_eswitch *esw;
- int err;
esw = mlx5_devlink_eswitch_get(devlink);
if (IS_ERR(esw))
return PTR_ERR(esw);
mlx5_eswtich_mode_callback_enter(devlink, esw);
- err = eswitch_devlink_esw_mode_check(esw);
- if (err)
- goto unlock;
-
*encap = esw->offloads.encap;
-unlock:
mlx5_eswtich_mode_callback_exit(devlink, esw);
- return err;
+ return 0;
}
static bool
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
+ struct mlx5_core_dev *dev;
u8 mode;
#endif
int i;
return false;
#ifdef CONFIG_MLX5_ESWITCH
- mode = mlx5_eswitch_mode(ldev->pf[MLX5_LAG_P1].dev);
-
- if (mode != MLX5_ESWITCH_NONE && mode != MLX5_ESWITCH_OFFLOADS)
+ dev = ldev->pf[MLX5_LAG_P1].dev;
+ if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev))
return false;
+ mode = mlx5_eswitch_mode(dev);
for (i = 0; i < ldev->ports; i++)
if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
return false;
#ifdef CONFIG_MLX5_ESWITCH
for (i = 0; i < ldev->ports; i++)
- roce_lag = roce_lag &&
- ldev->pf[i].dev->priv.eswitch->mode == MLX5_ESWITCH_NONE;
+ roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
#endif
return roce_lag;
{
mlx5_sf_dev_table_destroy(dev);
mlx5_sriov_detach(dev);
+ mlx5_eswitch_disable(dev->priv.eswitch);
mlx5_lag_remove_mdev(dev);
mlx5_ec_cleanup(dev);
mlx5_sf_hw_table_destroy(dev);
case MLX5_ESWITCH_OFFLOADS:
mlx5_sf_table_enable(table);
break;
- case MLX5_ESWITCH_NONE:
+ case MLX5_ESWITCH_LEGACY:
mlx5_sf_table_disable(table);
break;
default:
sriov->vfs_ctx[vf].enabled = 0;
}
- if (MLX5_ESWITCH_MANAGER(dev))
- mlx5_eswitch_disable(dev->priv.eswitch, clear_vf);
+ mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf);
if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
enum {
- MLX5_ESWITCH_NONE,
MLX5_ESWITCH_LEGACY,
MLX5_ESWITCH_OFFLOADS
};
static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
{
- return MLX5_ESWITCH_NONE;
+ return MLX5_ESWITCH_LEGACY;
}
static inline enum devlink_eswitch_encap_mode
#endif /* CONFIG_MLX5_ESWITCH */
+static inline bool is_mdev_legacy_mode(struct mlx5_core_dev *dev)
+{
+ return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_LEGACY;
+}
+
static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
{
return mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS;