OFFLOADS paring using devcom is possible only on devices
that support LAG. Filter based on lag capabilities.
This fixes an issue where mlx5_get_next_phys_dev() was
called without holding the interface lock.
This issue was found when commit
00f22b794b6d ("net/mlx5: Lag, filter non compatible devices")
added an assert that verifies the interface lock is held.
WARNING: CPU: 9 PID: 1706 at drivers/net/ethernet/mellanox/mlx5/core/dev.c:642 mlx5_get_next_phys_dev+0xd2/0x100 [mlx5_core]
Modules linked in: mlx5_vdpa vringh vhost_iotlb vdpa mlx5_ib mlx5_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi rdma_cm iw_cm ib_umad ib_ipoib ib_cm ib_uverbs ib_core overlay fuse [last unloaded: mlx5_core]
CPU: 9 PID: 1706 Comm: devlink Not tainted 5.18.0-rc7+ #11
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
RIP: 0010:mlx5_get_next_phys_dev+0xd2/0x100 [mlx5_core]
Code: 02 00 75 48 48 8b 85 80 04 00 00 5d c3 31 c0 5d c3 be ff ff ff ff 48 c7 c7 08 41 5b a0 e8 36 87 28 e3 85 c0 0f 85 6f ff ff ff <0f> 0b e9 68 ff ff ff 48 c7 c7 0c 91 cc 84 e8 cb 36 6f e1 e9 4d ff
RSP: 0018:
ffff88811bf47458 EFLAGS:
00010246
RAX:
0000000000000000 RBX:
ffff88811b398000 RCX:
0000000000000001
RDX:
0000000080000000 RSI:
ffffffffa05b4108 RDI:
ffff88812daaaa78
RBP:
ffff88812d050380 R08:
0000000000000001 R09:
ffff88811d6b3437
R10:
0000000000000001 R11:
00000000fddd3581 R12:
ffff88815238c000
R13:
ffff88812d050380 R14:
ffff8881018aa7e0 R15:
ffff88811d6b3428
FS:
00007fc82e18ae80(0000) GS:
ffff88842e080000(0000) knlGS:
0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0:
0000000080050033
CR2:
00007f9630d1b421 CR3:
0000000149802004 CR4:
0000000000370ea0
DR0:
0000000000000000 DR1:
0000000000000000 DR2:
0000000000000000
DR3:
0000000000000000 DR6:
00000000fffe0ff0 DR7:
0000000000000400
Call Trace:
<TASK>
mlx5_esw_offloads_devcom_event+0x99/0x3b0 [mlx5_core]
mlx5_devcom_send_event+0x167/0x1d0 [mlx5_core]
esw_offloads_enable+0x1153/0x1500 [mlx5_core]
? mlx5_esw_offloads_controller_valid+0x170/0x170 [mlx5_core]
? wait_for_completion_io_timeout+0x20/0x20
? mlx5_rescan_drivers_locked+0x318/0x810 [mlx5_core]
mlx5_eswitch_enable_locked+0x586/0xc50 [mlx5_core]
? mlx5_eswitch_disable_pf_vf_vports+0x1d0/0x1d0 [mlx5_core]
? mlx5_esw_try_lock+0x1b/0xb0 [mlx5_core]
? mlx5_eswitch_enable+0x270/0x270 [mlx5_core]
? __debugfs_create_file+0x260/0x3e0
mlx5_devlink_eswitch_mode_set+0x27e/0x870 [mlx5_core]
? mutex_lock_io_nested+0x12c0/0x12c0
? esw_offloads_disable+0x250/0x250 [mlx5_core]
? devlink_nl_cmd_trap_get_dumpit+0x470/0x470
? rcu_read_lock_sched_held+0x3f/0x70
devlink_nl_cmd_eswitch_set_doit+0x217/0x620
Fixes: 40c70c93f580 ("net/mlx5: E-Switch, handle devcom events only for ports on the same device")
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
return pci_get_drvdata(to_pci_dev(other));
}
-static int next_phys_dev(struct device *dev, const void *data)
-{
- struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data;
-
- mdev = pci_get_other_drvdata(this->device, dev);
- if (!mdev)
- return 0;
-
- return _next_phys_dev(mdev, data);
-}
-
static int next_phys_dev_lag(struct device *dev, const void *data)
{
struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data;
return pci_get_drvdata(to_pci_dev(next));
}
-/* Must be called with intf_mutex held */
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
-{
- lockdep_assert_held(&mlx5_intf_mutex);
- return mlx5_get_next_dev(dev, &next_phys_dev);
-}
-
/* Must be called with intf_mutex held */
struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev)
{
switch (event) {
case ESW_OFFLOADS_DEVCOM_PAIR:
- if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev)
- break;
-
if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
break;
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
mlx5_devcom_register_component(devcom,
MLX5_DEVCOM_ESW_OFFLOADS,
mlx5_esw_offloads_devcom_event,
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
struct lag_mpesw lag_mpesw;
};
+static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ MLX5_CAP_GEN(dev, num_lag_ports) < 2 ||
+ MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS)
+ return false;
+ return true;
+}
+
static inline struct mlx5_lag *
mlx5_lag_dev(struct mlx5_core_dev *dev)
{
void mlx5_detach_device(struct mlx5_core_dev *dev);
int mlx5_register_device(struct mlx5_core_dev *dev);
void mlx5_unregister_device(struct mlx5_core_dev *dev);
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev);
void mlx5_dev_list_lock(void);
void mlx5_dev_list_unlock(void);