git.baikalelectronics.ru Git - kernel.git/commitdiff
net/mlx5: Add command entry handling completion
author Moshe Shemesh <moshe@mellanox.com>
Fri, 27 Dec 2019 05:01:53 +0000 (07:01 +0200)
committer Saeed Mahameed <saeedm@mellanox.com>
Sat, 23 May 2020 00:28:34 +0000 (17:28 -0700)
When the FW response to commands is very slow and all command entries in
use are waiting for completion, we can have a race where commands time
out before they get out of the queue and are handled. A timeout
completion on an uninitialized command will cause the command's buffers
to be released before they are accessed for initialization, and we will
then get a NULL pointer exception while trying to access them. It may
also cause the buffers of another command to be released, since we may
have a timeout completion before even allocating an entry index for this
command.
Add entry handling completion to avoid this race.

Fixes: ffaf2c1a3989 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
include/linux/mlx5/driver.h

index cede5bdfd598e49b44ce133b61bc6509faf923fa..d695b75bc0af9e1313cd710cab001914881fd3a3 100644 (file)
@@ -861,6 +861,7 @@ static void cmd_work_handler(struct work_struct *work)
        int alloc_ret;
        int cmd_mode;
 
+       complete(&ent->handling);
        sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
        down(sem);
        if (!ent->page_queue) {
@@ -978,6 +979,11 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
        struct mlx5_cmd *cmd = &dev->cmd;
        int err;
 
+       if (!wait_for_completion_timeout(&ent->handling, timeout) &&
+           cancel_work_sync(&ent->work)) {
+               ent->ret = -ECANCELED;
+               goto out_err;
+       }
        if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
                wait_for_completion(&ent->done);
        } else if (!wait_for_completion_timeout(&ent->done, timeout)) {
@@ -985,12 +991,17 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
                mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
        }
 
+out_err:
        err = ent->ret;
 
        if (err == -ETIMEDOUT) {
                mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
                               mlx5_command_str(msg_to_opcode(ent->in)),
                               msg_to_opcode(ent->in));
+       } else if (err == -ECANCELED) {
+               mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
+                              mlx5_command_str(msg_to_opcode(ent->in)),
+                              msg_to_opcode(ent->in));
        }
        mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
                      err, deliv_status_to_str(ent->status), ent->status);
@@ -1026,6 +1037,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
        ent->token = token;
        ent->polling = force_polling;
 
+       init_completion(&ent->handling);
        if (!callback)
                init_completion(&ent->done);
 
@@ -1045,6 +1057,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
        err = wait_func(dev, ent);
        if (err == -ETIMEDOUT)
                goto out;
+       if (err == -ECANCELED)
+               goto out_free;
 
        ds = ent->ts2 - ent->ts1;
        op = MLX5_GET(mbox_in, in->first.data, opcode);
index 6f8f79ef829b1829b9c19eba43857c4467d8fa9b..9b1f29f26c2789952cb832e55539c1ade8691638 100644 (file)
@@ -743,6 +743,7 @@ struct mlx5_cmd_work_ent {
        struct delayed_work     cb_timeout_work;
        void                   *context;
        int                     idx;
+       struct completion       handling;
        struct completion       done;
        struct mlx5_cmd        *cmd;
        struct work_struct      work;