#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
#include "mlx5_irq.h"
-#include "sf/sf.h"
+#include "lib/sf.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
/* min num of vectors for SFs to be enabled */
#define MLX5_IRQ_VEC_COMP_BASE_SF 2
+#define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
+#define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
+#define MLX5_EQ_SHARE_IRQ_MIN_COMP (1)
+#define MLX5_EQ_SHARE_IRQ_MIN_CTRL (4)
+#define MLX5_EQ_REFS_PER_IRQ (2)
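+/* The MIN/MAX values above bound how many EQs are expected to share a single
+ * IRQ in each pool: below MIN an existing IRQ is reused as-is, above MAX the
+ * IRQ is only reported as overloaded. irq_pool_alloc() scales them by
+ * MLX5_EQ_REFS_PER_IRQ, the per-EQ reference count used in the debug prints.
+ */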
+
struct mlx5_irq {
u32 index;
struct atomic_notifier_head nh;
struct mlx5_irq_pool {
char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
struct xa_limit xa_num_irqs;
+ struct mutex lock; /* sync IRQ creation and release */
struct xarray irqs;
+ u32 max_threshold;
+ u32 min_threshold;
struct mlx5_core_dev *dev;
};
static void irq_put(struct mlx5_irq *irq)
{
+ struct mlx5_irq_pool *pool = irq->pool;
+
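+ /* dropping the reference is done under the pool lock so that a
+ * concurrent irq_pool_request_affinity() cannot pick an IRQ whose
+ * last reference is going away
+ */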
+ mutex_lock(&pool->lock);
kref_put(&irq->kref, irq_release);
+ mutex_unlock(&pool->lock);
}
static irqreturn_t irq_int_handler(int irq, void *nh)
err = -ENOMEM;
goto err_cpumask;
}
- err = xa_alloc(&pool->irqs, &irq->index, irq, pool->xa_num_irqs,
- GFP_KERNEL);
+ kref_init(&irq->kref);
+ irq->index = i;
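+ /* the index is either a fixed vector requested by the caller or a slot
+ * reserved via xa_alloc() in irq_pool_create_irq(); xa_store() fills it
+ */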
+ err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
if (err) {
mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
irq->index, err);
goto err_xa;
}
irq->pool = pool;
- kref_init(&irq->kref);
return irq;
err_xa:
free_cpumask_var(irq->mask);
return irq->mask;
}
+int mlx5_irq_get_index(struct mlx5_irq *irq)
+{
+ return irq->index;
+}
+
+/* irq_pool API */
+
+/* creating an irq from irq_pool */
+static struct mlx5_irq *irq_pool_create_irq(struct mlx5_irq_pool *pool,
+ struct cpumask *affinity)
+{
+ struct mlx5_irq *irq;
+ u32 irq_index;
+ int err;
+
+ err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs,
+ GFP_KERNEL);
+ if (err)
+ return ERR_PTR(err);
+ irq = irq_request(pool, irq_index);
+ if (IS_ERR(irq))
+ return irq;
+ cpumask_copy(irq->mask, affinity);
+ irq_set_affinity_hint(irq->irqn, irq->mask);
+ return irq;
+}
+
+/* looking for the irq with the smallest refcount and the same affinity */
+static struct mlx5_irq *irq_pool_find_least_loaded(struct mlx5_irq_pool *pool,
+ struct cpumask *affinity)
+{
+ int start = pool->xa_num_irqs.min;
+ int end = pool->xa_num_irqs.max;
+ struct mlx5_irq *irq = NULL;
+ struct mlx5_irq *iter;
+ unsigned long index;
+
+ lockdep_assert_held(&pool->lock);
+ xa_for_each_range(&pool->irqs, index, iter, start, end) {
+ if (!cpumask_equal(iter->mask, affinity))
+ continue;
+ if (kref_read(&iter->kref) < pool->min_threshold)
+ return iter;
+ if (!irq || kref_read(&iter->kref) <
+ kref_read(&irq->kref))
+ irq = iter;
+ }
+ return irq;
+}
+
+/* requesting an irq from a given pool according to the given affinity */
+static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool,
+ struct cpumask *affinity)
+{
+ struct mlx5_irq *least_loaded_irq, *new_irq;
+
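+ /* prefer sharing: reuse the least loaded IRQ with this affinity while
+ * it is under min_threshold; otherwise try to create a new IRQ, and
+ * fall back to sharing only if the pool has no free vectors left
+ */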
+ mutex_lock(&pool->lock);
+ least_loaded_irq = irq_pool_find_least_loaded(pool, affinity);
+ if (least_loaded_irq &&
+ kref_read(&least_loaded_irq->kref) < pool->min_threshold)
+ goto out;
+ new_irq = irq_pool_create_irq(pool, affinity);
+ if (IS_ERR(new_irq)) {
+ if (!least_loaded_irq) {
+ mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n",
+ cpumask_first(affinity));
+ mutex_unlock(&pool->lock);
+ return new_irq;
+ }
+ /* We failed to create a new IRQ for the requested affinity,
+ * sharing existing IRQ.
+ */
+ goto out;
+ }
+ least_loaded_irq = new_irq;
+ goto unlock;
+out:
+ kref_get(&least_loaded_irq->kref);
+ if (kref_read(&least_loaded_irq->kref) > pool->max_threshold)
+ mlx5_core_dbg(pool->dev, "IRQ %u overloaded, pool_name: %s, %u EQs on this irq\n",
+ least_loaded_irq->irqn, pool->name,
+ kref_read(&least_loaded_irq->kref) / MLX5_EQ_REFS_PER_IRQ);
+unlock:
+ mutex_unlock(&pool->lock);
+ return least_loaded_irq;
+}
+
+/* requesting an irq from a given pool according to the given index */
+static struct mlx5_irq *
+irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
+ struct cpumask *affinity)
+{
+ struct mlx5_irq *irq;
+
+ mutex_lock(&pool->lock);
+ irq = xa_load(&pool->irqs, vecidx);
+ if (irq) {
+ kref_get(&irq->kref);
+ goto unlock;
+ }
+ irq = irq_request(pool, vecidx);
+ if (IS_ERR(irq) || !affinity)
+ goto unlock;
+ cpumask_copy(irq->mask, affinity);
+ irq_set_affinity_hint(irq->irqn, irq->mask);
+unlock:
+ mutex_unlock(&pool->lock);
+ return irq;
+}
+
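+/* ctrl EQ requests (vector MLX5_IRQ_EQ_CTRL with an empty affinity) are served
+ * from the SF ctrl pool; any other SF request is served from the SF comp pool.
+ */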
+static struct mlx5_irq_pool *find_sf_irq_pool(struct mlx5_irq_table *irq_table,
+ int i, struct cpumask *affinity)
+{
+ if (cpumask_empty(affinity) && i == MLX5_IRQ_EQ_CTRL)
+ return irq_table->sf_ctrl_pool;
+ return irq_table->sf_comp_pool;
+}
+
/**
 * mlx5_irq_request - request an IRQ for mlx5 device.
 * @dev: mlx5 device that is requesting the IRQ.
 * @vecidx: vector index of the requested IRQ.
 * @affinity: cpumask requested for this IRQ.
 *
 * This function returns a pointer to IRQ, or ERR_PTR in case of error.
 */
-struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, int vecidx,
+struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct cpumask *affinity)
{
struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool;
struct mlx5_irq *irq;
- pool = irq_table->pf_pool;
-
- irq = xa_load(&pool->irqs, vecidx);
- if (irq) {
- kref_get(&irq->kref);
- return irq;
+ if (mlx5_core_is_sf(dev)) {
+ pool = find_sf_irq_pool(irq_table, vecidx, affinity);
+ if (!pool)
+ /* no dedicated SF IRQs; fall back to the PF IRQs */
+ goto pf_irq;
+ if (cpumask_empty(affinity) && !strcmp(pool->name, "mlx5_sf_comp"))
+ /* In case an SF user requests an IRQ with a specific vecidx */
+ irq = irq_pool_request_vector(pool, vecidx, NULL);
+ else
+ irq = irq_pool_request_affinity(pool, affinity);
+ goto out;
}
- irq = irq_request(pool, vecidx);
+pf_irq:
+ pool = irq_table->pf_pool;
+ irq = irq_pool_request_vector(pool, vecidx, affinity);
+out:
if (IS_ERR(irq))
return irq;
- cpumask_copy(irq->mask, affinity);
- irq_set_affinity_hint(irq->irqn, irq->mask);
+ mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
+ irq->irqn, cpumask_pr_args(affinity),
+ kref_read(&irq->kref) / MLX5_EQ_REFS_PER_IRQ);
return irq;
}
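+/* Typical usage, as an illustrative sketch only (assuming the matching
+ * mlx5_irq_release() helper in this file):
+ *
+ *	irq = mlx5_irq_request(dev, vecidx, affinity);
+ *	if (IS_ERR(irq))
+ *		return PTR_ERR(irq);
+ *	...
+ *	mlx5_irq_release(irq);
+ */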
-/* irq_pool API */
-
static struct mlx5_irq_pool *
-irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name)
+irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name,
+ u32 min_threshold, u32 max_threshold)
{
struct mlx5_irq_pool *pool = kvzalloc(sizeof(*pool), GFP_KERNEL);
if (name)
snprintf(pool->name, MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS,
name);
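+ /* thresholds are given in EQs; store them scaled to reference-count units */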
+ pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ;
+ pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ;
+ mutex_init(&pool->lock);
mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d",
name, size, start);
return pool;
int err;
/* init pf_pool */
- table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL);
+ table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
+ MLX5_EQ_SHARE_IRQ_MIN_COMP,
+ MLX5_EQ_SHARE_IRQ_MAX_COMP);
if (IS_ERR(table->pf_pool))
return PTR_ERR(table->pf_pool);
if (!mlx5_sf_max_functions(dev))
num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
- "mlx5_sf_ctrl");
+ "mlx5_sf_ctrl",
+ MLX5_EQ_SHARE_IRQ_MIN_CTRL,
+ MLX5_EQ_SHARE_IRQ_MAX_CTRL);
if (IS_ERR(table->sf_ctrl_pool)) {
err = PTR_ERR(table->sf_ctrl_pool);
goto err_pf;
}
/* init sf_comp_pool */
table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
- sf_vec - num_sf_ctrl, "mlx5_sf_comp");
+ sf_vec - num_sf_ctrl, "mlx5_sf_comp",
+ MLX5_EQ_SHARE_IRQ_MIN_COMP,
+ MLX5_EQ_SHARE_IRQ_MAX_COMP);
if (IS_ERR(table->sf_comp_pool)) {
err = PTR_ERR(table->sf_comp_pool);
goto err_sf_ctrl;
pci_free_irq_vectors(dev->pdev);
}
+int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table)
+{
+ if (table->sf_comp_pool)
+ return table->sf_comp_pool->xa_num_irqs.max -
+ table->sf_comp_pool->xa_num_irqs.min + 1;
+ else
+ return mlx5_irq_table_get_num_comp(table);
+}
+
struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev)
{
#ifdef CONFIG_MLX5_SF