This patch caches doorbell address directly in struct mlx4_en_tx_ring.
This removes the need to bring in cpu caches whole struct mlx4_uar
in fast path.
Note that mlx4_uar is not guaranteed to be on a local node,
because mlx4_bf_alloc() uses a single free list (priv->bf_list)
regardless of its node parameter.
This kind of change does matter in presence of light/moderate traffic.
In high stress, this read-only line would be kept hot in caches.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
ring->bf_enabled = !!(priv->pflags &
MLX4_EN_PRIV_FLAGS_BLUEFLAME);
}
+ ring->doorbell_address = ring->bf.uar->map + MLX4_SEND_DOORBELL;
ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
ring->queue_index = queue_index;
#else
iowrite32be(
#endif
- (__force u32)ring->doorbell_qpn,
- ring->bf.uar->map + MLX4_SEND_DOORBELL);
+ (__force u32)ring->doorbell_qpn, ring->doorbell_address);
}
static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring,
struct mlx4_bf bf;
/* Following part should be mostly read */
+ void __iomem *doorbell_address;
__be32 doorbell_qpn;
__be32 mr_key;
u32 size; /* number of TXBBs */