net/mlx4_en: avoid one cache line miss to ring doorbell
author: Eric Dumazet <edumazet@google.com>
Fri, 1 Oct 2021 00:52:49 +0000 (17:52 -0700)
committer: David S. Miller <davem@davemloft.net>
Mon, 4 Oct 2021 11:50:13 +0000 (12:50 +0100)
This patch caches the doorbell address directly in struct mlx4_en_tx_ring.

This removes the need to bring the whole struct mlx4_uar into CPU caches
in the fast path.

Note that mlx4_uar is not guaranteed to be on a local node,
because mlx4_bf_alloc() uses a single free list (priv->bf_list)
regardless of its node parameter.

This kind of change does matter in the presence of light/moderate traffic.
Under high stress, this read-only cache line would be kept hot in caches.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

index c56b9dba4c71898b61e87fd32e5fa523c313e445..817f4154b86d599cd593876ec83529051d95fe2f 100644 (file)
@@ -130,6 +130,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
                ring->bf_enabled = !!(priv->pflags &
                                      MLX4_EN_PRIV_FLAGS_BLUEFLAME);
        }
+       ring->doorbell_address = ring->bf.uar->map + MLX4_SEND_DOORBELL;
 
        ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
        ring->queue_index = queue_index;
@@ -753,8 +754,7 @@ void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring)
 #else
        iowrite32be(
 #endif
-                 (__force u32)ring->doorbell_qpn,
-                 ring->bf.uar->map + MLX4_SEND_DOORBELL);
+                 (__force u32)ring->doorbell_qpn, ring->doorbell_address);
 }
 
 static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring,
index ad0a8b488832c8cdca2790e47fc778fe15686f7f..e132ff4c82f2d33045f6c9aeecaaa409a41e0b0d 100644 (file)
@@ -283,6 +283,7 @@ struct mlx4_en_tx_ring {
        struct mlx4_bf          bf;
 
        /* Following part should be mostly read */
+       void __iomem            *doorbell_address;
        __be32                  doorbell_qpn;
        __be32                  mr_key;
        u32                     size; /* number of TXBBs */