From 48ce4d1e14c045d51aff4fce17b6a3ea6501dace Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 5 Sep 2022 18:07:06 +0300 Subject: [PATCH] nvme-tcp: fix regression that causes sporadic requests to time out When we queue requests, we strive to batch as much as possible and also signal the network stack that more data is about to be sent over a socket with MSG_SENDPAGE_NOTLAST. This flag looks at the pending requests queued as well as queue->more_requests that is derived from the block layer last-in-batch indication. We set more_request=true when we flush the request directly from .queue_rq submission context (in nvme_tcp_send_all), however this is wrongly assuming that no other requests may be queued during the execution of nvme_tcp_send_all. Due to this, a race condition may happen where: 1. request X is queued as !last-in-batch 2. request X submission context calls nvme_tcp_send_all directly 3. nvme_tcp_send_all is preempted and schedules to a different cpu 4. request Y is queued as last-in-batch 5. nvme_tcp_send_all context sends request X+Y, however signals for both MSG_SENDPAGE_NOTLAST because queue->more_requests=true. ==> none of the requests is pushed down to the wire as the network stack is waiting for more data, both requests timeout. To fix this, we eliminate queue->more_requests and only rely on the queue req_list and send_list to be not-empty. Fixes: 4158266ca0b6 ("nvme-tcp: optimize network stack with setting msg flags according to batch size") Reported-by: Jonathan Nicklin Signed-off-by: Sagi Grimberg Tested-by: Jonathan Nicklin Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 54b4e8a7fe42c..d5871fd6f769b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -121,7 +121,6 @@ struct nvme_tcp_queue { struct mutex send_mutex; struct llist_head req_list; struct list_head send_list; - bool more_requests; /* recv state */ void *pdu; @@ -320,7 +319,7 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) { return !list_empty(&queue->send_list) || - !llist_empty(&queue->req_list) || queue->more_requests; + !llist_empty(&queue->req_list); } static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, @@ -339,9 +338,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, */ if (queue->io_cpu == raw_smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { - queue->more_requests = !last; nvme_tcp_send_all(queue); - queue->more_requests = false; mutex_unlock(&queue->send_mutex); } -- 2.39.5