From 7c2e6e82180c9d513187be47c239767f4ada4a5d Mon Sep 17 00:00:00 2001 From: "shaohua.li@intel.com" Date: Fri, 6 May 2011 11:34:41 -0600 Subject: [PATCH] block: hold queue if flush is running for non-queueable flush drive In some drives, flush requests are non-queueable. When flush request is running, normal read/write requests can't run. If block layer dispatches such request, driver can't handle it and requeue it. Tejun suggested we can hold the queue when flush is running. This can avoid unnecessary requeue. Also this can improve performance. For example, we have request flush1, write1, flush 2. flush1 is dispatched, then queue is hold, write1 isn't inserted to queue. After flush1 is finished, flush2 will be dispatched. Since disk cache is already clean, flush2 will be finished very soon, so looks like flush2 is folded to flush1. In my test, the queue holding completely solves a regression introduced by commit 6184a27624da949a5ae12384b19e495f50839aef: block: make the flush insertion use the tail of the dispatch list It's not a preempt type request, in fact we have to insert it behind requests that do specify INSERT_FRONT. which causes about 20% regression running a sysbench fileio workload. Stable: 2.6.39 only Cc: stable@kernel.org Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 16 +++++++++++----- block/blk.h | 21 ++++++++++++++++++++- include/linux/blkdev.h | 1 + 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index 6c9b5e189e624..bb21e4c36f70c 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error) } /* - * Moving a request silently to empty queue_head may stall the - * queue. Kick the queue in those cases. This function is called - * from request completion path and calling directly into - * request_fn may confuse the driver. Always use kblockd. + * Kick the queue to avoid stall for two cases: + * 1. Moving a request silently to empty queue_head may stall the + * queue. + * 2. When flush request is running in non-queueable queue, the + * queue is hold. Restart the queue after flush request is finished + * to avoid stall. + * This function is called from request completion path and calling + * directly into request_fn may confuse the driver. Always use + * kblockd. */ - if (queued) + if (queued || q->flush_queue_delayed) blk_run_queue_async(q); + q->flush_queue_delayed = 0; } /** diff --git a/block/blk.h b/block/blk.h index c9df8fc3c9997..83e4bff36201d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -62,7 +62,26 @@ static inline struct request *__elv_next_request(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); return rq; } - + /* + * Flush request is running and flush request isn't queueable + * in the drive, we can hold the queue till flush request is + * finished. Even we don't do this, driver can't dispatch next + * requests and will requeue them. And this can improve + * throughput too. For example, we have request flush1, write1, + * flush 2. flush1 is dispatched, then queue is hold, write1 + * isn't inserted to queue. After flush1 is finished, flush2 + * will be dispatched. Since disk cache is already clean, + * flush2 will be finished very soon, so looks like flush2 is + * folded to flush1. + * Since the queue is hold, a flag is set to indicate the queue + * should be restarted later. Please see flush_end_io() for + * details. + */ + if (q->flush_pending_idx != q->flush_running_idx && + !queue_flush_queueable(q)) { + q->flush_queue_delayed = 1; + return NULL; + } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) return NULL; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8bd2a271b2d89..9f921bf4bf8c2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -365,6 +365,7 @@ struct request_queue */ unsigned int flush_flags; unsigned int flush_not_queueable:1; + unsigned int flush_queue_delayed:1; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; unsigned long flush_pending_since; -- 2.39.5