From 72ec04293462d853762d0cedbb5c2fcdfbf0ef73 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 30 Dec 2019 21:24:45 +0300 Subject: [PATCH] io_uring: optimise head checks in io_get_sqring() A user may ask to submit more than there is in the ring, and then io_uring will submit as much as it can. However, in the last iteration it will allocate an io_kiocb and immediately free it. It could do better and adjust @to_submit to what is in the ring. And since the ring's head is already checked here, there is no need to do it in the loop, spamming with smp_load_acquire()'s barriers Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 497ed610e8b2c..3398f4052ec0e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4522,7 +4522,6 @@ static void io_commit_sqring(struct io_ring_ctx *ctx) static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct io_uring_sqe **sqe_ptr) { - struct io_rings *rings = ctx->rings; u32 *sq_array = ctx->sq_array; unsigned head; @@ -4534,12 +4533,7 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req, * 2) allows the kernel side to track the head on its own, even * though the application is the one updating it. */ - head = ctx->cached_sq_head; - /* make sure SQ entry isn't read before tail */ - if (unlikely(head == smp_load_acquire(&rings->sq.tail))) - return false; - - head = READ_ONCE(sq_array[head & ctx->sq_mask]); + head = READ_ONCE(sq_array[ctx->cached_sq_head & ctx->sq_mask]); if (likely(head < ctx->sq_entries)) { /* * All io need record the previous position, if LINK vs DARIN, @@ -4557,7 +4551,7 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req, /* drop invalid entries */ ctx->cached_sq_head++; ctx->cached_sq_dropped++; - WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped); + WRITE_ONCE(ctx->rings->sq_dropped, ctx->cached_sq_dropped); return false; } @@ -4577,7 +4571,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, return -EBUSY; } - nr = min(nr, ctx->sq_entries); + /* make sure SQ entry isn't read before tail */ + nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx)); if (!percpu_ref_tryget_many(&ctx->refs, nr)) return -EAGAIN; -- 2.39.5