git.baikalelectronics.ru Git - kernel.git/commitdiff
io_uring: add IORING_OP_MADVISE
author: Jens Axboe <axboe@kernel.dk>
Thu, 26 Dec 2019 05:18:28 +0000 (22:18 -0700)
committer: Jens Axboe <axboe@kernel.dk>
Tue, 21 Jan 2020 00:04:02 +0000 (17:04 -0700)
This adds support for doing madvise(2) through io_uring. We assume that
any operation can block, and hence punt everything async. This could be
improved, but hard to make bullet proof. The async punt ensures it's
safe.

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/io_uring.c
include/uapi/linux/io_uring.h

index 9ca12b900b425f1a470d0b7ec970c849fffdf147..17a199c99c7cca46be5f30246e83ba3a7c7f4c46 100644 (file)
@@ -408,6 +408,13 @@ struct io_fadvise {
        u32                             advice;
 };
 
+struct io_madvise {
+       struct file                     *file;
+       u64                             addr;
+       u32                             len;
+       u32                             advice;
+};
+
 struct io_async_connect {
        struct sockaddr_storage         address;
 };
@@ -461,6 +468,7 @@ struct io_kiocb {
                struct io_close         close;
                struct io_files_update  files_update;
                struct io_fadvise       fadvise;
+               struct io_madvise       madvise;
        };
 
        struct io_async_ctx             *io;
@@ -680,6 +688,10 @@ static const struct io_op_def io_op_defs[] = {
                /* IORING_OP_FADVISE */
                .needs_file             = 1,
        },
+       {
+               /* IORING_OP_MADVISE */
+               .needs_mm               = 1,
+       },
 };
 
 static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -2449,6 +2461,42 @@ err:
        return 0;
 }
 
+static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
+       if (sqe->ioprio || sqe->buf_index || sqe->off)
+               return -EINVAL;
+
+       req->madvise.addr = READ_ONCE(sqe->addr);
+       req->madvise.len = READ_ONCE(sqe->len);
+       req->madvise.advice = READ_ONCE(sqe->fadvise_advice);
+       return 0;
+#else
+       return -EOPNOTSUPP;
+#endif
+}
+
+static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt,
+                     bool force_nonblock)
+{
+#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
+       struct io_madvise *ma = &req->madvise;
+       int ret;
+
+       if (force_nonblock)
+               return -EAGAIN;
+
+       ret = do_madvise(ma->addr, ma->len, ma->advice);
+       if (ret < 0)
+               req_set_fail_links(req);
+       io_cqring_add_event(req, ret);
+       io_put_req_find_next(req, nxt);
+       return 0;
+#else
+       return -EOPNOTSUPP;
+#endif
+}
+
 static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        if (sqe->ioprio || sqe->buf_index || sqe->addr)
@@ -3766,6 +3814,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
        case IORING_OP_FADVISE:
                ret = io_fadvise_prep(req, sqe);
                break;
+       case IORING_OP_MADVISE:
+               ret = io_madvise_prep(req, sqe);
+               break;
        default:
                printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
                                req->opcode);
@@ -3970,6 +4021,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                }
                ret = io_fadvise(req, nxt, force_nonblock);
                break;
+       case IORING_OP_MADVISE:
+               if (sqe) {
+                       ret = io_madvise_prep(req, sqe);
+                       if (ret)
+                               break;
+               }
+               ret = io_madvise(req, nxt, force_nonblock);
+               break;
        default:
                ret = -EINVAL;
                break;
index f86d1c77607877412f4f8bbeeb3bc25e3128a8fe..8ad3cece54408f9a63633bcaaf323db614b2b370 100644 (file)
@@ -88,6 +88,7 @@ enum {
        IORING_OP_READ,
        IORING_OP_WRITE,
        IORING_OP_FADVISE,
+       IORING_OP_MADVISE,
 
        /* this goes last, obviously */
        IORING_OP_LAST,