git.baikalelectronics.ru Git - kernel.git/commitdiff
mm: submit multipage write for SWP_FS_OPS swap-space
author NeilBrown <neilb@suse.de>
Tue, 10 May 2022 01:20:49 +0000 (18:20 -0700)
committer akpm <akpm@linux-foundation.org>
Tue, 10 May 2022 01:20:49 +0000 (18:20 -0700)
swap_writepage() is given one page at a time, but may be called repeatedly
in succession.

For block-device swapspace, the blk_plug functionality allows the multiple
pages to be combined together at lower layers.  That cannot be used for
SWP_FS_OPS as blk_plug may not exist - it is only active when
CONFIG_BLOCK=y.  Consequently all swap writes over NFS are single page
writes.

With this patch we pass a pointer-to-pointer via the wbc.  swap_writepage
can store state between calls - much like the pointer passed explicitly to
swap_readpage.  After calling swap_writepage() some number of times, the
state will be passed to swap_write_unplug() which can submit the combined
request.

Link: https://lkml.kernel.org/r/164859778128.29473.5191868522654408537.stgit@noble.brown
Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: David Howells <dhowells@redhat.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/writeback.h
mm/page_io.c
mm/swap.h
mm/vmscan.c

index fec248ab1fec58fb7474c5e34ab1307357522f33..32b35f21cb97ec995054cc828a2db1c874febf02 100644 (file)
@@ -80,6 +80,13 @@ struct writeback_control {
 
        unsigned punt_to_cgroup:1;      /* cgrp punting, see __REQ_CGROUP_PUNT */
 
+       /* To enable batching of swap writes to non-block-device backends,
+        * "swap_plug" can be set to point to a 'struct swap_iocb *'.  When
+        * all swap writes have been submitted, if the swap_iocb is not NULL,
+        * swap_write_unplug() should be called.
+        */
+       struct swap_iocb **swap_plug;
+
 #ifdef CONFIG_CGROUP_WRITEBACK
        struct bdi_writeback *wb;       /* wb this writeback is issued under */
        struct inode *inode;            /* inode being written out */
index a63510fd661172ae217ebc0ea199021297fb1f21..c132511f521c5255bf25cf49efb64af530f6f109 100644 (file)
@@ -259,8 +259,9 @@ static void sio_write_complete(struct kiocb *iocb, long ret)
 {
        struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
        struct page *page = sio->bvec[0].bv_page;
+       int p;
 
-       if (ret != PAGE_SIZE) {
+       if (ret != PAGE_SIZE * sio->pages) {
                /*
                 * In the case of swap-over-nfs, this can be a
                 * temporary failure if the system has limited
@@ -271,43 +272,63 @@ static void sio_write_complete(struct kiocb *iocb, long ret)
                 * the normal direct-to-bio case as it could
                 * be temporary.
                 */
-               set_page_dirty(page);
-               ClearPageReclaim(page);
                pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
                                   ret, page_file_offset(page));
+               for (p = 0; p < sio->pages; p++) {
+                       page = sio->bvec[p].bv_page;
+                       set_page_dirty(page);
+                       ClearPageReclaim(page);
+               }
        } else
-               count_vm_event(PSWPOUT);
-       end_page_writeback(page);
+               count_vm_events(PSWPOUT, sio->pages);
+
+       for (p = 0; p < sio->pages; p++)
+               end_page_writeback(sio->bvec[p].bv_page);
+
        mempool_free(sio, sio_pool);
 }
 
 static int swap_writepage_fs(struct page *page, struct writeback_control *wbc)
 {
-       struct swap_iocb *sio;
+       struct swap_iocb *sio = NULL;
        struct swap_info_struct *sis = page_swap_info(page);
        struct file *swap_file = sis->swap_file;
-       struct address_space *mapping = swap_file->f_mapping;
-       struct iov_iter from;
-       int ret;
+       loff_t pos = page_file_offset(page);
 
        set_page_writeback(page);
        unlock_page(page);
-       sio = mempool_alloc(sio_pool, GFP_NOIO);
-       init_sync_kiocb(&sio->iocb, swap_file);
-       sio->iocb.ki_complete = sio_write_complete;
-       sio->iocb.ki_pos = page_file_offset(page);
-       sio->bvec[0].bv_page = page;
-       sio->bvec[0].bv_len = PAGE_SIZE;
-       sio->bvec[0].bv_offset = 0;
-       iov_iter_bvec(&from, WRITE, &sio->bvec[0], 1, PAGE_SIZE);
-       ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
-       if (ret != -EIOCBQUEUED)
-               sio_write_complete(&sio->iocb, ret);
-       return ret;
+       if (wbc->swap_plug)
+               sio = *wbc->swap_plug;
+       if (sio) {
+               if (sio->iocb.ki_filp != swap_file ||
+                   sio->iocb.ki_pos + sio->pages * PAGE_SIZE != pos) {
+                       swap_write_unplug(sio);
+                       sio = NULL;
+               }
+       }
+       if (!sio) {
+               sio = mempool_alloc(sio_pool, GFP_NOIO);
+               init_sync_kiocb(&sio->iocb, swap_file);
+               sio->iocb.ki_complete = sio_write_complete;
+               sio->iocb.ki_pos = pos;
+               sio->pages = 0;
+       }
+       sio->bvec[sio->pages].bv_page = page;
+       sio->bvec[sio->pages].bv_len = PAGE_SIZE;
+       sio->bvec[sio->pages].bv_offset = 0;
+       sio->pages += 1;
+       if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) {
+               swap_write_unplug(sio);
+               sio = NULL;
+       }
+       if (wbc->swap_plug)
+               *wbc->swap_plug = sio;
+
+       return 0;
 }
 
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
-               bio_end_io_t end_write_func)
+                    bio_end_io_t end_write_func)
 {
        struct bio *bio;
        int ret;
@@ -344,6 +365,19 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
        return 0;
 }
 
+void swap_write_unplug(struct swap_iocb *sio)
+{
+       struct iov_iter from;
+       struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
+       int ret;
+
+       iov_iter_bvec(&from, WRITE, sio->bvec, sio->pages,
+                     PAGE_SIZE * sio->pages);
+       ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
+       if (ret != -EIOCBQUEUED)
+               sio_write_complete(&sio->iocb, ret);
+}
+
 static void sio_read_complete(struct kiocb *iocb, long ret)
 {
        struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
index 0389ab147837cc88fed9ff04f8e034bfef80faeb..a6da8f61290460ffddfd73d9cf58b28d4d1561b6 100644 (file)
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -16,6 +16,7 @@ static inline void swap_read_unplug(struct swap_iocb *plug)
        if (unlikely(plug))
                __swap_read_unplug(plug);
 }
+void swap_write_unplug(struct swap_iocb *sio);
 int swap_writepage(struct page *page, struct writeback_control *wbc);
 void end_swap_bio_write(struct bio *bio);
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
@@ -71,6 +72,9 @@ static inline int swap_readpage(struct page *page, bool do_poll,
 {
        return 0;
 }
+static inline void swap_write_unplug(struct swap_iocb *sio)
+{
+}
 
 static inline struct address_space *swap_address_space(swp_entry_t entry)
 {
index 95ebcb5b3e12e6c83e701177c8cfa4994241f585..a9761b04564c4fb94cb644a41657734a01cdfb32 100644 (file)
@@ -1156,7 +1156,8 @@ typedef enum {
  * pageout is called by shrink_page_list() for each dirty page.
  * Calls ->writepage().
  */
-static pageout_t pageout(struct folio *folio, struct address_space *mapping)
+static pageout_t pageout(struct folio *folio, struct address_space *mapping,
+                        struct swap_iocb **plug)
 {
        /*
         * If the folio is dirty, only perform writeback if that write
@@ -1201,6 +1202,7 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping)
                        .range_start = 0,
                        .range_end = LLONG_MAX,
                        .for_reclaim = 1,
+                       .swap_plug = plug,
                };
 
                folio_set_reclaim(folio);
@@ -1533,6 +1535,7 @@ static unsigned int shrink_page_list(struct list_head *page_list,
        unsigned int nr_reclaimed = 0;
        unsigned int pgactivate = 0;
        bool do_demote_pass;
+       struct swap_iocb *plug = NULL;
 
        memset(stat, 0, sizeof(*stat));
        cond_resched();
@@ -1814,7 +1817,7 @@ retry:
                         * starts and then write it out here.
                         */
                        try_to_unmap_flush_dirty();
-                       switch (pageout(folio, mapping)) {
+                       switch (pageout(folio, mapping, &plug)) {
                        case PAGE_KEEP:
                                goto keep_locked;
                        case PAGE_ACTIVATE:
@@ -1968,6 +1971,8 @@ keep:
        list_splice(&ret_pages, page_list);
        count_vm_events(PGACTIVATE, pgactivate);
 
+       if (plug)
+               swap_write_unplug(plug);
        return nr_reclaimed;
 }