]> git.baikalelectronics.ru Git - kernel.git/commitdiff
zram: idle writeback fixes and cleanup
authorMinchan Kim <minchan@kernel.org>
Tue, 8 Jan 2019 23:22:53 +0000 (15:22 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 9 Jan 2019 01:15:10 +0000 (17:15 -0800)
This patch includes some fixes and cleanup for idle-page writeback.

1. writeback_limit interface

Now writeback_limit interface is rather conusing.  For example, once
writeback limit budget is exausted, admin can see 0 from
/sys/block/zramX/writeback_limit which is same semantic with disable
writeback_limit at this moment.  IOW, admin cannot tell that zero came
from disable writeback limit or exausted writeback limit.

To make the interface clear, let's sepatate enable of writeback limit to
another knob - /sys/block/zram0/writeback_limit_enable

* before:
  while true :
    # to re-enable writeback limit once previous one is used up
    echo 0 > /sys/block/zram0/writeback_limit
    echo $((200<<20)) > /sys/block/zram0/writeback_limit
    ..
    .. # used up the writeback limit budget

* new
  # To enable writeback limit, from the beginning, admin should
  # enable it.
  echo $((200<<20)) > /sys/block/zram0/writeback_limit
  echo 1 > /sys/block/zram/0/writeback_limit_enable
  while true :
    echo $((200<<20)) > /sys/block/zram0/writeback_limit
    ..
    .. # used up the writeback limit budget

It's much strightforward.

2. fix condition check idle/huge writeback mode check

The mode in writeback_store is not bit opeartion any more so no need to
use bit operations.  Furthermore, current condition check is broken in
that it does writeback every pages regardless of huge/idle.

3. clean up idle_store

No need to use goto.

[minchan@kernel.org: missed spin_lock_init]
Link: http://lkml.kernel.org/r/20190103001601.GA255139@google.com
Link: http://lkml.kernel.org/r/20181224033529.19450-1-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Suggested-by: John Dias <joaodias@google.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: John Dias <joaodias@google.com>
Cc: Srinivas Paladugu <srnvs@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/ABI/testing/sysfs-block-zram
Documentation/blockdev/zram.txt
drivers/block/zram/zram_drv.c
drivers/block/zram/zram_drv.h

index 9d2339a485c8ab17b2dd13979e139bb1309db8eb..14b2bf2e5105ced6872477b0b85e6f24a76f1d08 100644 (file)
@@ -122,11 +122,18 @@ Description:
                statistics (bd_count, bd_reads, bd_writes) in a format
                similar to block layer statistics file format.
 
+What:          /sys/block/zram<id>/writeback_limit_enable
+Date:          November 2018
+Contact:       Minchan Kim <minchan@kernel.org>
+Description:
+               The writeback_limit_enable file is read-write and specifies
+               eanbe of writeback_limit feature. "1" means eable the feature.
+               No limit "0" is the initial state.
+
 What:          /sys/block/zram<id>/writeback_limit
 Date:          November 2018
 Contact:       Minchan Kim <minchan@kernel.org>
 Description:
                The writeback_limit file is read-write and specifies the maximum
                amount of writeback ZRAM can do. The limit could be changed
-               in run time and "0" means disable the limit.
-               No limit is the initial state.
+               in run time.
index 436c5e98e1b6038511ac5bc1ac9ffd83e418d8ff..4df0ce27108572e597a66f864a6bc6b6d9aa00cc 100644 (file)
@@ -156,22 +156,23 @@ Per-device statistics are exported as various nodes under /sys/block/zram<id>/
 A brief description of exported device attributes. For more details please
 read Documentation/ABI/testing/sysfs-block-zram.
 
-Name            access            description
-----            ------            -----------
-disksize          RW    show and set the device's disk size
-initstate         RO    shows the initialization state of the device
-reset             WO    trigger device reset
-mem_used_max      WO    reset the `mem_used_max' counter (see later)
-mem_limit         WO    specifies the maximum amount of memory ZRAM can use
-                        to store the compressed data
-writeback_limit   WO    specifies the maximum amount of write IO zram can
-                       write out to backing device as 4KB unit
-max_comp_streams  RW    the number of possible concurrent compress operations
-comp_algorithm    RW    show and change the compression algorithm
-compact           WO    trigger memory compaction
-debug_stat        RO    this file is used for zram debugging purposes
-backing_dev      RW    set up backend storage for zram to write out
-idle             WO    mark allocated slot as idle
+Name                   access            description
+----                   ------            -----------
+disksize               RW      show and set the device's disk size
+initstate              RO      shows the initialization state of the device
+reset                  WO      trigger device reset
+mem_used_max           WO      reset the `mem_used_max' counter (see later)
+mem_limit              WO      specifies the maximum amount of memory ZRAM can use
+                               to store the compressed data
+writeback_limit        WO      specifies the maximum amount of write IO zram can
+                               write out to backing device as 4KB unit
+writeback_limit_enable  RW     show and set writeback_limit feature
+max_comp_streams       RW      the number of possible concurrent compress operations
+comp_algorithm         RW      show and change the compression algorithm
+compact                WO      trigger memory compaction
+debug_stat             RO      this file is used for zram debugging purposes
+backing_dev            RW      set up backend storage for zram to write out
+idle                   WO      mark allocated slot as idle
 
 
 User space is advised to use the following files to read the device statistics.
@@ -280,32 +281,51 @@ With the command, zram writeback idle pages from memory to the storage.
 If there are lots of write IO with flash device, potentially, it has
 flash wearout problem so that admin needs to design write limitation
 to guarantee storage health for entire product life.
-To overcome the concern, zram supports "writeback_limit".
-The "writeback_limit"'s default value is 0 so that it doesn't limit
-any writeback. If admin want to measure writeback count in a certain
-period, he could know it via /sys/block/zram0/bd_stat's 3rd column.
+
+To overcome the concern, zram supports "writeback_limit" feature.
+The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
+any writeback. IOW, if admin want to apply writeback budget, he should
+enable writeback_limit_enable via
+
+       $ echo 1 > /sys/block/zramX/writeback_limit_enable
+
+Once writeback_limit_enable is set, zram doesn't allow any writeback
+until admin set the budget via /sys/block/zramX/writeback_limit.
+
+(If admin doesn't enable writeback_limit_enable, writeback_limit's value
+assigned via /sys/block/zramX/writeback_limit is meaninless.)
 
 If admin want to limit writeback as per-day 400M, he could do it
 like below.
 
-    MB_SHIFT=20
-    4K_SHIFT=12
-    echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
-           /sys/block/zram0/writeback_limit.
+       $ MB_SHIFT=20
+       $ 4K_SHIFT=12
+       $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+               /sys/block/zram0/writeback_limit.
+       $ echo 1 > /sys/block/zram0/writeback_limit_enable
 
-If admin want to allow further write again, he could do it like below
+If admin want to allow further write again once the bugdet is exausted,
+he could do it like below
 
-    echo 0 > /sys/block/zram0/writeback_limit
+       $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
+               /sys/block/zram0/writeback_limit
 
 If admin want to see remaining writeback budget since he set,
 
-    cat /sys/block/zram0/writeback_limit
+       $ cat /sys/block/zramX/writeback_limit
+
+If admin want to disable writeback limit, he could do
+
+       $ echo 0 > /sys/block/zramX/writeback_limit_enable
 
 The writeback_limit count will reset whenever you reset zram(e.g.,
 system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
 writeback happened until you reset the zram to allocate extra writeback
 budget in next setting is user's job.
 
+If admin want to measure writeback count in a certain period, he could
+know it via /sys/block/zram0/bd_stat's 3rd column.
+
 = memory tracking
 
 With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
index 33c5cc879f246e09b413de0d53687cf9f433a87f..04ca65912638d50e16028de4c99cd9c127a319fd 100644 (file)
@@ -316,11 +316,9 @@ static ssize_t idle_store(struct device *dev,
                 * See the comment in writeback_store.
                 */
                zram_slot_lock(zram, index);
-               if (!zram_allocated(zram, index) ||
-                               zram_test_flag(zram, index, ZRAM_UNDER_WB))
-                       goto next;
-               zram_set_flag(zram, index, ZRAM_IDLE);
-next:
+               if (zram_allocated(zram, index) &&
+                               !zram_test_flag(zram, index, ZRAM_UNDER_WB))
+                       zram_set_flag(zram, index, ZRAM_IDLE);
                zram_slot_unlock(zram, index);
        }
 
@@ -330,6 +328,41 @@ next:
 }
 
 #ifdef CONFIG_ZRAM_WRITEBACK
+static ssize_t writeback_limit_enable_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct zram *zram = dev_to_zram(dev);
+       u64 val;
+       ssize_t ret = -EINVAL;
+
+       if (kstrtoull(buf, 10, &val))
+               return ret;
+
+       down_read(&zram->init_lock);
+       spin_lock(&zram->wb_limit_lock);
+       zram->wb_limit_enable = val;
+       spin_unlock(&zram->wb_limit_lock);
+       up_read(&zram->init_lock);
+       ret = len;
+
+       return ret;
+}
+
+static ssize_t writeback_limit_enable_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       bool val;
+       struct zram *zram = dev_to_zram(dev);
+
+       down_read(&zram->init_lock);
+       spin_lock(&zram->wb_limit_lock);
+       val = zram->wb_limit_enable;
+       spin_unlock(&zram->wb_limit_lock);
+       up_read(&zram->init_lock);
+
+       return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
 static ssize_t writeback_limit_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t len)
 {
@@ -341,9 +374,9 @@ static ssize_t writeback_limit_store(struct device *dev,
                return ret;
 
        down_read(&zram->init_lock);
-       atomic64_set(&zram->stats.bd_wb_limit, val);
-       if (val == 0)
-               zram->stop_writeback = false;
+       spin_lock(&zram->wb_limit_lock);
+       zram->bd_wb_limit = val;
+       spin_unlock(&zram->wb_limit_lock);
        up_read(&zram->init_lock);
        ret = len;
 
@@ -357,7 +390,9 @@ static ssize_t writeback_limit_show(struct device *dev,
        struct zram *zram = dev_to_zram(dev);
 
        down_read(&zram->init_lock);
-       val = atomic64_read(&zram->stats.bd_wb_limit);
+       spin_lock(&zram->wb_limit_lock);
+       val = zram->bd_wb_limit;
+       spin_unlock(&zram->wb_limit_lock);
        up_read(&zram->init_lock);
 
        return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
@@ -588,8 +623,8 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
        return 1;
 }
 
-#define HUGE_WRITEBACK 0x1
-#define IDLE_WRITEBACK 0x2
+#define HUGE_WRITEBACK 1
+#define IDLE_WRITEBACK 2
 
 static ssize_t writeback_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t len)
@@ -602,7 +637,7 @@ static ssize_t writeback_store(struct device *dev,
        struct page *page;
        ssize_t ret, sz;
        char mode_buf[8];
-       unsigned long mode = -1UL;
+       int mode = -1;
        unsigned long blk_idx = 0;
 
        sz = strscpy(mode_buf, buf, sizeof(mode_buf));
@@ -618,7 +653,7 @@ static ssize_t writeback_store(struct device *dev,
        else if (!strcmp(mode_buf, "huge"))
                mode = HUGE_WRITEBACK;
 
-       if (mode == -1UL)
+       if (mode == -1)
                return -EINVAL;
 
        down_read(&zram->init_lock);
@@ -645,10 +680,13 @@ static ssize_t writeback_store(struct device *dev,
                bvec.bv_len = PAGE_SIZE;
                bvec.bv_offset = 0;
 
-               if (zram->stop_writeback) {
+               spin_lock(&zram->wb_limit_lock);
+               if (zram->wb_limit_enable && !zram->bd_wb_limit) {
+                       spin_unlock(&zram->wb_limit_lock);
                        ret = -EIO;
                        break;
                }
+               spin_unlock(&zram->wb_limit_lock);
 
                if (!blk_idx) {
                        blk_idx = alloc_block_bdev(zram);
@@ -667,10 +705,11 @@ static ssize_t writeback_store(struct device *dev,
                                zram_test_flag(zram, index, ZRAM_UNDER_WB))
                        goto next;
 
-               if ((mode & IDLE_WRITEBACK &&
-                         !zram_test_flag(zram, index, ZRAM_IDLE)) &&
-                   (mode & HUGE_WRITEBACK &&
-                         !zram_test_flag(zram, index, ZRAM_HUGE)))
+               if (mode == IDLE_WRITEBACK &&
+                         !zram_test_flag(zram, index, ZRAM_IDLE))
+                       goto next;
+               if (mode == HUGE_WRITEBACK &&
+                         !zram_test_flag(zram, index, ZRAM_HUGE))
                        goto next;
                /*
                 * Clearing ZRAM_UNDER_WB is duty of caller.
@@ -732,11 +771,10 @@ static ssize_t writeback_store(struct device *dev,
                zram_set_element(zram, index, blk_idx);
                blk_idx = 0;
                atomic64_inc(&zram->stats.pages_stored);
-               if (atomic64_add_unless(&zram->stats.bd_wb_limit,
-                                       -1 << (PAGE_SHIFT - 12), 0)) {
-                       if (atomic64_read(&zram->stats.bd_wb_limit) == 0)
-                               zram->stop_writeback = true;
-               }
+               spin_lock(&zram->wb_limit_lock);
+               if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
+                       zram->bd_wb_limit -=  1UL << (PAGE_SHIFT - 12);
+               spin_unlock(&zram->wb_limit_lock);
 next:
                zram_slot_unlock(zram, index);
        }
@@ -1812,6 +1850,7 @@ static DEVICE_ATTR_RW(comp_algorithm);
 static DEVICE_ATTR_RW(backing_dev);
 static DEVICE_ATTR_WO(writeback);
 static DEVICE_ATTR_RW(writeback_limit);
+static DEVICE_ATTR_RW(writeback_limit_enable);
 #endif
 
 static struct attribute *zram_disk_attrs[] = {
@@ -1828,6 +1867,7 @@ static struct attribute *zram_disk_attrs[] = {
        &dev_attr_backing_dev.attr,
        &dev_attr_writeback.attr,
        &dev_attr_writeback_limit.attr,
+       &dev_attr_writeback_limit_enable.attr,
 #endif
        &dev_attr_io_stat.attr,
        &dev_attr_mm_stat.attr,
@@ -1867,7 +1907,9 @@ static int zram_add(void)
        device_id = ret;
 
        init_rwsem(&zram->init_lock);
-
+#ifdef CONFIG_ZRAM_WRITEBACK
+       spin_lock_init(&zram->wb_limit_lock);
+#endif
        queue = blk_alloc_queue(GFP_KERNEL);
        if (!queue) {
                pr_err("Error allocating disk queue for device %d\n",
index 4bd3afd15e833e2e8a75292e9f8ffcc34e162b63..f2fd46daa7604583b1c3bebaba86b484bca901c7 100644 (file)
@@ -86,7 +86,6 @@ struct zram_stats {
        atomic64_t bd_count;            /* no. of pages in backing device */
        atomic64_t bd_reads;            /* no. of reads from backing device */
        atomic64_t bd_writes;           /* no. of writes from backing device */
-       atomic64_t bd_wb_limit;         /* writeback limit of backing device */
 #endif
 };
 
@@ -114,8 +113,10 @@ struct zram {
         */
        bool claim; /* Protected by bdev->bd_mutex */
        struct file *backing_dev;
-       bool stop_writeback;
 #ifdef CONFIG_ZRAM_WRITEBACK
+       spinlock_t wb_limit_lock;
+       bool wb_limit_enable;
+       u64 bd_wb_limit;
        struct block_device *bdev;
        unsigned int old_block_size;
        unsigned long *bitmap;