From de32c98fbdd0587fde77842f4a9ca9a4b47e335f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 9 Dec 2008 13:14:13 -0800 Subject: [PATCH] revert "percpu counter: clean up percpu_counter_sum_and_set()" Revert commit ac1b39ddd036ee8f9c1a98806f7e8e4d8c47b206 Author: Mingming Cao Date: Thu Oct 9 12:50:59 2008 -0400 percpu counter: clean up percpu_counter_sum_and_set() Before this patch we had the following: percpu_counter_sum(): return the percpu_counter's value percpu_counter_sum_and_set(): return the percpu_counter's value, copying that value into the central value and zeroing the per-cpu counters before returning. After this patch, percpu_counter_sum_and_set() has gone, and percpu_counter_sum() gets the old percpu_counter_sum_and_set() functionality. Problem is, as Eric points out, the old percpu_counter_sum_and_set() functionality was racy and wrong. It zeroes out counters on "other" cpus, without holding any locks which will prevent races agaist updates from those other CPUS. This patch reverts ac1b39ddd036ee8f9c1a98806f7e8e4d8c47b206. This means that percpu_counter_sum_and_set() still has the race, but percpu_counter_sum() does not. Note that this is not a simple revert - ext4 has since started using percpu_counter_sum() for its dirty_blocks counter as well. Note that this revert patch changes percpu_counter_sum() semantics. Before the patch, a call to percpu_counter_sum() will bring the counter's central counter mostly up-to-date, so a following percpu_counter_read() will return a close value. After this patch, a call to percpu_counter_sum() will leave the counter's central accumulator unaltered, so a subsequent call to percpu_counter_read() can now return a significantly inaccurate result. If there is any code in the tree which was introduced after 79f36b442a30622523d6b82ca8f65c9112d016d3 was merged, and which depends upon the new percpu_counter_sum() semantics, that code will break. Reported-by: Eric Dumazet Cc: "David S. Miller" Cc: Peter Zijlstra Cc: Mingming Cao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/balloc.c | 4 ++-- include/linux/percpu_counter.h | 12 +++++++++--- lib/percpu_counter.c | 8 +++++--- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d2003cdc36aa6..c17f69bcd7dd1 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) if (free_blocks - (nblocks + root_blocks + dirty_blocks) < EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum(fbc); - dirty_blocks = percpu_counter_sum(dbc); + free_blocks = percpu_counter_sum_and_set(fbc); + dirty_blocks = percpu_counter_sum_and_set(dbc); if (dirty_blocks < 0) { printk(KERN_CRIT "Dirty block accounting " "went wrong %lld\n", diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc1127..2083888353570 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc); + s64 ret = __percpu_counter_sum(fbc, 0); return ret < 0 ? 0 : ret; } +static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) +{ + return __percpu_counter_sum(fbc, 1); +} + + static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc); + return __percpu_counter_sum(fbc, 0); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 71b265c330ce3..dba1530a5b291 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() */ -s64 __percpu_counter_sum(struct percpu_counter *fbc) +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) { s64 ret; int cpu; @@ -62,9 +62,11 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; - *pcount = 0; + if (set) + *pcount = 0; } - fbc->count = ret; + if (set) + fbc->count = ret; spin_unlock(&fbc->lock); return ret; -- 2.39.5