fs: restore nobh

author Nick Piggin <npiggin@suse.de>

Tue, 16 Oct 2007 08:25:25 +0000 (01:25 -0700)

committer Linus Torvalds <torvalds@woody.linux-foundation.org>

Tue, 16 Oct 2007 16:42:58 +0000 (09:42 -0700)
author Nick Piggin <npiggin@suse.de>
Tue, 16 Oct 2007 08:25:25 +0000 (01:25 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Tue, 16 Oct 2007 16:42:58 +0000 (09:42 -0700)
diff --git a/fs/buffer.c b/fs/buffer.c

index a89d25bf11714702c2727f5c009216d2078aa416..a406cfd89e3bc184dea41e7da231449ea2c9d112 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2369,7 +2369,7 @@ out_unlock:
  }
  
  /*
- * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
+ * nobh_write_begin()'s prereads are special: the buffer_heads are freed
   * immediately, while under the page lock.  So it needs a special end_io
   * handler which does not touch the bh after unlocking it.
   */
@@ -2378,17 +2378,46 @@ static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
         __end_buffer_read_notouch(bh, uptodate);
  }
  
+/*
+ * Attach the singly-linked buffer list built by nobh_write_begin() to the
+ * locked page.  Under mapping->private_lock (page-dirty races), close the
+ * list into a ring and dirty each buffer if the page is dirty; @head != NULL.
+ */
+static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
+{
+       struct buffer_head *bh;
+
+       BUG_ON(!PageLocked(page));
+
+       spin_lock(&page->mapping->private_lock);
+       bh = head;
+       do {
+               if (PageDirty(page))
+                       set_buffer_dirty(bh);
+               if (!bh->b_this_page)
+                       bh->b_this_page = head;
+               bh = bh->b_this_page;
+       } while (bh != head);
+       attach_page_buffers(page, head);
+       spin_unlock(&page->mapping->private_lock);
+}
+
  /*
   * On entry, the page is fully not uptodate.
   * On exit the page is fully uptodate in the areas outside (from,to)
   */
-int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
+int nobh_write_begin(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned flags,
+                       struct page **pagep, void **fsdata,
                         get_block_t *get_block)
  {
-       struct inode *inode = page->mapping->host;
+       struct inode *inode = mapping->host;
         const unsigned blkbits = inode->i_blkbits;
         const unsigned blocksize = 1 << blkbits;
         struct buffer_head *head, *bh;
+       struct page *page;
+       pgoff_t index;
+       unsigned from, to;
         unsigned block_in_page;
         unsigned block_start, block_end;
         sector_t block_in_file;
@@ -2397,8 +2426,23 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
         int ret = 0;
         int is_mapped_to_disk = 1;
  
-       if (page_has_buffers(page))
-               return block_prepare_write(page, from, to, get_block);
+       index = pos >> PAGE_CACHE_SHIFT;
+       from = pos & (PAGE_CACHE_SIZE - 1);
+       to = from + len;
+
+       page = __grab_cache_page(mapping, index);
+       if (!page)
+               return -ENOMEM;
+       *pagep = page;
+       *fsdata = NULL;
+
+       if (page_has_buffers(page)) {
+               unlock_page(page);
+               page_cache_release(page);
+               *pagep = NULL;
+               return block_write_begin(file, mapping, pos, len, flags, pagep,
+                                       fsdata, get_block);
+       }
  
         if (PageMappedToDisk(page))
                 return 0;
@@ -2413,8 +2457,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
          * than the circular one we're used to.
          */
         head = alloc_page_buffers(page, blocksize, 0);
-       if (!head)
-               return -ENOMEM;
+       if (!head) {
+               ret = -ENOMEM;
+               goto out_release;
+       }
  
         block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
  
@@ -2483,15 +2529,12 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
         if (is_mapped_to_disk)
                 SetPageMappedToDisk(page);
  
-       do {
-               bh = head;
-               head = head->b_this_page;
-               free_buffer_head(bh);
-       } while (head);
+       *fsdata = head; /* to be released by nobh_write_end */
  
         return 0;
  
  failed:
+       BUG_ON(!ret);
         /*
          * Error recovery is a bit difficult. We need to zero out blocks that
          * were newly allocated, and dirty them to ensure they get written out.
@@ -2499,64 +2542,57 @@ failed:
          * the handling of potential IO errors during writeout would be hard
          * (could try doing synchronous writeout, but what if that fails too?)
          */
-       spin_lock(&page->mapping->private_lock);
-       bh = head;
-       block_start = 0;
-       do {
-               if (PageUptodate(page))
-                       set_buffer_uptodate(bh);
-               if (PageDirty(page))
-                       set_buffer_dirty(bh);
+       attach_nobh_buffers(page, head);
+       page_zero_new_buffers(page, from, to);
  
-               block_end = block_start+blocksize;
-               if (block_end <= from)
-                       goto next;
-               if (block_start >= to)
-                       goto next;
+out_release:
+       unlock_page(page);
+       page_cache_release(page);
+       *pagep = NULL;
  
-               if (buffer_new(bh)) {
-                       clear_buffer_new(bh);
-                       if (!buffer_uptodate(bh)) {
-                               zero_user_page(page, block_start, bh->b_size, KM_USER0);
-                               set_buffer_uptodate(bh);
-                       }
-                       mark_buffer_dirty(bh);
-               }
-next:
-               block_start = block_end;
-               if (!bh->b_this_page)
-                       bh->b_this_page = head;
-               bh = bh->b_this_page;
-       } while (bh != head);
-       attach_page_buffers(page, head);
-       spin_unlock(&page->mapping->private_lock);
+       if (pos + len > inode->i_size)
+               vmtruncate(inode, inode->i_size);
  
         return ret;
  }
-EXPORT_SYMBOL(nobh_prepare_write);
+EXPORT_SYMBOL(nobh_write_begin);
  
-/*
- * Make sure any changes to nobh_commit_write() are reflected in
- * nobh_truncate_page(), since it doesn't call commit_write().
- */
-int nobh_commit_write(struct file *file, struct page *page,
-               unsigned from, unsigned to)
+int nobh_write_end(struct file *file, struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned copied,
+                       struct page *page, void *fsdata)
  {
         struct inode *inode = page->mapping->host;
-       loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+       struct buffer_head *head = fsdata; /* list from nobh_write_begin */
+       struct buffer_head *bh;
  
-       if (page_has_buffers(page))
-               return generic_commit_write(file, page, from, to);
+       if (!PageMappedToDisk(page)) {
+               if (unlikely(copied < len) && head && !page_has_buffers(page))
+                       attach_nobh_buffers(page, head);
+               if (page_has_buffers(page))
+                       return generic_write_end(file, mapping, pos, len,
+                                               copied, page, fsdata);
+       }
  
         SetPageUptodate(page);
         set_page_dirty(page);
-       if (pos > inode->i_size) {
-               i_size_write(inode, pos);
+       if (pos+copied > inode->i_size) {
+               i_size_write(inode, pos+copied);
                 mark_inode_dirty(inode);
         }
-       return 0;
+
+       unlock_page(page);
+       page_cache_release(page);
+
+       /* The private list was never attached to the page, so free it here */
+       while (head) {
+               bh = head;
+               head = head->b_this_page;
+               free_buffer_head(bh);
+       }
+
+       return copied;
  }
-EXPORT_SYMBOL(nobh_commit_write);
+EXPORT_SYMBOL(nobh_write_end);
  
  /*
   * nobh_writepage() - based on block_full_write_page() except
@@ -2609,44 +2645,91 @@ out:
  }
  EXPORT_SYMBOL(nobh_writepage);
  
-/*
- * This function assumes that ->prepare_write() uses nobh_prepare_write().
- */
-int nobh_truncate_page(struct address_space *mapping, loff_t from)
+/*
+ * Zero the tail of the block that contains @from (from @from up to the next
+ * block boundary) and dirty the page.  Nothing is done if @from is block
+ * aligned or the block is a hole; pages that have buffers are handed to
+ * block_truncate_page().  Returns 0 or a negative errno.
+ */
+int nobh_truncate_page(struct address_space *mapping,
+                       loff_t from, get_block_t *get_block)
  {
-       struct inode *inode = mapping->host;
-       unsigned blocksize = 1 << inode->i_blkbits;
         pgoff_t index = from >> PAGE_CACHE_SHIFT;
         unsigned offset = from & (PAGE_CACHE_SIZE-1);
-       unsigned to;
+       unsigned blocksize;
+       sector_t iblock;
+       unsigned length, pos;
+       struct inode *inode = mapping->host;
         struct page *page;
-       const struct address_space_operations *a_ops = mapping->a_ops;
-       int ret = 0;
+       struct buffer_head map_bh;
+       int err;
  
-       if ((offset & (blocksize - 1)) == 0)
-               goto out;
+       blocksize = 1 << inode->i_blkbits;
+       length = offset & (blocksize - 1);
+
+       /* Block boundary? Nothing to do */
+       if (!length)
+               return 0;
+
+       length = blocksize - length;
+       iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
  
-       ret = -ENOMEM;
         page = grab_cache_page(mapping, index);
+       err = -ENOMEM;
         if (!page)
                 goto out;
  
-       to = (offset + blocksize) & ~(blocksize - 1);
-       ret = a_ops->prepare_write(NULL, page, offset, to);
-       if (ret == 0) {
-               zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
-                               KM_USER0);
-               /*
-                * It would be more correct to call aops->commit_write()
-                * here, but this is more efficient.
-                */
-               SetPageUptodate(page);
-               set_page_dirty(page);
+       if (page_has_buffers(page)) {
+has_buffers:
+               unlock_page(page);
+               page_cache_release(page);
+               return block_truncate_page(mapping, from, get_block);
         }
+
+       /* Find the buffer that contains "offset" */
+       pos = blocksize;
+       while (offset >= pos) {
+               iblock++;
+               pos += blocksize;
+       }
+
+       /*
+        * map_bh is on the stack: hand get_block() a defined request size
+        * and clear state so that buffer_mapped() below tests a real value.
+        */
+       map_bh.b_size = blocksize;
+       map_bh.b_state = 0;
+       err = get_block(inode, iblock, &map_bh, 0);
+       if (err)
+               goto unlock;
+       /* unmapped? It's a hole - nothing to do */
+       if (!buffer_mapped(&map_bh))
+               goto unlock;
+
+       /* Ok, it's mapped. Make sure it's up-to-date */
+       if (!PageUptodate(page)) {
+               err = mapping->a_ops->readpage(NULL, page);
+               if (err) {
+                       page_cache_release(page);
+                       goto out;
+               }
+               lock_page(page);
+               if (!PageUptodate(page)) {
+                       err = -EIO;
+                       goto unlock;
+               }
+               if (page_has_buffers(page))
+                       goto has_buffers;
+       }
+       zero_user_page(page, offset, length, KM_USER0);
+       set_page_dirty(page);
+       err = 0;
+
+unlock:
         unlock_page(page);
         page_cache_release(page);
  out:
-       return ret;
+       return err;
  }
  EXPORT_SYMBOL(nobh_truncate_page);
  
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c

index 63ab02aa4c528a4b4d19928f4c97582d78cdf05a..1b102a1ccebb373761b02194cc2b138a2f194cf0 100644 (file)
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -659,6 +659,20 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
         return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
  }
  
+static int
+ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
+               loff_t pos, unsigned len, unsigned flags,
+               struct page **pagep, void **fsdata)
+{
+       /*
+        * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
+        * directory handling code to pass around offsets rather than struct
+        * pages in order to make this work easily.
+        */
+       return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+                                                       ext2_get_block);
+}
+
  static int ext2_nobh_writepage(struct page *page,
                         struct writeback_control *wbc)
  {
@@ -710,7 +724,8 @@ const struct address_space_operations ext2_nobh_aops = {
         .readpages              = ext2_readpages,
         .writepage              = ext2_nobh_writepage,
         .sync_page              = block_sync_page,
-       /* XXX: todo */
+       .write_begin            = ext2_nobh_write_begin,
+       .write_end              = nobh_write_end,
         .bmap                   = ext2_bmap,
         .direct_IO              = ext2_direct_IO,
         .writepages             = ext2_writepages,
@@ -927,7 +942,8 @@ void ext2_truncate (struct inode * inode)
         if (mapping_is_xip(inode->i_mapping))
                 xip_truncate_page(inode->i_mapping, inode->i_size);
         else if (test_opt(inode->i_sb, NOBH))
-               nobh_truncate_page(inode->i_mapping, inode->i_size);
+               nobh_truncate_page(inode->i_mapping,
+                               inode->i_size, ext2_get_block);
         else
                 block_truncate_page(inode->i_mapping,
                                 inode->i_size, ext2_get_block);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c

index 6af37856361168dc50105ff8b8d23fb1305a7cde..4672013802e126a9f0d0da9f14947ebfa6830cb9 100644 (file)
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -279,8 +279,7 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
                                 struct page **pagep, void **fsdata)
  {
-       *pagep = NULL;
-       return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+       return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
                                 jfs_get_block);
  }
  
@@ -306,7 +305,7 @@ const struct address_space_operations jfs_aops = {
         .writepages     = jfs_writepages,
         .sync_page      = block_sync_page,
         .write_begin    = jfs_write_begin,
-       .write_end      = generic_write_end,
+       .write_end      = nobh_write_end,
         .bmap           = jfs_bmap,
         .direct_IO      = jfs_direct_IO,
  };
@@ -359,7 +358,7 @@ void jfs_truncate(struct inode *ip)
  {
         jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
  
-       block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
+       nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
  
         IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
         jfs_truncate_nolock(ip, ip->i_size);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h

index f4ef547fc609d3ab5260b9c09ae596d0b7610114..da0d83fbadc0f30e11ebcb82171e75069431a463 100644 (file)
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -226,9 +226,13 @@ sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
  int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
  int block_truncate_page(struct address_space *, loff_t, get_block_t *);
  int file_fsync(struct file *, struct dentry *, int);
-int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
-int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
-int nobh_truncate_page(struct address_space *, loff_t);
+int nobh_write_begin(struct file *, struct address_space *,
+                               loff_t, unsigned, unsigned,
+                               struct page **, void **, get_block_t*);
+int nobh_write_end(struct file *, struct address_space *,
+                               loff_t, unsigned, unsigned,
+                               struct page *, void *);
+int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
  int nobh_writepage(struct page *page, get_block_t *get_block,
                          struct writeback_control *wbc);
author	Nick Piggin <npiggin@suse.de>
	Tue, 16 Oct 2007 08:25:25 +0000 (01:25 -0700)
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>
	Tue, 16 Oct 2007 16:42:58 +0000 (09:42 -0700)
fs/buffer.c		patch \| blob \| history
fs/ext2/inode.c		patch \| blob \| history
fs/jfs/inode.c		patch \| blob \| history
include/linux/buffer_head.h		patch \| blob \| history