dm thin: fix race between simultaneous io and discards to same block

author Joe Thornber <ejt@redhat.com>

Fri, 21 Dec 2012 20:23:31 +0000 (20:23 +0000)

committer Alasdair G Kergon <agk@redhat.com>

Fri, 21 Dec 2012 20:23:31 +0000 (20:23 +0000)
author Joe Thornber <ejt@redhat.com>
Fri, 21 Dec 2012 20:23:31 +0000 (20:23 +0000)
committer Alasdair G Kergon <agk@redhat.com>
Fri, 21 Dec 2012 20:23:31 +0000 (20:23 +0000)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 25dfd2311a61259422c04454fc882f70bd37ecd2..41c9e81ba74a1d3a4764f4896ea61ef1e4713501 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -368,6 +368,17 @@ static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
                 dm_thin_changed_this_transaction(tc->td);
  }
  
+static void inc_all_io_entry(struct pool *pool, struct bio *bio) /* record a pool->all_io_ds entry in the bio's endio hook */
+{
+       struct dm_thin_endio_hook *h;
+
+       if (bio->bi_rw & REQ_DISCARD) /* discards are skipped: no entry is taken and the hook is not touched */
+               return;
+
+       h = dm_get_mapinfo(bio)->ptr; /* per-bio hook; presumably the one thin_hook_bio() stored in map_context->ptr - confirm */
+       h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds); /* keep the returned entry in the hook */
+}
+
  static void issue(struct thin_c *tc, struct bio *bio)
  {
         struct pool *pool = tc->pool;
@@ -596,13 +607,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
  {
         struct thin_c *tc = m->tc;
  
+       inc_all_io_entry(tc->pool, m->bio);
+       cell_defer_except(tc, m->cell);
+       cell_defer_except(tc, m->cell2);
+
         if (m->pass_discard)
                 remap_and_issue(tc, m->bio, m->data_block);
         else
                 bio_endio(m->bio, 0);
  
-       cell_defer_except(tc, m->cell);
-       cell_defer_except(tc, m->cell2);
         mempool_free(m, tc->pool->mapping_pool);
  }
  
@@ -710,6 +723,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
                 h->overwrite_mapping = m;
                 m->bio = bio;
                 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
+               inc_all_io_entry(pool, bio);
                 remap_and_issue(tc, bio, data_dest);
         } else {
                 struct dm_io_region from, to;
@@ -779,6 +793,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
                 h->overwrite_mapping = m;
                 m->bio = bio;
                 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
+               inc_all_io_entry(pool, bio);
                 remap_and_issue(tc, bio, data_block);
         } else {
                 int r;
@@ -961,13 +976,15 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
                                 wake_worker(pool);
                         }
                 } else {
+                       inc_all_io_entry(pool, bio);
+                       cell_defer_except(tc, cell);
+                       cell_defer_except(tc, cell2);
+
                         /*
                          * The DM core makes sure that the discard doesn't span
                          * a block boundary.  So we submit the discard of a
                          * partial block appropriately.
                          */
-                       cell_defer_except(tc, cell);
-                       cell_defer_except(tc, cell2);
                         if ((!lookup_result.shared) && pool->pf.discard_passdown)
                                 remap_and_issue(tc, bio, lookup_result.block);
                         else
@@ -1039,8 +1056,9 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio,
                 struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr;
  
                 h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
-
+               inc_all_io_entry(pool, bio);
                 cell_defer_except(tc, cell);
+
                 remap_and_issue(tc, bio, lookup_result->block);
         }
  }
@@ -1055,7 +1073,9 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
          * Remap empty bios (flushes) immediately, without provisioning.
          */
         if (!bio->bi_size) {
+               inc_all_io_entry(tc->pool, bio);
                 cell_defer_except(tc, cell);
+
                 remap_and_issue(tc, bio, 0);
                 return;
         }
@@ -1110,26 +1130,22 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
         r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
         switch (r) {
         case 0:
-               /*
-                * We can release this cell now.  This thread is the only
-                * one that puts bios into a cell, and we know there were
-                * no preceding bios.
-                */
-               /*
-                * TODO: this will probably have to change when discard goes
-                * back in.
-                */
-               cell_defer_except(tc, cell);
-
-               if (lookup_result.shared)
+               if (lookup_result.shared) {
                         process_shared_bio(tc, bio, block, &lookup_result);
-               else
+                       cell_defer_except(tc, cell);
+               } else {
+                       inc_all_io_entry(tc->pool, bio);
+                       cell_defer_except(tc, cell);
+
                         remap_and_issue(tc, bio, lookup_result.block);
+               }
                 break;
  
         case -ENODATA:
                 if (bio_data_dir(bio) == READ && tc->origin_dev) {
+                       inc_all_io_entry(tc->pool, bio);
                         cell_defer_except(tc, cell);
+
                         remap_to_origin_and_issue(tc, bio);
                 } else
                         provision_block(tc, bio, block, cell);
@@ -1155,8 +1171,10 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
         case 0:
                 if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
                         bio_io_error(bio);
-               else
+               else {
+                       inc_all_io_entry(tc->pool, bio);
                         remap_and_issue(tc, bio, lookup_result.block);
+               }
                 break;
  
         case -ENODATA:
@@ -1166,6 +1184,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
                 }
  
                 if (tc->origin_dev) {
+                       inc_all_io_entry(tc->pool, bio);
                         remap_to_origin_and_issue(tc, bio);
                         break;
                 }
@@ -1346,7 +1365,7 @@ static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *b
  
         h->tc = tc;
         h->shared_read_entry = NULL;
-       h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : dm_deferred_entry_inc(pool->all_io_ds);
+       h->all_io_entry = NULL;
         h->overwrite_mapping = NULL;
  
         return h;
@@ -1363,6 +1382,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
         dm_block_t block = get_bio_block(tc, bio);
         struct dm_thin_device *td = tc->td;
         struct dm_thin_lookup_result result;
+       struct dm_bio_prison_cell *cell1, *cell2;
+       struct dm_cell_key key;
  
         map_context->ptr = thin_hook_bio(tc, bio);
  
@@ -1399,12 +1420,25 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio,
                          * shared flag will be set in their case.
                          */
                         thin_defer_bio(tc, bio);
-                       r = DM_MAPIO_SUBMITTED;
-               } else {
-                       remap(tc, bio, result.block);
-                       r = DM_MAPIO_REMAPPED;
+                       return DM_MAPIO_SUBMITTED;
                 }
-               break;
+
+               build_virtual_key(tc->td, block, &key);
+               if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1))
+                       return DM_MAPIO_SUBMITTED;
+
+               build_data_key(tc->td, result.block, &key);
+               if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) {
+                       cell_defer_except(tc, cell1);
+                       return DM_MAPIO_SUBMITTED;
+               }
+
+               inc_all_io_entry(tc->pool, bio);
+               cell_defer_except(tc, cell2);
+               cell_defer_except(tc, cell1);
+
+               remap(tc, bio, result.block);
+               return DM_MAPIO_REMAPPED;
  
         case -ENODATA:
                 if (get_pool_mode(tc->pool) == PM_READ_ONLY) {
author	Joe Thornber <ejt@redhat.com>
	Fri, 21 Dec 2012 20:23:31 +0000 (20:23 +0000)
committer	Alasdair G Kergon <agk@redhat.com>
	Fri, 21 Dec 2012 20:23:31 +0000 (20:23 +0000)