* rewinding from end of bio and restoring its original position.
* Caller is also responsibile for restoring bio's size.
*/
-void dm_bio_rewind(struct bio *bio, unsigned bytes)
+static void dm_bio_rewind(struct bio *bio, unsigned bytes)
{
if (bio_integrity(bio))
dm_bio_integrity_rewind(bio, bytes);
dm_bio_rewind_iter(bio, &bio->bi_iter, bytes);
}
+
+void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
+{
+ struct bio *orig = io->orig_bio;
+ struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
+ GFP_NOIO, bs);
+ /*
+ * dm_bio_rewind can restore to previous position since the
+ * end sector is fixed for original bio, but we still need
+ * to restore bio's size manually (using io->sectors).
+ */
+ dm_bio_rewind(new_orig, ((io->sector_offset << 9) -
+ orig->bi_iter.bi_size));
+ bio_trim(new_orig, 0, io->sectors);
+
+ bio_chain(new_orig, orig);
+ /*
+ * __bi_remaining was increased (by dm_split_and_process_bio),
+ * so must drop the one added in bio_chain.
+ */
+ atomic_dec(&orig->__bi_remaining);
+ io->orig_bio = new_orig;
+}
atomic_set(&io->io_count, 2);
this_cpu_inc(*md->pending_io);
io->orig_bio = bio;
- io->split_bio = NULL;
io->md = md;
spin_lock_init(&io->lock);
io->start_time = jiffies;
return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}
+static void dm_requeue_add_io(struct dm_io *io, bool first_stage)
+{
+ struct mapped_device *md = io->md;
+
+ if (first_stage) {
+ struct dm_io *next = md->requeue_list;
+
+ md->requeue_list = io;
+ io->next = next;
+ } else {
+ bio_list_add_head(&md->deferred, io->orig_bio);
+ }
+}
+
+static void dm_kick_requeue(struct mapped_device *md, bool first_stage)
+{
+ if (first_stage)
+ queue_work(md->wq, &md->requeue_work);
+ else
+ queue_work(md->wq, &md->work);
+}
+
/*
* Return true if the dm_io's original bio is requeued.
* io->status is updated with error if requeue disallowed.
*/
-static bool dm_handle_requeue(struct dm_io *io)
+static bool dm_handle_requeue(struct dm_io *io, bool first_stage)
{
- struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
+ struct bio *bio = io->orig_bio;
bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE);
bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) &&
(bio->bi_opf & REQ_POLLED));
spin_lock_irqsave(&md->deferred_lock, flags);
if ((__noflush_suspending(md) &&
!WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
- handle_polled_eagain) {
- bio_list_add_head(&md->deferred, bio);
+ handle_polled_eagain || first_stage) {
+ dm_requeue_add_io(io, first_stage);
requeued = true;
} else {
/*
}
if (requeued)
- queue_work(md->wq, &md->work);
+ dm_kick_requeue(md, first_stage);
return requeued;
}
-static void dm_io_complete(struct dm_io *io)
+static void __dm_io_complete(struct dm_io *io, bool first_stage)
{
- struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
+ struct bio *bio = io->orig_bio;
struct mapped_device *md = io->md;
blk_status_t io_error;
bool requeued;
- requeued = dm_handle_requeue(io);
+ requeued = dm_handle_requeue(io, first_stage);
+ if (requeued && first_stage)
+ return;
io_error = io->status;
if (dm_io_flagged(io, DM_IO_ACCOUNTED))
}
}
+static void dm_wq_requeue_work(struct work_struct *work)
+{
+ struct mapped_device *md = container_of(work, struct mapped_device,
+ requeue_work);
+ unsigned long flags;
+ struct dm_io *io;
+
+ /* reuse deferred lock to simplify dm_handle_requeue */
+ spin_lock_irqsave(&md->deferred_lock, flags);
+ io = md->requeue_list;
+ md->requeue_list = NULL;
+ spin_unlock_irqrestore(&md->deferred_lock, flags);
+
+ while (io) {
+ struct dm_io *next = io->next;
+
+ dm_io_rewind(io, &md->queue->bio_split);
+
+ io->next = NULL;
+ __dm_io_complete(io, false);
+ io = next;
+ }
+}
+
+/*
+ * Two staged requeue:
+ *
+ * 1) io->orig_bio points to the real original bio, and the part mapped to
+ * this io must be requeued, instead of other parts of the original bio.
+ *
+ * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io.
+ */
+static void dm_io_complete(struct dm_io *io)
+{
+ bool first_requeue;
+
+ /*
+ * Only dm_io that has been split needs two stage requeue, otherwise
+ * we may run into long bio clone chain during suspend and OOM could
+ * be triggered.
+ *
+ * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they
+ * also aren't handled via the first stage requeue.
+ */
+ if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
+ first_requeue = true;
+ else
+ first_requeue = false;
+
+ __dm_io_complete(io, first_requeue);
+}
+
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
struct dm_target_io *tio = clone_to_tio(bio);
+ struct dm_io *io = tio->io;
unsigned bio_sectors = bio_sectors(bio);
BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
* __split_and_process_bio() may have already saved mapped part
* for accounting but it is being reduced so update accordingly.
*/
- dm_io_set_flag(tio->io, DM_IO_WAS_SPLIT);
- tio->io->sectors = n_sectors;
+ dm_io_set_flag(io, DM_IO_WAS_SPLIT);
+ io->sectors = n_sectors;
+ io->sector_offset = bio_sectors(io->orig_bio);
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
*/
dm_io_set_flag(io, DM_IO_WAS_SPLIT);
io->sectors = len;
- }
-
- if (static_branch_unlikely(&stats_enabled) &&
- unlikely(dm_stats_used(&io->md->stats))) {
- /*
- * Save bi_sector in terms of its offset from end of
- * original bio, only needed for DM-stats' benefit.
- * - saved regardless of whether split needed so that
- * dm_accept_partial_bio() doesn't need to.
- */
- io->sector_offset = bio_end_sector(ci->bio) - ci->sector;
+ io->sector_offset = bio_sectors(ci->bio);
}
}
* Remainder must be passed to submit_bio_noacct() so it gets handled
* *after* bios already submitted have been completely processed.
*/
- WARN_ON_ONCE(!dm_io_flagged(io, DM_IO_WAS_SPLIT));
- io->split_bio = bio_split(bio, io->sectors, GFP_NOIO,
- &md->queue->bio_split);
- bio_chain(io->split_bio, bio);
- trace_block_split(io->split_bio, bio->bi_iter.bi_sector);
+ bio_trim(bio, io->sectors, ci.sector_count);
+ trace_block_split(bio, bio->bi_iter.bi_sector);
+ bio_inc_remaining(bio);
submit_bio_noacct(bio);
out:
/*
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
+ INIT_WORK(&md->requeue_work, dm_wq_requeue_work);
init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_holder.completion);
+ md->requeue_list = NULL;
md->swap_bios = get_swap_bios();
sema_init(&md->swap_bios_semaphore, md->swap_bios);
mutex_init(&md->swap_bios_lock);