From 48f3a5f035b741f5713ee196c334a072e906ab92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 May 2022 09:36:36 +0200 Subject: [PATCH] btrfs: defer I/O completion based on the btrfs_raid_bio Instead of attaching an extra allocation and an indirect call to each low-level bio issued by the RAID code, add a work_struct to struct btrfs_raid_bio and only defer the per-rbio completion action. The per-bio actions for all the I/Os are trivial and can be safely done from interrupt context. As a nice side effect this also allows sharing the boilerplate code for the per-bio completions. Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 12 ++---- fs/btrfs/disk-io.h | 1 - fs/btrfs/raid56.c | 102 ++++++++++++++++++--------------------------- fs/btrfs/raid56.h | 2 + 5 files changed, 47 insertions(+), 72 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6d4e71f52910e..1d5b38f3aa5fd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -852,7 +852,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *flush_workers; struct btrfs_workqueue *endio_workers; struct btrfs_workqueue *endio_meta_workers; - struct btrfs_workqueue *endio_raid56_workers; + struct workqueue_struct *endio_raid56_workers; struct workqueue_struct *rmw_workers; struct btrfs_workqueue *endio_meta_write_workers; struct btrfs_workqueue *endio_write_workers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 018510188a0d1..1c9c6c2980dd2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -754,14 +754,10 @@ static void end_workqueue_bio(struct bio *bio) wq = fs_info->endio_meta_write_workers; else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) wq = fs_info->endio_freespace_worker; - else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - wq = fs_info->endio_raid56_workers; else wq = fs_info->endio_write_workers; } else { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) - wq = fs_info->endio_raid56_workers; 
- else if (end_io_wq->metadata) + if (end_io_wq->metadata) wq = fs_info->endio_meta_workers; else wq = fs_info->endio_workers; @@ -2281,7 +2277,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->hipri_workers); btrfs_destroy_workqueue(fs_info->workers); btrfs_destroy_workqueue(fs_info->endio_workers); - btrfs_destroy_workqueue(fs_info->endio_raid56_workers); + if (fs_info->endio_raid56_workers) + destroy_workqueue(fs_info->endio_raid56_workers); if (fs_info->rmw_workers) destroy_workqueue(fs_info->rmw_workers); btrfs_destroy_workqueue(fs_info->endio_write_workers); @@ -2490,8 +2487,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) btrfs_alloc_workqueue(fs_info, "endio-meta-write", flags, max_active, 2); fs_info->endio_raid56_workers = - btrfs_alloc_workqueue(fs_info, "endio-raid56", flags, - max_active, 4); + alloc_workqueue("btrfs-endio-raid56", flags, max_active); fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active); fs_info->endio_write_workers = btrfs_alloc_workqueue(fs_info, "endio-write", flags, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 4ee8c42c9f783..809ef065f1666 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,7 +21,6 @@ enum btrfs_wq_endio_type { BTRFS_WQ_ENDIO_DATA, BTRFS_WQ_ENDIO_METADATA, BTRFS_WQ_ENDIO_FREE_SPACE, - BTRFS_WQ_ENDIO_RAID56, }; static inline u64 btrfs_sb_offset(int mirror) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index baba435692d2c..00cd9e8db7ae0 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1488,15 +1488,7 @@ static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio) } } -/* - * end io for the read phase of the rmw cycle. All the bios here are physical - * stripe bios we've read from the disk so we can recalculate the parity of the - * stripe. 
- * - * This will usually kick off finish_rmw once all the bios are read in, but it - * may trigger parity reconstruction if we had any errors along the way - */ -static void raid_rmw_end_io(struct bio *bio) +static void raid56_bio_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; @@ -1507,23 +1499,34 @@ static void raid_rmw_end_io(struct bio *bio) bio_put(bio); - if (!atomic_dec_and_test(&rbio->stripes_pending)) - return; + if (atomic_dec_and_test(&rbio->stripes_pending)) + queue_work(rbio->bioc->fs_info->endio_raid56_workers, + &rbio->end_io_work); +} - if (atomic_read(&rbio->error) > rbio->bioc->max_errors) - goto cleanup; +/* + * End io handler for the read phase of the RMW cycle. All the bios here are + * physical stripe bios we've read from the disk so we can recalculate the + * parity of the stripe. + * + * This will usually kick off finish_rmw once all the bios are read in, but it + * may trigger parity reconstruction if we had any errors along the way + */ +static void raid56_rmw_end_io_work(struct work_struct *work) +{ + struct btrfs_raid_bio *rbio = + container_of(work, struct btrfs_raid_bio, end_io_work); + + if (atomic_read(&rbio->error) > rbio->bioc->max_errors) { + rbio_orig_end_io(rbio, BLK_STS_IOERR); + return; + } /* - * this will normally call finish_rmw to start our write - * but if there are any failed stripes we'll reconstruct - * from parity first + * This will normally call finish_rmw to start our write but if there + * are any failed stripes we'll reconstruct from parity first. */ validate_rbio_for_rmw(rbio); - return; - -cleanup: - - rbio_orig_end_io(rbio, BLK_STS_IOERR); } /* @@ -1598,10 +1601,9 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) * touch it after that. 
*/ atomic_set(&rbio->stripes_pending, bios_to_read); + INIT_WORK(&rbio->end_io_work, raid56_rmw_end_io_work); while ((bio = bio_list_pop(&bio_list))) { - bio->bi_end_io = raid_rmw_end_io; - - btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); + bio->bi_end_io = raid56_bio_end_io; if (trace_raid56_read_partial_enabled()) { struct raid56_bio_trace_info trace_info = { 0 }; @@ -2076,25 +2078,13 @@ cleanup_io: } /* - * This is called only for stripes we've read from disk to - * reconstruct the parity. + * This is called only for stripes we've read from disk to reconstruct the + * parity. */ -static void raid_recover_end_io(struct bio *bio) +static void raid_recover_end_io_work(struct work_struct *work) { - struct btrfs_raid_bio *rbio = bio->bi_private; - - /* - * we only read stripe pages off the disk, set them - * up to date if there were no errors - */ - if (bio->bi_status) - fail_bio_stripe(rbio, bio); - else - set_bio_pages_uptodate(rbio, bio); - bio_put(bio); - - if (!atomic_dec_and_test(&rbio->stripes_pending)) - return; + struct btrfs_raid_bio *rbio = + container_of(work, struct btrfs_raid_bio, end_io_work); if (atomic_read(&rbio->error) > rbio->bioc->max_errors) rbio_orig_end_io(rbio, BLK_STS_IOERR); @@ -2177,10 +2167,9 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) * touch it after that. 
*/ atomic_set(&rbio->stripes_pending, bios_to_read); + INIT_WORK(&rbio->end_io_work, raid_recover_end_io_work); while ((bio = bio_list_pop(&bio_list))) { - bio->bi_end_io = raid_recover_end_io; - - btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); + bio->bi_end_io = raid56_bio_end_io; if (trace_raid56_scrub_read_recover_enabled()) { struct raid56_bio_trace_info trace_info = { 0 }; @@ -2650,24 +2639,14 @@ cleanup: * This will usually kick off finish_rmw once all the bios are read in, but it * may trigger parity reconstruction if we had any errors along the way */ -static void raid56_parity_scrub_end_io(struct bio *bio) +static void raid56_parity_scrub_end_io_work(struct work_struct *work) { - struct btrfs_raid_bio *rbio = bio->bi_private; - - if (bio->bi_status) - fail_bio_stripe(rbio, bio); - else - set_bio_pages_uptodate(rbio, bio); - - bio_put(bio); - - if (!atomic_dec_and_test(&rbio->stripes_pending)) - return; + struct btrfs_raid_bio *rbio = + container_of(work, struct btrfs_raid_bio, end_io_work); /* - * this will normally call finish_rmw to start our write - * but if there are any failed stripes we'll reconstruct - * from parity first + * This will normally call finish_rmw to start our write, but if there + * are any failed stripes we'll reconstruct from parity first */ validate_rbio_for_parity_scrub(rbio); } @@ -2737,10 +2716,9 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) * touch it after that. 
*/ atomic_set(&rbio->stripes_pending, bios_to_read); + INIT_WORK(&rbio->end_io_work, raid56_parity_scrub_end_io_work); while ((bio = bio_list_pop(&bio_list))) { - bio->bi_end_io = raid56_parity_scrub_end_io; - - btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); + bio->bi_end_io = raid56_bio_end_io; if (trace_raid56_scrub_read_enabled()) { struct raid56_bio_trace_info trace_info = { 0 }; diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 3badde24dcbf0..3b22657ca857e 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -100,6 +100,8 @@ struct btrfs_raid_bio { atomic_t error; + struct work_struct end_io_work; + /* Bitmap to record which horizontal stripe has data */ unsigned long dbitmap; -- 2.39.5