git.baikalelectronics.ru Git - kernel.git/commitdiff
xfs: fix an incore inode UAF in xfs_bui_recover
author: Darrick J. Wong <darrick.wong@oracle.com>
Thu, 16 Feb 2023 05:20:10 +0000 (10:50 +0530)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 22 Feb 2023 11:50:37 +0000 (12:50 +0100)
commit 7ec4716508e25be9902468fef4829ab40196cf78 upstream.

In xfs_bui_recover, there exists a use-after-free bug with regard to
the inode that is involved in the bmap replay operation.  If the
mapping operation does not complete, we call xfs_bmap_unmap_extent to
create a deferred op to finish the unmapping work, and we retain a
pointer to the incore inode.

Unfortunately, the very next thing we do is commit the transaction and
drop the inode.  If reclaim tears down the inode before we try to finish
the defer ops, we dereference garbage and blow up.  Therefore, create a
way to join inodes to the defer ops freezer so that we can maintain the
xfs_inode reference until we're done with the inode.

Note: This imposes the requirement that there be enough memory to keep
every incore inode in memory throughout recovery.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/xfs_bmap_item.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_refcount_item.c
fs/xfs/xfs_rmap_item.c

index d928637737362c34d36539b84ca3d2f936e215d4..714756931317f7229f65d99635abd2eac29ec94a 100644 (file)
@@ -16,6 +16,7 @@
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
 #include "xfs_trace.h"
+#include "xfs_icache.h"
 
 /*
  * Deferred Operations in XFS
@@ -567,10 +568,14 @@ xfs_defer_move(
  * deferred ops state is transferred to the capture structure and the
  * transaction is then ready for the caller to commit it.  If there are no
  * intent items to capture, this function returns NULL.
+ *
+ * If capture_ip is not NULL, the capture structure will obtain an extra
+ * reference to the inode.
  */
 static struct xfs_defer_capture *
 xfs_defer_ops_capture(
-       struct xfs_trans                *tp)
+       struct xfs_trans                *tp,
+       struct xfs_inode                *capture_ip)
 {
        struct xfs_defer_capture        *dfc;
 
@@ -596,6 +601,15 @@ xfs_defer_ops_capture(
        /* Preserve the log reservation size. */
        dfc->dfc_logres = tp->t_log_res;
 
+       /*
+        * Grab an extra reference to this inode and attach it to the capture
+        * structure.
+        */
+       if (capture_ip) {
+               ihold(VFS_I(capture_ip));
+               dfc->dfc_capture_ip = capture_ip;
+       }
+
        return dfc;
 }
 
@@ -606,24 +620,33 @@ xfs_defer_ops_release(
        struct xfs_defer_capture        *dfc)
 {
        xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
+       if (dfc->dfc_capture_ip)
+               xfs_irele(dfc->dfc_capture_ip);
        kmem_free(dfc);
 }
 
 /*
  * Capture any deferred ops and commit the transaction.  This is the last step
- * needed to finish a log intent item that we recovered from the log.
+ * needed to finish a log intent item that we recovered from the log.  If any
+ * of the deferred ops operate on an inode, the caller must pass in that inode
+ * so that the reference can be transferred to the capture structure.  The
+ * caller must hold ILOCK_EXCL on the inode, and must unlock it before calling
+ * xfs_defer_ops_continue.
  */
 int
 xfs_defer_ops_capture_and_commit(
        struct xfs_trans                *tp,
+       struct xfs_inode                *capture_ip,
        struct list_head                *capture_list)
 {
        struct xfs_mount                *mp = tp->t_mountp;
        struct xfs_defer_capture        *dfc;
        int                             error;
 
+       ASSERT(!capture_ip || xfs_isilocked(capture_ip, XFS_ILOCK_EXCL));
+
        /* If we don't capture anything, commit transaction and exit. */
-       dfc = xfs_defer_ops_capture(tp);
+       dfc = xfs_defer_ops_capture(tp, capture_ip);
        if (!dfc)
                return xfs_trans_commit(tp);
 
@@ -640,16 +663,26 @@ xfs_defer_ops_capture_and_commit(
 
 /*
  * Attach a chain of captured deferred ops to a new transaction and free the
- * capture structure.
+ * capture structure.  If an inode was captured, it will be passed back to the
+ * caller with ILOCK_EXCL held and joined to the transaction with lockflags==0.
+ * The caller now owns the inode reference.
  */
 void
 xfs_defer_ops_continue(
        struct xfs_defer_capture        *dfc,
-       struct xfs_trans                *tp)
+       struct xfs_trans                *tp,
+       struct xfs_inode                **captured_ipp)
 {
        ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
        ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
 
+       /* Lock and join the captured inode to the new transaction. */
+       if (dfc->dfc_capture_ip) {
+               xfs_ilock(dfc->dfc_capture_ip, XFS_ILOCK_EXCL);
+               xfs_trans_ijoin(tp, dfc->dfc_capture_ip, 0);
+       }
+       *captured_ipp = dfc->dfc_capture_ip;
+
        /* Move captured dfops chain and state to the transaction. */
        list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
        tp->t_flags |= dfc->dfc_tpflags;
index d5b7494513e817b48e97b5359916250b32f72a8e..4c3248d47a350719010bbbceeb9ba37df9211a63 100644 (file)
@@ -80,6 +80,12 @@ struct xfs_defer_capture {
 
        /* Log reservation saved from the transaction. */
        unsigned int            dfc_logres;
+
+       /*
+        * An inode reference that must be maintained to complete the deferred
+        * work.
+        */
+       struct xfs_inode        *dfc_capture_ip;
 };
 
 /*
@@ -87,8 +93,9 @@ struct xfs_defer_capture {
  * This doesn't normally happen except log recovery.
  */
 int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
-               struct list_head *capture_list);
-void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp);
+               struct xfs_inode *capture_ip, struct list_head *capture_list);
+void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
+               struct xfs_inode **captured_ipp);
 void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d);
 
 #endif /* __XFS_DEFER_H__ */
index f7015eabfdc92cbff0e7e65735045bfea4c9cb9c..888449ac8b75291aef5af2f185b6e3e82f74ed43 100644 (file)
@@ -528,8 +528,11 @@ xfs_bui_recover(
        }
 
        set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
-       /* Commit transaction, which frees the transaction. */
-       error = xfs_defer_ops_capture_and_commit(tp, capture_list);
+       /*
+        * Commit transaction, which frees the transaction and saves the inode
+        * for later replay activities.
+        */
+       error = xfs_defer_ops_capture_and_commit(tp, ip, capture_list);
        if (error)
                goto err_unlock;
 
index 2db85c2c6d99473fb9d06bd02a5cb7892b6739f7..0333b20afafdc19a34a727dbe41cf7e03cc77a85 100644 (file)
@@ -639,7 +639,7 @@ xfs_efi_recover(
 
        set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
 
-       return xfs_defer_ops_capture_and_commit(tp, capture_list);
+       return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
 
 abort_error:
        xfs_trans_cancel(tp);
index 1e6ef00b833a3bae032d020b25a7c9349d343212..6c60cdd10d33011c6483d3b87aa891e26cb61b42 100644 (file)
@@ -4766,6 +4766,7 @@ xlog_finish_defer_ops(
 {
        struct xfs_defer_capture *dfc, *next;
        struct xfs_trans        *tp;
+       struct xfs_inode        *ip;
        int                     error = 0;
 
        list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
@@ -4791,9 +4792,13 @@ xlog_finish_defer_ops(
                 * from recovering a single intent item.
                 */
                list_del_init(&dfc->dfc_list);
-               xfs_defer_ops_continue(dfc, tp);
+               xfs_defer_ops_continue(dfc, tp, &ip);
 
                error = xfs_trans_commit(tp);
+               if (ip) {
+                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                       xfs_irele(ip);
+               }
                if (error)
                        return error;
        }
index c071f8600e8ef3facf84d7e378371fbdab2026ba..98f67dd64ce89f3d59045b494d02258451fcdf17 100644 (file)
@@ -569,7 +569,7 @@ xfs_cui_recover(
 
        xfs_refcount_finish_one_cleanup(tp, rcur, error);
        set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
-       return xfs_defer_ops_capture_and_commit(tp, capture_list);
+       return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
 
 abort_error:
        xfs_refcount_finish_one_cleanup(tp, rcur, error);
index 5bdf1f5e51b8bda4a088fc3fdb9d877fb50e95c7..32f580fa18772d643bbaded95bfc8dad2e5cb380 100644 (file)
@@ -593,7 +593,7 @@ xfs_rui_recover(
 
        xfs_rmap_finish_one_cleanup(tp, rcur, error);
        set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
-       return xfs_defer_ops_capture_and_commit(tp, capture_list);
+       return xfs_defer_ops_capture_and_commit(tp, NULL, capture_list);
 
 abort_error:
        xfs_rmap_finish_one_cleanup(tp, rcur, error);