scsi: target: tcmu: Fix possible data corruption

author Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Thu, 21 Apr 2022 02:37:35 +0000 (10:37 +0800)
committer Martin K. Petersen <martin.petersen@oracle.com>
Mon, 2 May 2022 20:59:11 +0000 (16:59 -0400)
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c

index fd7267baa707838a3eed720728390f1534cb4db2..b1fd06edea595e54ebaa8dc63123d141db61e5e3 100644 (file)
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -20,6 +20,7 @@
  #include <linux/configfs.h>
  #include <linux/mutex.h>
  #include <linux/workqueue.h>
+#include <linux/pagemap.h>
  #include <net/genetlink.h>
  #include <scsi/scsi_common.h>
  #include <scsi/scsi_proto.h>
@@ -1667,6 +1668,26 @@ static u32 tcmu_blocks_release(struct tcmu_dev *udev, unsigned long first,
         xas_lock(&xas);
         xas_for_each(&xas, page, (last + 1) * udev->data_pages_per_blk - 1) {
                 xas_store(&xas, NULL);
+               /*
+                * When we get here, page faults may still be in flight on
+                * the to-be-released pages. If unmap_mapping_range() is
+                * called before those faults have completed, a race occurs
+                * and a valid but stale mapping is left behind.
+                *
+                * If another command subsequently runs and needs to extend
+                * dbi_thresh, it may reuse the slot corresponding to the
+                * previous page in data_bitmap. Though we will allocate a new
+                * page for the slot in data_area, no page fault will happen
+                * because we have a valid map. Therefore the command's data
+                * will be lost.
+                *
+                * We lock and unlock pages that are to be released to ensure
+                * all page faults have completed. This way
+                * unmap_mapping_range() can ensure stale maps are cleanly
+                * removed.
+                */
+               lock_page(page);
+               unlock_page(page);
                 __free_page(page);
                 pages_freed++;
         }
@@ -1822,6 +1843,7 @@ static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi)
         page = xa_load(&udev->data_pages, dpi);
         if (likely(page)) {
                 get_page(page);
+               lock_page(page);
                 mutex_unlock(&udev->cmdr_lock);
                 return page;
         }
@@ -1863,6 +1885,7 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
         struct page *page;
         unsigned long offset;
         void *addr;
+       vm_fault_t ret = 0;
  
         int mi = tcmu_find_mem_index(vmf->vma);
         if (mi < 0)
@@ -1887,10 +1910,11 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
                 page = tcmu_try_get_data_page(udev, dpi);
                 if (!page)
                         return VM_FAULT_SIGBUS;
+               ret = VM_FAULT_LOCKED;
         }
  
         vmf->page = page;
-       return 0;
+       return ret;
  }
  
  static const struct vm_operations_struct tcmu_vm_ops = {
@@ -3205,12 +3229,22 @@ static void find_free_blocks(void)
                         udev->dbi_max = block;
                 }
  
+               /*
+                * Release the block pages.
+                *
+                * Also note that since tcmu_vma_fault() gets an extra page
+                * refcount, tcmu_blocks_release() won't free pages if pages
+                * are mapped. This means it is safe to call
+                * tcmu_blocks_release() before unmap_mapping_range() which
+                * drops the refcount of any pages it unmaps and thus releases
+                * them.
+                */
+               pages_freed = tcmu_blocks_release(udev, start, end - 1);
+
                 /* Here will truncate the data area from off */
                 off = udev->data_off + (loff_t)start * udev->data_blk_size;
                 unmap_mapping_range(udev->inode->i_mapping, off, 0, 1);
  
-               /* Release the block pages */
-               pages_freed = tcmu_blocks_release(udev, start, end - 1);
                 mutex_unlock(&udev->cmdr_lock);
  
                 total_pages_freed += pages_freed;
author	Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
	Thu, 21 Apr 2022 02:37:35 +0000 (10:37 +0800)
committer	Martin K. Petersen <martin.petersen@oracle.com>
	Mon, 2 May 2022 20:59:11 +0000 (16:59 -0400)