drm/amdkfd: check access permission to restore retry fault
author		Philip Yang <Philip.Yang@amd.com>
		Sun, 15 Aug 2021 18:42:33 +0000 (14:42 -0400)
committer	Alex Deucher <alexander.deucher@amd.com>
		Tue, 24 Aug 2021 19:36:50 +0000 (15:36 -0400)
Check the range access permission when restoring a GPU retry fault: if the
retry fault is on an address that belongs to a VMA, but the VMA does not
grant the read or write access the GPU requested, fail to restore the
address. The VM fault event is then passed back to user space.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.h
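
At its core, the change decodes the write bit from the interrupt vector entry
(src_data[1] & 0x20), threads it through amdgpu_vm_handle_fault() and
svm_range_restore_pages(), and lets the new svm_fault_allowed() helper compare
the requested access against the VMA's vm_flags. The following user-space
sketch restates that check with simplified stand-in types and flag values
(not the kernel definitions), purely for illustration:

/*
 * Minimal user-space sketch of the new permission check, restating the
 * logic of svm_fault_allowed() from the kfd_svm.c hunk below.  The
 * struct and flag values are simplified stand-ins, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

#define VM_READ  0x1UL
#define VM_WRITE 0x2UL

/* stand-in for the fields of struct vm_area_struct the check uses */
struct vma_stub {
	unsigned long vm_start;
	unsigned long vm_end;
	unsigned long vm_flags;
};

/* return true when the VMA grants the access the GPU asked for */
static bool fault_allowed(const struct vma_stub *vma, bool write_fault)
{
	unsigned long requested = VM_READ;

	if (write_fault)
		requested |= VM_WRITE;

	return (vma->vm_flags & requested) == requested;
}

int main(void)
{
	/* read-only mapping: a GPU write retry fault must not be restored */
	struct vma_stub ro = { .vm_start = 0x1000, .vm_end = 0x2000,
			       .vm_flags = VM_READ };

	printf("read fault allowed:  %d\n", fault_allowed(&ro, false)); /* 1 */
	printf("write fault allowed: %d\n", fault_allowed(&ro, true));  /* 0 */
	return 0;
}

A read retry fault on a read-only mapping is still restored, while a write
retry fault fails the restore with -EPERM and, per the commit message, the VM
fault event is passed back to user space.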

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2af8860d74cc98275ff176a84cccb24111ad67af..0b893aa21ffe20df183b89e0e23a65386a2d93b3 100644
@@ -3345,12 +3345,13 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
  * @adev: amdgpu device pointer
  * @pasid: PASID of the VM
  * @addr: Address of the fault
+ * @write_fault: true if this is a write fault, false if it is a read fault
  *
  * Try to gracefully handle a VM fault. Return true if the fault was handled and
  * shouldn't be reported any more.
  */
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-                           uint64_t addr)
+                           uint64_t addr, bool write_fault)
 {
        bool is_compute_context = false;
        struct amdgpu_bo *root;
@@ -3375,7 +3376,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
        addr /= AMDGPU_GPU_PAGE_SIZE;
 
        if (is_compute_context &&
-           !svm_range_restore_pages(adev, pasid, addr)) {
+           !svm_range_restore_pages(adev, pasid, addr, write_fault)) {
                amdgpu_bo_unref(&root);
                return true;
        }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 80cc9ab2c1d02f8dc2ee5ec5c0ae3a95407a6669..85fcfb8c5efd1be3e16d4e0dc88ca26c900c44cc 100644
@@ -448,7 +448,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
                             struct amdgpu_task_info *task_info);
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-                           uint64_t addr);
+                           uint64_t addr, bool write_fault);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 24b781e90befa5cc6ad2b018ef097d7b0e4e4db7..41c3a0d70b7c0b309af25d8495c07c0c94edbad8 100644
@@ -93,6 +93,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
                                       struct amdgpu_iv_entry *entry)
 {
        bool retry_fault = !!(entry->src_data[1] & 0x80);
+       bool write_fault = !!(entry->src_data[1] & 0x20);
        struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
        struct amdgpu_task_info task_info;
        uint32_t status = 0;
@@ -121,7 +122,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
                /* Try to handle the recoverable page faults by filling page
                 * tables
                 */
-               if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+               if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
                        return 1;
        }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 085fab45245d05397f67c81f8266233edcb45b94..d90c16a6b2b8085d701249d0a0eaf024a8ba21cb 100644
@@ -507,6 +507,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
                                      struct amdgpu_iv_entry *entry)
 {
        bool retry_fault = !!(entry->src_data[1] & 0x80);
+       bool write_fault = !!(entry->src_data[1] & 0x20);
        uint32_t status = 0, cid = 0, rw = 0;
        struct amdgpu_task_info task_info;
        struct amdgpu_vmhub *hub;
@@ -537,7 +538,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
                /* Try to handle the recoverable page faults by filling page
                 * tables
                 */
-               if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+               if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
                        return 1;
        }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index ecc390c2f28c11e2a4b0d1dbae759577d7975d8e..e4bdfd77669efd674d037d51ab94968745c2bb73 100644
@@ -2400,9 +2400,29 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
                WRITE_ONCE(pdd->faults, pdd->faults + 1);
 }
 
+static bool
+svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault)
+{
+       unsigned long requested = VM_READ;
+       struct vm_area_struct *vma;
+
+       if (write_fault)
+               requested |= VM_WRITE;
+
+       vma = find_vma(mm, addr << PAGE_SHIFT);
+       if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+               pr_debug("address 0x%llx VMA is removed\n", addr);
+               return true;
+       }
+
+       pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
+               vma->vm_flags);
+       return (vma->vm_flags & requested) == requested;
+}
+
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
-                       uint64_t addr)
+                       uint64_t addr, bool write_fault)
 {
        struct mm_struct *mm = NULL;
        struct svm_range_list *svms;
@@ -2484,6 +2504,13 @@ retry_write_locked:
                goto out_unlock_range;
        }
 
+       if (!svm_fault_allowed(mm, addr, write_fault)) {
+               pr_debug("fault addr 0x%llx no %s permission\n", addr,
+                       write_fault ? "write" : "read");
+               r = -EPERM;
+               goto out_unlock_range;
+       }
+
        best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
        if (best_loc == -1) {
                pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 3fc1fd8b4fbca176063e4a687972486f0b12c555..c6ec55354c7bc39d390a74174298bde32a34de85 100644
@@ -175,7 +175,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
                               unsigned long addr, struct svm_range *parent,
                               struct svm_range *prange);
 int svm_range_restore_pages(struct amdgpu_device *adev,
-                           unsigned int pasid, uint64_t addr);
+                           unsigned int pasid, uint64_t addr, bool write_fault);
 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
 void svm_range_add_list_work(struct svm_range_list *svms,
                             struct svm_range *prange, struct mm_struct *mm,
@@ -209,7 +209,8 @@ static inline void svm_range_list_fini(struct kfd_process *p)
 }
 
 static inline int svm_range_restore_pages(struct amdgpu_device *adev,
-                                         unsigned int pasid, uint64_t addr)
+                                         unsigned int pasid, uint64_t addr,
+                                         bool write_fault)
 {
        return -EFAULT;
 }