drm/amdgpu: fix the issue of reserving bad pages failed
author     Dennis Li <Dennis.Li@amd.com>
           Thu, 22 Oct 2020 09:44:55 +0000 (17:44 +0800)
committer  Alex Deucher <alexander.deucher@amd.com>
           Fri, 30 Oct 2020 04:57:29 +0000 (00:57 -0400)
In amdgpu_ras_reset_gpu, the bad pages may not have been freed yet,
so reserving them has a high probability of failing.

Change to reserve bad pages when freeing VRAM.
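
The underlying constraint is that drm_mm_reserve_node() can only claim a
range that is currently a hole, so reserving a retired page fails with
-ENOSPC while some buffer still occupies it. Keeping such pages on a
pending list and retrying whenever VRAM is freed lets the reservation
eventually succeed. A simplified, demo-only sketch of that failure/retry
cycle (hypothetical code, not part of this patch; needs <drm/drm_mm.h>):

    static void reservation_retry_demo(void)
    {
            struct drm_mm mm;
            struct drm_mm_node used = {}, bad_page = {};

            drm_mm_init(&mm, 0, 256);               /* 256-page space */
            drm_mm_insert_node(&mm, &used, 256);    /* fully allocated */

            bad_page.start = 16;            /* retired page inside 'used' */
            bad_page.size = 1;

            /* Range is busy: reservation fails, keep the node pending. */
            WARN_ON(drm_mm_reserve_node(&mm, &bad_page) != -ENOSPC);

            drm_mm_remove_node(&used);      /* the VRAM user goes away */

            /* Retry succeeds; amdgpu_vram_mgr_do_reserve() below does
             * this for every entry on reservations_pending. */
            WARN_ON(drm_mm_reserve_node(&mm, &bad_page) != 0);

            drm_mm_remove_node(&bad_page);
            drm_mm_takedown(&mm);
    }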

v2:
1. avoid allocating the drm_mm node outside of amdgpu_vram_mgr.c
2. move bad page reserving into amdgpu_ras_add_bad_pages; if the vram mgr
   fails to reserve a bad page, it is kept on the pending list, otherwise
   it is moved to the processed list;
3. remove amdgpu_ras_release_bad_pages, because the retired pages' info
   has been moved into amdgpu_vram_mgr

v3:
1. format code style;
2. rename amdgpu_vram_reserve_scope as amdgpu_vram_reservation;
3. rename scope_pending as reservations_pending;
4. rename scope_processed as reserved_pages;
5. change to iterate over all the pending ones and try to insert them
   with drm_mm_reserve_node();

v4:
1. rename amdgpu_vram_mgr_reserve_scope as amdgpu_vram_mgr_reserve_range;
2. remove unused include "amdgpu_ras.h";
3. rename amdgpu_vram_mgr_check_and_reserve as amdgpu_vram_mgr_do_reserve;
4. refine amdgpu_vram_mgr_reserve_range to call amdgpu_vram_mgr_do_reserve.

Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Hawking Zhang <hawking.zhang@amd.com>
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Wenhui Sheng <Wenhui.Sheng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 95d6b34027ca9630040a4ed071174af857d98de9..de312e89153da3f36aa94b0274a90d5813c567e6 100644
@@ -80,6 +80,8 @@ enum amdgpu_ras_retire_page_reservation {
 
 atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
 
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+                               uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
                                uint64_t addr);
 
@@ -1551,10 +1553,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
                        .size = AMDGPU_GPU_PAGE_SIZE,
                        .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
                };
-
-               if (data->last_reserved <= i)
+               ret = amdgpu_vram_mgr_query_page_status(
+                               ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+                               data->bps[i].retired_page);
+               if (ret == -EBUSY)
                        (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
-               else if (data->bps_bo[i] == NULL)
+               else if (ret == -ENOENT)
                        (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
        }
 
@@ -1606,12 +1610,9 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
        unsigned int new_space = old_space + pages;
        unsigned int align_space = ALIGN(new_space, 512);
        void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
-       struct amdgpu_bo **bps_bo =
-                       kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
 
-       if (!bps || !bps_bo) {
+       if (!bps) {
                kfree(bps);
-               kfree(bps_bo);
                return -ENOMEM;
        }
 
@@ -1620,14 +1621,8 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
                                data->count * sizeof(*data->bps));
                kfree(data->bps);
        }
-       if (data->bps_bo) {
-               memcpy(bps_bo, data->bps_bo,
-                               data->count * sizeof(*data->bps_bo));
-               kfree(data->bps_bo);
-       }
 
        data->bps = bps;
-       data->bps_bo = bps_bo;
        data->space_left += align_space - old_space;
        return 0;
 }
@@ -1639,6 +1634,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        struct ras_err_handler_data *data;
        int ret = 0;
+       uint32_t i;
 
        if (!con || !con->eh_data || !bps || pages <= 0)
                return 0;
@@ -1648,16 +1644,26 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
        if (!data)
                goto out;
 
-       if (data->space_left <= pages)
-               if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) {
+       for (i = 0; i < pages; i++) {
+               if (amdgpu_ras_check_bad_page_unlock(con,
+                       bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+                       continue;
+
+               if (!data->space_left &&
+                       amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
                        ret = -ENOMEM;
                        goto out;
                }
 
-       memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
-       data->count += pages;
-       data->space_left -= pages;
+               amdgpu_vram_mgr_reserve_range(
+                       ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+                       bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+                       AMDGPU_GPU_PAGE_SIZE);
 
+               memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
+               data->count++;
+               data->space_left--;
+       }
 out:
        mutex_unlock(&con->recovery_lock);
 
@@ -1730,6 +1736,20 @@ out:
        return ret;
 }
 
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+                               uint64_t addr)
+{
+       struct ras_err_handler_data *data = con->eh_data;
+       int i;
+
+       addr >>= AMDGPU_GPU_PAGE_SHIFT;
+       for (i = 0; i < data->count; i++)
+               if (addr == data->bps[i].retired_page)
+                       return true;
+
+       return false;
+}
+
 /*
  * check if an address belongs to bad page
  *
@@ -1739,26 +1759,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
                                uint64_t addr)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct ras_err_handler_data *data;
-       int i;
        bool ret = false;
 
        if (!con || !con->eh_data)
                return ret;
 
        mutex_lock(&con->recovery_lock);
-       data = con->eh_data;
-       if (!data)
-               goto out;
-
-       addr >>= AMDGPU_GPU_PAGE_SHIFT;
-       for (i = 0; i < data->count; i++)
-               if (addr == data->bps[i].retired_page) {
-                       ret = true;
-                       goto out;
-               }
-
-out:
+       ret = amdgpu_ras_check_bad_page_unlock(con, addr);
        mutex_unlock(&con->recovery_lock);
        return ret;
 }
@@ -1804,77 +1811,6 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
        }
 }
 
-/* called in gpu recovery/init */
-int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
-{
-       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct ras_err_handler_data *data;
-       uint64_t bp;
-       struct amdgpu_bo *bo = NULL;
-       int i, ret = 0;
-
-       /* Not reserve bad page when amdgpu_bad_page_threshold == 0. */
-       if (!con || !con->eh_data || (amdgpu_bad_page_threshold == 0))
-               return 0;
-
-       mutex_lock(&con->recovery_lock);
-       data = con->eh_data;
-       if (!data)
-               goto out;
-       /* reserve vram at driver post stage. */
-       for (i = data->last_reserved; i < data->count; i++) {
-               bp = data->bps[i].retired_page;
-
-               /* There are two cases of reserve error should be ignored:
-                * 1) a ras bad page has been allocated (used by someone);
-                * 2) a ras bad page has been reserved (duplicate error injection
-                *    for one page);
-                */
-               if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
-                                              AMDGPU_GPU_PAGE_SIZE,
-                                              AMDGPU_GEM_DOMAIN_VRAM,
-                                              &bo, NULL))
-                       dev_warn(adev->dev, "RAS WARN: reserve vram for "
-                                       "retired page %llx fail\n", bp);
-
-               data->bps_bo[i] = bo;
-               data->last_reserved = i + 1;
-               bo = NULL;
-       }
-out:
-       mutex_unlock(&con->recovery_lock);
-       return ret;
-}
-
-/* called when driver unload */
-static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
-{
-       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-       struct ras_err_handler_data *data;
-       struct amdgpu_bo *bo;
-       int i;
-
-       if (!con || !con->eh_data)
-               return 0;
-
-       mutex_lock(&con->recovery_lock);
-       data = con->eh_data;
-       if (!data)
-               goto out;
-
-       for (i = data->last_reserved - 1; i >= 0; i--) {
-               bo = data->bps_bo[i];
-
-               amdgpu_bo_free_kernel(&bo, NULL, NULL);
-
-               data->bps_bo[i] = bo;
-               data->last_reserved = i;
-       }
-out:
-       mutex_unlock(&con->recovery_lock);
-       return 0;
-}
-
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1914,18 +1850,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
                ret = amdgpu_ras_load_bad_pages(adev);
                if (ret)
                        goto free;
-               ret = amdgpu_ras_reserve_bad_pages(adev);
-               if (ret)
-                       goto release;
        }
 
        return 0;
 
-release:
-       amdgpu_ras_release_bad_pages(adev);
 free:
        kfree((*data)->bps);
-       kfree((*data)->bps_bo);
        kfree(*data);
        con->eh_data = NULL;
 out:
@@ -1953,12 +1883,10 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
                return 0;
 
        cancel_work_sync(&con->recovery_work);
-       amdgpu_ras_release_bad_pages(adev);
 
        mutex_lock(&con->recovery_lock);
        con->eh_data = NULL;
        kfree(data->bps);
-       kfree(data->bps_bo);
        kfree(data);
        mutex_unlock(&con->recovery_lock);
 
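
For reference, the tri-state result of amdgpu_vram_mgr_query_page_status()
is what drives the page states reported by amdgpu_ras_badpages_read() in
the hunk above. An illustrative helper (hypothetical, not part of this
patch) that captures the mapping:

    static uint32_t bad_page_flag(int status)
    {
            switch (status) {
            case 0:         /* range carved out of the drm_mm */
                    return AMDGPU_RAS_RETIRE_PAGE_RESERVED;
            case -EBUSY:    /* still occupied, retry is queued */
                    return AMDGPU_RAS_RETIRE_PAGE_PENDING;
            case -ENOENT:   /* no reservation recorded */
            default:
                    return AMDGPU_RAS_RETIRE_PAGE_FAULT;
            }
    }
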
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 7c39d706e6d15f80c47ab2ff09d9d969d31bbd02..4667cce38582e2ea16a86d45663cec694ef22cf4 100644
@@ -362,14 +362,10 @@ struct ras_err_data {
 struct ras_err_handler_data {
        /* point to bad page records array */
        struct eeprom_table_record *bps;
-       /* point to reserved bo array */
-       struct amdgpu_bo **bps_bo;
        /* the count of entries */
        int count;
        /* the space can place new entries */
        int space_left;
-       /* last reserved entry's index + 1 */
-       int last_reserved;
 };
 
 typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
@@ -506,15 +502,11 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
                struct eeprom_table_record *bps, int pages);
 
 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
-int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
 
 static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 {
        struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
-       if (in_task())
-               amdgpu_ras_reserve_bad_pages(adev);
-
        if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
                schedule_work(&ras->recovery_work);
        return 0;
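
Note that with the in_task() special case removed, amdgpu_ras_reset_gpu()
is reduced to an atomic_cmpxchg() plus schedule_work(), so it should be
safe to call from atomic context as well, e.g. directly from a RAS
interrupt handler (hypothetical caller, illustrative only):

    if (fatal_ras_error)            /* 'fatal_ras_error' is assumed */
            amdgpu_ras_reset_gpu(adev);
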
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index d808b2a58b283df8d7120177c7a3834a6352fc08..684a9ee9da759c251fdae5de88f9279d938719ca 100644
 
 #define AMDGPU_POISON  0xd0bed0be
 
+struct amdgpu_vram_reservation {
+       struct list_head node;
+       struct drm_mm_node mm_node;
+};
+
 struct amdgpu_vram_mgr {
        struct ttm_resource_manager manager;
        struct drm_mm mm;
        spinlock_t lock;
+       struct list_head reservations_pending;
+       struct list_head reserved_pages;
        atomic64_t usage;
        atomic64_t vis_usage;
 };
@@ -122,6 +129,10 @@ void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
                              struct sg_table *sgt);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man);
+int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+                                 uint64_t start, uint64_t size);
+int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+                                     uint64_t start);
 
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_late_init(struct amdgpu_device *adev);
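
A hypothetical caller of the two new entry points ('man' and 'page_nr'
are assumed to be a VRAM resource manager and a retired page number;
illustrative only, mirroring the callers in amdgpu_ras.c):

    int ret;

    /* Queue the retired page by byte address; fails only with -ENOMEM. */
    ret = amdgpu_vram_mgr_reserve_range(man,
                                        page_nr << AMDGPU_GPU_PAGE_SHIFT,
                                        AMDGPU_GPU_PAGE_SIZE);
    if (ret)
            return ret;

    /* Later: query by page number, as amdgpu_ras_badpages_read() does. */
    switch (amdgpu_vram_mgr_query_page_status(man, page_nr)) {
    case 0:         /* reserved */
    case -EBUSY:    /* still pending, retried when the range is freed */
    case -ENOENT:   /* never queued */
            break;
    }
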
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index a3dd909f78ab322e55f2d450c8b9121a0ad8fa37..10126a6aa603a6c0b36f3e916d24e3078cdb33f0 100644
@@ -187,6 +187,8 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 
        drm_mm_init(&mgr->mm, 0, man->size);
        spin_lock_init(&mgr->lock);
+       INIT_LIST_HEAD(&mgr->reservations_pending);
+       INIT_LIST_HEAD(&mgr->reserved_pages);
 
        /* Add the two VRAM-related sysfs files */
        ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
@@ -211,6 +213,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
        struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
        struct ttm_resource_manager *man = &mgr->manager;
        int ret;
+       struct amdgpu_vram_reservation *rsv, *temp;
 
        ttm_resource_manager_set_used(man, false);
 
@@ -219,6 +222,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
                return;
 
        spin_lock(&mgr->lock);
+       list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
+               kfree(rsv);
+
+       list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
+               drm_mm_remove_node(&rsv->mm_node);
+               kfree(rsv);
+       }
        drm_mm_takedown(&mgr->mm);
        spin_unlock(&mgr->lock);
 
@@ -277,6 +287,101 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
        return usage;
 }
 
+static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
+{
+       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+       struct amdgpu_device *adev = to_amdgpu_device(mgr);
+       struct drm_mm *mm = &mgr->mm;
+       struct amdgpu_vram_reservation *rsv, *temp;
+       uint64_t vis_usage;
+
+       list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
+               if (drm_mm_reserve_node(mm, &rsv->mm_node))
+                       continue;
+
+               dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
+                       rsv->mm_node.start, rsv->mm_node.size);
+
+               vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
+               atomic64_add(vis_usage, &mgr->vis_usage);
+               atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
+               list_move(&rsv->node, &mgr->reserved_pages);
+       }
+}
+
+/**
+ * amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
+ *
+ * @man: TTM memory type manager
+ * @start: start address of the range in VRAM
+ * @size: size of the range
+ *
+ * Reserve memory from start address with the specified size in VRAM
+ */
+int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+                                 uint64_t start, uint64_t size)
+{
+       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+       struct amdgpu_vram_reservation *rsv;
+
+       rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
+       if (!rsv)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&rsv->node);
+       rsv->mm_node.start = start >> PAGE_SHIFT;
+       rsv->mm_node.size = size >> PAGE_SHIFT;
+
+       spin_lock(&mgr->lock);
+       list_add_tail(&rsv->node, &mgr->reservations_pending);
+       amdgpu_vram_mgr_do_reserve(man);
+       spin_unlock(&mgr->lock);
+
+       return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_query_page_status - query the reservation status
+ *
+ * @man: TTM memory type manager
+ * @start: page number of the page in VRAM (as recorded in retired_page)
+ *
+ * Returns:
+ *     -EBUSY: the page is still held on the pending list
+ *     0: the page has been reserved
+ *     -ENOENT: no reservation exists for this page
+ */
+int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+                                     uint64_t start)
+{
+       struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+       struct amdgpu_vram_reservation *rsv;
+       int ret;
+
+       spin_lock(&mgr->lock);
+
+       list_for_each_entry(rsv, &mgr->reservations_pending, node) {
+               if ((rsv->mm_node.start <= start) &&
+                   (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+       }
+
+       list_for_each_entry(rsv, &mgr->reserved_pages, node) {
+               if ((rsv->mm_node.start <= start) &&
+                   (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+                       ret = 0;
+                       goto out;
+               }
+       }
+
+       ret = -ENOENT;
+out:
+       spin_unlock(&mgr->lock);
+       return ret;
+}
+
 /**
  * amdgpu_vram_mgr_virt_start - update virtual start address
  *
@@ -447,6 +552,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
                vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
                ++nodes;
        }
+       amdgpu_vram_mgr_do_reserve(man);
        spin_unlock(&mgr->lock);
 
        atomic64_sub(usage, &mgr->usage);
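
Taken together, the retire/reserve flow after this patch (summary sketch
in comment form; all names are from the hunks above):

    /*
     * 1. Bad page detected -> amdgpu_ras_add_bad_pages()
     *      -> amdgpu_vram_mgr_reserve_range()  queue on reservations_pending
     *      -> amdgpu_vram_mgr_do_reserve()     try drm_mm_reserve_node() now
     * 2. An overlapping buffer is freed -> amdgpu_vram_mgr_del()
     *      -> amdgpu_vram_mgr_do_reserve()     retry; on success move the
     *                                          entry to reserved_pages
     * 3. Bad-page state is read -> amdgpu_ras_badpages_read()
     *      -> amdgpu_vram_mgr_query_page_status()
     *         pending -> -EBUSY (PAGE_PENDING), reserved -> 0 (PAGE_RESERVED),
     *         unknown -> -ENOENT (PAGE_FAULT)
     */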