From d2913e1c46bedabc7c90bb8e8f36e0639dad1f06 Mon Sep 17 00:00:00 2001
From: Dennis Li
Date: Thu, 22 Oct 2020 17:44:55 +0800
Subject: [PATCH] drm/amdgpu: fix the issue of reserving bad pages failed
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

In amdgpu_ras_reset_gpu, the bad pages may not have been freed yet,
so reserving them has a high probability of failing. Change to
reserve bad pages when the VRAM they occupy is freed instead.

v2:
1. avoid allocating the drm_mm node outside of amdgpu_vram_mgr.c;
2. move bad page reserving into amdgpu_ras_add_bad_pages; if the vram
   mgr fails to reserve a bad page, it is put on the pending list,
   otherwise on the processed list;
3. remove amdgpu_ras_release_bad_pages, because the retired pages'
   info has been moved into amdgpu_vram_mgr

v3:
1. fix code style;
2. rename amdgpu_vram_reserve_scope as amdgpu_vram_reservation;
3. rename scope_pending as reservations_pending;
4. rename scope_processed as reserved_pages;
5. change to iterate over all the pending ones and try to insert them
   with drm_mm_reserve_node()

v4:
1. rename amdgpu_vram_mgr_reserve_scope as amdgpu_vram_mgr_reserve_range;
2. remove the unused include "amdgpu_ras.h";
3. rename amdgpu_vram_mgr_check_and_reserve as amdgpu_vram_mgr_do_reserve;
4. refine amdgpu_vram_mgr_reserve_range to call amdgpu_vram_mgr_do_reserve

Reviewed-by: Christian König
Reviewed-by: Hawking Zhang
Signed-off-by: Dennis Li
Signed-off-by: Wenhui Sheng
Signed-off-by: Alex Deucher
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c      | 150 +++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h      |   8 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h      |  11 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 106 +++++++++++++
 4 files changed, 156 insertions(+), 119 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 95d6b34027ca9..de312e89153da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -80,6 +80,8 @@ enum amdgpu_ras_retire_page_reservation {
 
 atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
 
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+				uint64_t addr);
 static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
 				uint64_t addr);
 
@@ -1551,10 +1553,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
 			.size = AMDGPU_GPU_PAGE_SIZE,
 			.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
 		};
-
-		if (data->last_reserved <= i)
+		ret = amdgpu_vram_mgr_query_page_status(
+				ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+				data->bps[i].retired_page);
+		if (ret == -EBUSY)
 			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
-		else if (data->bps_bo[i] == NULL)
+		else if (ret == -ENOENT)
 			(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT;
 	}
 
@@ -1606,12 +1610,9 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
 	unsigned int new_space = old_space + pages;
 	unsigned int align_space = ALIGN(new_space, 512);
 	void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
-	struct amdgpu_bo **bps_bo =
-			kmalloc(align_space * sizeof(*data->bps_bo), GFP_KERNEL);
 
-	if (!bps || !bps_bo) {
+	if (!bps) {
 		kfree(bps);
-		kfree(bps_bo);
 		return -ENOMEM;
 	}
 
@@ -1620,14 +1621,8 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
 		       data->count * sizeof(*data->bps));
 		kfree(data->bps);
 	}
-	if (data->bps_bo) {
-		memcpy(bps_bo, data->bps_bo,
-		       data->count * sizeof(*data->bps_bo));
-		kfree(data->bps_bo);
-	}
 
 	data->bps = bps;
-	data->bps_bo = bps_bo;
 	data->space_left += align_space - old_space;
 	return 0;
 }
@@ -1639,6 +1634,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_err_handler_data *data;
 	int ret = 0;
+	uint32_t i;
 
 	if (!con || !con->eh_data || !bps || pages <= 0)
 		return 0;
@@ -1648,16 +1644,26 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 	if (!data)
 		goto out;
 
-	if (data->space_left <= pages)
-		if (amdgpu_ras_realloc_eh_data_space(adev, data, pages)) {
+	for (i = 0; i < pages; i++) {
+		if (amdgpu_ras_check_bad_page_unlock(con,
+			bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
+			continue;
+
+		if (!data->space_left &&
+			amdgpu_ras_realloc_eh_data_space(adev, data, 256)) {
 			ret = -ENOMEM;
 			goto out;
 		}
 
-	memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
-	data->count += pages;
-	data->space_left -= pages;
+		amdgpu_vram_mgr_reserve_range(
+			ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM),
+			bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
+			AMDGPU_GPU_PAGE_SIZE);
 
+		memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
+		data->count++;
+		data->space_left--;
+	}
 out:
 	mutex_unlock(&con->recovery_lock);
 
@@ -1730,6 +1736,20 @@ out:
 	return ret;
 }
 
+static bool amdgpu_ras_check_bad_page_unlock(struct amdgpu_ras *con,
+				uint64_t addr)
+{
+	struct ras_err_handler_data *data = con->eh_data;
+	int i;
+
+	addr >>= AMDGPU_GPU_PAGE_SHIFT;
+	for (i = 0; i < data->count; i++)
+		if (addr == data->bps[i].retired_page)
+			return true;
+
+	return false;
+}
+
 /*
  * check if an address belongs to bad page
  *
@@ -1739,26 +1759,13 @@ static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
 		uint64_t addr)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	struct ras_err_handler_data *data;
-	int i;
 	bool ret = false;
 
 	if (!con || !con->eh_data)
 		return ret;
 
 	mutex_lock(&con->recovery_lock);
-	data = con->eh_data;
-	if (!data)
-		goto out;
-
-	addr >>= AMDGPU_GPU_PAGE_SHIFT;
-	for (i = 0; i < data->count; i++)
-		if (addr == data->bps[i].retired_page) {
-			ret = true;
-			goto out;
-		}
-
-out:
+	ret = amdgpu_ras_check_bad_page_unlock(con, addr);
 	mutex_unlock(&con->recovery_lock);
 	return ret;
 }
@@ -1804,77 +1811,6 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
 	}
 }
 
-/* called in gpu recovery/init */
-int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
-{
-	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	struct ras_err_handler_data *data;
-	uint64_t bp;
-	struct amdgpu_bo *bo = NULL;
-	int i, ret = 0;
-
-	/* Not reserve bad page when amdgpu_bad_page_threshold == 0. */
-	if (!con || !con->eh_data || (amdgpu_bad_page_threshold == 0))
-		return 0;
-
-	mutex_lock(&con->recovery_lock);
-	data = con->eh_data;
-	if (!data)
-		goto out;
-	/* reserve vram at driver post stage. */
-	for (i = data->last_reserved; i < data->count; i++) {
-		bp = data->bps[i].retired_page;
-
-		/* There are two cases of reserve error should be ignored:
-		 * 1) a ras bad page has been allocated (used by someone);
-		 * 2) a ras bad page has been reserved (duplicate error injection
-		 *    for one page);
-		 */
-		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
-					       AMDGPU_GPU_PAGE_SIZE,
-					       AMDGPU_GEM_DOMAIN_VRAM,
-					       &bo, NULL))
-			dev_warn(adev->dev, "RAS WARN: reserve vram for "
-					"retired page %llx fail\n", bp);
-
-		data->bps_bo[i] = bo;
-		data->last_reserved = i + 1;
-		bo = NULL;
-	}
-out:
-	mutex_unlock(&con->recovery_lock);
-	return ret;
-}
-
-/* called when driver unload */
-static int amdgpu_ras_release_bad_pages(struct amdgpu_device *adev)
-{
-	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-	struct ras_err_handler_data *data;
-	struct amdgpu_bo *bo;
-	int i;
-
-	if (!con || !con->eh_data)
-		return 0;
-
-	mutex_lock(&con->recovery_lock);
-	data = con->eh_data;
-	if (!data)
-		goto out;
-
-	for (i = data->last_reserved - 1; i >= 0; i--) {
-		bo = data->bps_bo[i];
-
-		amdgpu_bo_free_kernel(&bo, NULL, NULL);
-
-		data->bps_bo[i] = bo;
-		data->last_reserved = i;
-	}
-out:
-	mutex_unlock(&con->recovery_lock);
-	return 0;
-}
-
 int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1914,18 +1850,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
 		ret = amdgpu_ras_load_bad_pages(adev);
 		if (ret)
 			goto free;
-		ret = amdgpu_ras_reserve_bad_pages(adev);
-		if (ret)
-			goto release;
 	}
 
 	return 0;
 
-release:
-	amdgpu_ras_release_bad_pages(adev);
 free:
 	kfree((*data)->bps);
-	kfree((*data)->bps_bo);
 	kfree(*data);
 	con->eh_data = NULL;
 out:
@@ -1953,12 +1883,10 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 		return 0;
 
 	cancel_work_sync(&con->recovery_work);
-	amdgpu_ras_release_bad_pages(adev);
 	mutex_lock(&con->recovery_lock);
 	con->eh_data = NULL;
 	kfree(data->bps);
-	kfree(data->bps_bo);
 	kfree(data);
 	mutex_unlock(&con->recovery_lock);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 7c39d706e6d15..4667cce38582e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -362,14 +362,10 @@ struct ras_err_data {
 struct ras_err_handler_data {
 	/* point to bad page records array */
 	struct eeprom_table_record *bps;
-	/* point to reserved bo array */
-	struct amdgpu_bo **bps_bo;
 	/* the count of entries */
 	int count;
 	/* the space can place new entries */
 	int space_left;
-	/* last reserved entry's index + 1 */
-	int last_reserved;
 };
 
 typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
@@ -506,15 +502,11 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
 		struct eeprom_table_record *bps, int pages);
 
 int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev);
-int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
 
 static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
-	if (in_task())
-		amdgpu_ras_reserve_bad_pages(adev);
-
 	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
 		schedule_work(&ras->recovery_work);
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index d808b2a58b283..684a9ee9da759 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -41,10 +41,17 @@
 
 #define AMDGPU_POISON	0xd0bed0be
 
+struct amdgpu_vram_reservation {
+	struct list_head node;
+	struct drm_mm_node mm_node;
+};
+
 struct amdgpu_vram_mgr {
 	struct ttm_resource_manager manager;
 	struct drm_mm mm;
 	spinlock_t lock;
+	struct list_head reservations_pending;
+	struct list_head reserved_pages;
 	atomic64_t usage;
 	atomic64_t vis_usage;
 };
@@ -122,6 +129,10 @@ void amdgpu_vram_mgr_free_sgt(struct amdgpu_device *adev,
 			      struct sg_table *sgt);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_resource_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_resource_manager *man);
+int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+				  uint64_t start, uint64_t size);
+int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+				      uint64_t start);
 
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_late_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index a3dd909f78ab3..10126a6aa603a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -187,6 +187,8 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
 
 	drm_mm_init(&mgr->mm, 0, man->size);
 	spin_lock_init(&mgr->lock);
+	INIT_LIST_HEAD(&mgr->reservations_pending);
+	INIT_LIST_HEAD(&mgr->reserved_pages);
 
 	/* Add the two VRAM-related sysfs files */
 	ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
@@ -211,6 +213,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 	struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
 	struct ttm_resource_manager *man = &mgr->manager;
 	int ret;
+	struct amdgpu_vram_reservation *rsv, *temp;
 
 	ttm_resource_manager_set_used(man, false);
 
@@ -219,6 +222,13 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
 		return;
 
 	spin_lock(&mgr->lock);
+	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
+		kfree(rsv);
+
+	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
+		drm_mm_remove_node(&rsv->mm_node);
+		kfree(rsv);
+	}
 	drm_mm_takedown(&mgr->mm);
 	spin_unlock(&mgr->lock);
 
@@ -277,6 +287,101 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
 	return usage;
 }
 
+static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
+{
+	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct amdgpu_device *adev = to_amdgpu_device(mgr);
+	struct drm_mm *mm = &mgr->mm;
+	struct amdgpu_vram_reservation *rsv, *temp;
+	uint64_t vis_usage;
+
+	list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
+		if (drm_mm_reserve_node(mm, &rsv->mm_node))
+			continue;
+
+		dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
+			rsv->mm_node.start, rsv->mm_node.size);
+
+		vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
+		atomic64_add(vis_usage, &mgr->vis_usage);
+		atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage);
+		list_move(&rsv->node, &mgr->reserved_pages);
+	}
+}
+
+/**
+ * amdgpu_vram_mgr_reserve_range - Reserve a range from VRAM
+ *
+ * @man: TTM memory type manager
+ * @start: start address of the range in VRAM
+ * @size: size of the range
+ *
+ * Reserve memory from the start address with the specified size in VRAM
+ */
+int amdgpu_vram_mgr_reserve_range(struct ttm_resource_manager *man,
+				  uint64_t start, uint64_t size)
+{
+	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct amdgpu_vram_reservation *rsv;
+
+	rsv = kzalloc(sizeof(*rsv), GFP_KERNEL);
+	if (!rsv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&rsv->node);
+	rsv->mm_node.start = start >> PAGE_SHIFT;
+	rsv->mm_node.size = size >> PAGE_SHIFT;
+
+	spin_lock(&mgr->lock);
+	list_add_tail(&rsv->node, &mgr->reservations_pending);
+	amdgpu_vram_mgr_do_reserve(man);
+	spin_unlock(&mgr->lock);
+
+	return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_query_page_status - query the reservation status
+ *
+ * @man: TTM memory type manager
+ * @start: start address of a page in VRAM
+ *
+ * Returns:
+ *	-EBUSY: the page is still held and on the pending list
+ *	0: the page has been reserved
+ *	-ENOENT: the input page is not a reservation
+ */
+int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man,
+				      uint64_t start)
+{
+	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+	struct amdgpu_vram_reservation *rsv;
+	int ret;
+
+	spin_lock(&mgr->lock);
+
+	list_for_each_entry(rsv, &mgr->reservations_pending, node) {
+		if ((rsv->mm_node.start <= start) &&
+		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	list_for_each_entry(rsv, &mgr->reserved_pages, node) {
+		if ((rsv->mm_node.start <= start) &&
+		    (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+			ret = 0;
+			goto out;
+		}
+	}
+
+	ret = -ENOENT;
out:
+	spin_unlock(&mgr->lock);
+	return ret;
+}
+
 /**
  * amdgpu_vram_mgr_virt_start - update virtual start address
  *
@@ -447,6 +552,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 		vis_usage += amdgpu_vram_mgr_vis_size(adev, nodes);
 		++nodes;
 	}
+	amdgpu_vram_mgr_do_reserve(man);
 	spin_unlock(&mgr->lock);
 
 	atomic64_sub(usage, &mgr->usage);
-- 
2.39.5
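
For reference, the deferred-reservation scheme this patch introduces can be
modeled in a few lines of standalone C. The sketch below is illustrative
only: reserve_range(), do_reserve() and the single page_busy flag are
hypothetical stand-ins for amdgpu_vram_mgr_reserve_range(),
amdgpu_vram_mgr_do_reserve() and the real drm_mm allocator state, not kernel
code. A reservation that cannot be satisfied immediately stays on a pending
list; successes move to a reserved list.

/*
 * Standalone model of the deferred-reservation pattern.
 * All names are hypothetical. Build with: cc -o resv resv.c
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct reservation {
	unsigned long long start, size;
	struct reservation *next;
};

/* Two lists, mirroring reservations_pending and reserved_pages. */
static struct reservation *pending, *reserved;
/* Stands in for "the bad page is still allocated to someone". */
static bool page_busy = true;

/* Walk the pending list and move every satisfiable reservation to the
 * reserved list, like amdgpu_vram_mgr_do_reserve() retrying
 * drm_mm_reserve_node() on each pending entry. */
static void do_reserve(void)
{
	struct reservation **p = &pending;

	while (*p) {
		struct reservation *rsv = *p;

		if (page_busy) {	/* reservation would fail: keep pending */
			p = &rsv->next;
			continue;
		}
		*p = rsv->next;		/* unlink from pending... */
		rsv->next = reserved;	/* ...and move to reserved */
		reserved = rsv;
		printf("reserved 0x%llx (+%llu)\n", rsv->start, rsv->size);
	}
}

/* Queue a reservation and opportunistically try it right away, like
 * amdgpu_vram_mgr_reserve_range(). */
static int reserve_range(unsigned long long start, unsigned long long size)
{
	struct reservation *rsv = calloc(1, sizeof(*rsv));

	if (!rsv)
		return -1;
	rsv->start = start;
	rsv->size = size;
	rsv->next = pending;
	pending = rsv;
	do_reserve();
	return 0;
}

int main(void)
{
	reserve_range(0x1000, 1);	/* page busy: stays pending */
	page_busy = false;		/* the VRAM holding the page is freed */
	do_reserve();			/* retried on free: now succeeds */
	return 0;
}

As in the patch, the retry point is the free path (amdgpu_vram_mgr_del()
calls amdgpu_vram_mgr_do_reserve() after removing nodes), so a bad page is
reserved the moment its previous owner releases it; that is what removes the
failure previously seen when reserving during GPU reset.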