git.baikalelectronics.ru Git - kernel.git/commitdiff
drm/amdgpu: revert "fix system hang issue during GPU reset"
author: Christian König <christian.koenig@amd.com>
Wed, 12 Aug 2020 15:48:26 +0000 (17:48 +0200)
committer: Alex Deucher <alexander.deucher@amd.com>
Fri, 14 Aug 2020 20:22:40 +0000 (16:22 -0400)
The whole approach wasn't thought through till the end.

We already had a reset lock like this in the past and it caused the same problems like this one.

Completely revert the patch for now and add individual trylock protection to the hardware access functions as necessary.

This reverts commit 0a95b23516a1d3a48831caa0e1e908a7cd373c9c.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
39 files changed:
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
drivers/gpu/drm/amd/amdgpu/atom.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c

index 3e82a11577d90de447bc98dd72db891542fa97cb..08f80ca3b296cb92b0fd3b82ba79bfa50c78ef76 100644 (file)
@@ -949,9 +949,9 @@ struct amdgpu_device {
        bool                            in_suspend;
        bool                            in_hibernate;
 
-       atomic_t                        in_gpu_reset;
+       bool                            in_gpu_reset;
        enum pp_mp1_state               mp1_state;
-       struct rw_semaphore     reset_sem;
+       struct mutex  lock_reset;
        struct amdgpu_doorbell_index doorbell_index;
 
        struct mutex                    notifier_lock;
@@ -1266,9 +1266,4 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev)
        return adev->gmc.tmz_enabled;
 }
 
-static inline bool amdgpu_in_reset(struct amdgpu_device *adev)
-{
-       return atomic_read(&adev->in_gpu_reset) ? true : false;
-}
-
 #endif
index 9738dccb1c2c8698a593e24a90882d5418f58ba1..0effc1d4682416d07832bc63019bd356fd2a26f7 100644 (file)
@@ -244,14 +244,11 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
        if (cp_mqd_gfx9)
                bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
 
-       if (!down_read_trylock(&adev->reset_sem))
-               return -EIO;
-
        r = amdgpu_bo_create(adev, &bp, &bo);
        if (r) {
                dev_err(adev->dev,
                        "failed to allocate BO for amdkfd (%d)\n", r);
-               goto err;
+               return r;
        }
 
        /* map the buffer */
@@ -286,7 +283,6 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 
        amdgpu_bo_unreserve(bo);
 
-       up_read(&adev->reset_sem);
        return 0;
 
 allocate_mem_kmap_bo_failed:
@@ -295,25 +291,19 @@ allocate_mem_pin_bo_failed:
        amdgpu_bo_unreserve(bo);
 allocate_mem_reserve_bo_failed:
        amdgpu_bo_unref(&bo);
-err:
-       up_read(&adev->reset_sem);
+
        return r;
 }
 
 void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
        struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
 
-       down_read(&adev->reset_sem);
-
        amdgpu_bo_reserve(bo, true);
        amdgpu_bo_kunmap(bo);
        amdgpu_bo_unpin(bo);
        amdgpu_bo_unreserve(bo);
        amdgpu_bo_unref(&(bo));
-
-       up_read(&adev->reset_sem);
 }
 
 int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
@@ -345,14 +335,9 @@ int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
 
 void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
        struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
 
-       down_read(&adev->reset_sem);
-
        amdgpu_bo_unref(&bo);
-
-       up_read(&adev->reset_sem);
 }
 
 uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
@@ -626,15 +611,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
        /* This works for NO_HWS. TODO: need to handle without knowing VMID */
        job->vmid = vmid;
 
-       if (!down_read_trylock(&adev->reset_sem)) {
-               ret = -EIO;
-               goto err_ib_sched;
-       }
-
        ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
 
-       up_read(&adev->reset_sem);
-
        if (ret) {
                DRM_ERROR("amdgpu: failed to schedule IB.\n");
                goto err_ib_sched;
@@ -670,9 +648,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-       if (!down_read_trylock(&adev->reset_sem))
-               return -EIO;
-
        if (adev->family == AMDGPU_FAMILY_AI) {
                int i;
 
@@ -682,8 +657,6 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid)
                amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
        }
 
-       up_read(&adev->reset_sem);
-
        return 0;
 }
 
@@ -692,18 +665,11 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid)
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
        const uint32_t flush_type = 0;
        bool all_hub = false;
-       int ret = -EIO;
 
        if (adev->family == AMDGPU_FAMILY_AI)
                all_hub = true;
 
-       if (down_read_trylock(&adev->reset_sem)) {
-               ret = amdgpu_gmc_flush_gpu_tlb_pasid(adev,
-                                       pasid, flush_type, all_hub);
-               up_read(&adev->reset_sem);
-       }
-
-       return ret;
+       return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
 }
 
 bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
index b0dcc800251ee861af9ba1480f3af9430be5c3dd..bf927f432506dc2cedac50fa387f15d6589d6b16 100644 (file)
@@ -542,7 +542,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
        uint32_t temp;
        struct v10_compute_mqd *m = get_mqd(mqd);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EIO;
 
 #if 0
index 275f20399373ff2eeec2371a22156f35fd92edcc..744366c7ee85d33df316c9cc1586271846ea239d 100644 (file)
@@ -423,7 +423,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
        unsigned long flags, end_jiffies;
        int retry;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EIO;
 
        acquire_queue(kgd, pipe_id, queue_id);
index 4997189d8b36869f6aa8faaf9d01269fe2cab8a5..feab4cc6e836765eb15d15a77715ddfd82b3d87e 100644 (file)
@@ -419,7 +419,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
        int retry;
        struct vi_mqd *m = get_mqd(mqd);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EIO;
 
        acquire_queue(kgd, pipe_id, queue_id);
index d5d997fe6aa4476a27c99d836d08b472387ee2e7..e4c274bd35c8c01d2038eabde5e931d76338e5b8 100644 (file)
@@ -539,7 +539,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd,
        uint32_t temp;
        struct v9_mqd *m = get_mqd(mqd);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EIO;
 
        acquire_queue(kgd, pipe_id, queue_id);
index fcf72f3377852e92d2a8332c695877e90fe555c4..62cb510e2cc47bd624fb92bd7ab089244d83c1b9 100644 (file)
@@ -1194,9 +1194,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                return -EINVAL;
        }
 
-       if (!down_read_trylock(&adev->reset_sem))
-               return -EIO;
-
        *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
        if (!*mem) {
                ret = -ENOMEM;
@@ -1263,7 +1260,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        if (offset)
                *offset = amdgpu_bo_mmap_offset(bo);
 
-       up_read(&adev->reset_sem);
        return 0;
 
 allocate_init_user_pages_failed:
@@ -1281,9 +1277,6 @@ err:
                sg_free_table(sg);
                kfree(sg);
        }
-
-       up_read(&adev->reset_sem);
-
        return ret;
 }
 
index a3b150304daef0f50b4dd092a6f65336ff6c2c3b..a512ccbc4deae77e120d2d6dfda0e3498542eae8 100644 (file)
@@ -1292,8 +1292,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
        parser.adev = adev;
        parser.filp = filp;
 
-       down_read(&adev->reset_sem);
-
        r = amdgpu_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser %d!\n", r);
@@ -1333,8 +1331,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 out:
        amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 
-       up_read(&adev->reset_sem);
-
        return r;
 }
 
index d85d13f7a0435725bdaeace4a379963a970d4aa3..8842c55d4490b3fdb57ffda94a7cfee219bfc14d 100644 (file)
@@ -358,8 +358,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
        if (atomic_read(&ctx->guilty))
                out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 
-       down_read(&adev->reset_sem);
-
        /*query ue count*/
        ras_counter = amdgpu_ras_query_error_count(adev, false);
        /*ras counter is monotonic increasing*/
@@ -375,8 +373,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
                ctx->ras_counter_ce = ras_counter;
        }
 
-       up_read(&adev->reset_sem);
-
        mutex_unlock(&mgr->lock);
        return 0;
 }
index 0af249a1e35b7d1150dcc8acd07925930beffa10..35fed75a43972d70066476dcd8391f9341f3a5e6 100644 (file)
@@ -101,14 +101,14 @@ static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
 
        file->private_data = adev;
 
-       down_read(&adev->reset_sem);
+       mutex_lock(&adev->lock_reset);
        if (adev->autodump.dumping.done) {
                reinit_completion(&adev->autodump.dumping);
                ret = 0;
        } else {
                ret = -EBUSY;
        }
-       up_read(&adev->reset_sem);
+       mutex_unlock(&adev->lock_reset);
 
        return ret;
 }
@@ -127,7 +127,7 @@ static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_
 
        poll_wait(file, &adev->autodump.gpu_hang, poll_table);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return POLLIN | POLLRDNORM | POLLWRNORM;
 
        return 0;
@@ -1242,7 +1242,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
        }
 
        /* Avoid accidently unparking the sched thread during GPU reset */
-       down_read(&adev->reset_sem);
+       mutex_lock(&adev->lock_reset);
 
        /* hold on the scheduler */
        for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -1269,7 +1269,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
                kthread_unpark(ring->sched.thread);
        }
 
-       up_read(&adev->reset_sem);
+       mutex_unlock(&adev->lock_reset);
 
        pm_runtime_mark_last_busy(dev->dev);
        pm_runtime_put_autosuspend(dev->dev);
@@ -1459,7 +1459,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
                return -ENOMEM;
 
        /* Avoid accidently unparking the sched thread during GPU reset */
-       down_read(&adev->reset_sem);
+       mutex_lock(&adev->lock_reset);
 
        /* stop the scheduler */
        kthread_park(ring->sched.thread);
@@ -1500,7 +1500,7 @@ failure:
        /* restart the scheduler */
        kthread_unpark(ring->sched.thread);
 
-       up_read(&adev->reset_sem);
+       mutex_unlock(&adev->lock_reset);
 
        ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
 
index bb7f0c8611f97e8a9aabcded4507cf1bf82aa84d..415e1a32b98c2e86443447810acb06c6e8f9dc34 100644 (file)
@@ -1940,7 +1940,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
                        if (adev->ip_blocks[i].status.hw == true)
                                break;
 
-                       if (amdgpu_in_reset(adev) || adev->in_suspend) {
+                       if (adev->in_gpu_reset || adev->in_suspend) {
                                r = adev->ip_blocks[i].version->funcs->resume(adev);
                                if (r) {
                                        DRM_ERROR("resume of IP block <%s> failed %d\n",
@@ -2117,7 +2117,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
                        AMDGPU_RESET_MAGIC_NUM))
                return true;
 
-       if (!amdgpu_in_reset(adev))
+       if (!adev->in_gpu_reset)
                return false;
 
        /*
@@ -3053,8 +3053,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        mutex_init(&adev->mn_lock);
        mutex_init(&adev->virt.vf_errors.lock);
        hash_init(adev->mn_hash);
-       init_rwsem(&adev->reset_sem);
-       atomic_set(&adev->in_gpu_reset, 0);
+       mutex_init(&adev->lock_reset);
        mutex_init(&adev->psp.mutex);
        mutex_init(&adev->notifier_lock);
 
@@ -4082,11 +4081,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                if (need_full_reset) {
                        /* post card */
-                       if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) {
-                               dev_warn(tmp_adev->dev, "asic atom init failed!");
-                               r = -EAGAIN;
-                               goto out;
-                       }
+                       if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
+                               DRM_WARN("asic atom init failed!");
 
                        if (!r) {
                                dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
@@ -4176,18 +4172,16 @@ end:
        return r;
 }
 
-static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive)
+static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
 {
-       if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
-               return false;
-
-       if (hive) {
-               down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
-       } else {
-               down_write(&adev->reset_sem);
-       }
+       if (trylock) {
+               if (!mutex_trylock(&adev->lock_reset))
+                       return false;
+       } else
+               mutex_lock(&adev->lock_reset);
 
        atomic_inc(&adev->gpu_reset_counter);
+       adev->in_gpu_reset = true;
        switch (amdgpu_asic_reset_method(adev)) {
        case AMD_RESET_METHOD_MODE1:
                adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -4207,8 +4201,8 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 {
        amdgpu_vf_error_trans_all(adev);
        adev->mp1_state = PP_MP1_STATE_NONE;
-       atomic_set(&adev->in_gpu_reset, 0);
-       up_write(&adev->reset_sem);
+       adev->in_gpu_reset = false;
+       mutex_unlock(&adev->lock_reset);
 }
 
 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
@@ -4318,14 +4312,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
         * We always reset all schedulers for device and all devices for XGMI
         * hive so that should take care of them too.
         */
-       hive = amdgpu_get_xgmi_hive(adev, false);
-       if (hive) {
-               if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
-                       DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
-                               job ? job->base.id : -1, hive->hive_id);
-                       return 0;
-               }
-               mutex_lock(&hive->hive_lock);
+       hive = amdgpu_get_xgmi_hive(adev, true);
+       if (hive && !mutex_trylock(&hive->reset_lock)) {
+               DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
+                         job ? job->base.id : -1, hive->hive_id);
+               mutex_unlock(&hive->hive_lock);
+               return 0;
        }
 
        /*
@@ -4347,11 +4339,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
        /* block all schedulers and reset given job's ring */
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-               if (!amdgpu_device_lock_adev(tmp_adev, hive)) {
+               if (!amdgpu_device_lock_adev(tmp_adev, !hive)) {
                        DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
                                  job ? job->base.id : -1);
-                       r = 0;
-                       goto skip_recovery;
+                       mutex_unlock(&hive->hive_lock);
+                       return 0;
                }
 
                /*
@@ -4484,9 +4476,8 @@ skip_sched_resume:
                amdgpu_device_unlock_adev(tmp_adev);
        }
 
-skip_recovery:
        if (hive) {
-               atomic_set(&hive->in_reset, 0);
+               mutex_unlock(&hive->reset_lock);
                mutex_unlock(&hive->hive_lock);
        }
 
index 73cc68ab53d0678f306b57759e008cd802ab03bd..7f9e50247413d97ad5c6dcc21037576b20959f1f 100644 (file)
@@ -671,8 +671,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                bo_va = NULL;
        }
 
-       down_read(&adev->reset_sem);
-
        switch (args->operation) {
        case AMDGPU_VA_OP_MAP:
                va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
@@ -702,8 +700,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
                                        args->operation);
 
-       up_read(&adev->reset_sem);
-
 error_backoff:
        ttm_eu_backoff_reservation(&ticket, &list);
 
index 8ccd17d02cc60090f8d238af6f169bd205aea138..a819360a4b6aebc45bc9a0916b527b4d4d7e5da4 100644 (file)
@@ -719,7 +719,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
         *
         * also don't wait anymore for IRQ context
         * */
-       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+       if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
                goto failed_kiq_read;
 
        might_sleep();
@@ -777,7 +777,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
         *
         * also don't wait anymore for IRQ context
         * */
-       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+       if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
                goto failed_kiq_write;
 
        might_sleep();
@@ -796,5 +796,5 @@ failed_undo:
        amdgpu_ring_undo(ring);
        spin_unlock_irqrestore(&kiq->ring_lock, flags);
 failed_kiq_write:
-       dev_warn(adev->dev, "failed to write reg:%x\n", reg);
+       pr_err("failed to write reg:%x\n", reg);
 }
index 75d37dfb51aa7139dc94ce1f388e21de868c9a08..937029ad5271ab6a89ef60014f71e6b88b9755c8 100644 (file)
@@ -220,17 +220,17 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
 
        trace_amdgpu_sched_run_job(job);
 
-       if (down_read_trylock(&ring->adev->reset_sem)) {
+       if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
+               dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
+
+       if (finished->error < 0) {
+               DRM_INFO("Skip scheduling IBs!\n");
+       } else {
                r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
-                                       &fence);
-               up_read(&ring->adev->reset_sem);
+                                      &fence);
                if (r)
                        DRM_ERROR("Error scheduling IBs (%d)\n", r);
-       } else {
-               dma_fence_set_error(finished, -ECANCELED);
-               DRM_INFO("Skip scheduling IBs!\n");
        }
-
        /* if gpu reset, hw fence will be replaced here */
        dma_fence_put(job->fence);
        job->fence = dma_fence_get(fence);
index 58580a48b648632a662fc41f6d6334e14a776cf0..7619f1c3084df09707023b286c7032c6011decde 100644 (file)
@@ -1087,8 +1087,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
        if (!fpriv)
                return;
 
-       down_read(&adev->reset_sem);
-
        pm_runtime_get_sync(dev->dev);
 
        if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
@@ -1127,8 +1125,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
        pm_runtime_mark_last_busy(dev->dev);
        pm_runtime_put_autosuspend(dev->dev);
-
-       up_read(&adev->reset_sem);
 }
 
 /*
index 1705e328c6fcf41d839abd6fea5bdf5bb6d91e64..65ad174bb976bbdd3cf9f6ca676aaddc2cc53c9a 100644 (file)
@@ -163,7 +163,7 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
        enum amd_pm_state_type pm;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -172,8 +172,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                if (adev->smu.ppt_funcs->get_current_power_state)
                        pm = smu_get_current_power_state(&adev->smu);
@@ -185,8 +183,6 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev,
                pm = adev->pm.dpm.user_state;
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -205,7 +201,7 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
        enum amd_pm_state_type  state;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        if (strncmp("battery", buf, strlen("battery")) == 0)
@@ -223,8 +219,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                mutex_lock(&adev->pm.mutex);
                adev->pm.dpm.user_state = state;
@@ -238,9 +232,6 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev,
 
                amdgpu_pm_compute_clocks(adev);
        }
-
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -316,7 +307,7 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
        enum amd_dpm_forced_level level = 0xff;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -325,8 +316,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                level = smu_get_performance_level(&adev->smu);
        else if (adev->powerplay.pp_funcs->get_performance_level)
@@ -334,8 +323,6 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev,
        else
                level = adev->pm.dpm.forced_level;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -362,7 +349,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
        enum amd_dpm_forced_level current_level = 0xff;
        int ret = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        if (strncmp("low", buf, strlen("low")) == 0) {
@@ -393,8 +380,6 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                current_level = smu_get_performance_level(&adev->smu);
        else if (adev->powerplay.pp_funcs->get_performance_level)
@@ -403,8 +388,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
        if (current_level == level) {
                pm_runtime_mark_last_busy(ddev->dev);
                pm_runtime_put_autosuspend(ddev->dev);
-               ret = count;
-               goto pro_end;
+               return count;
        }
 
        if (adev->asic_type == CHIP_RAVEN) {
@@ -425,8 +409,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
                pr_err("Currently not in any profile mode!\n");
                pm_runtime_mark_last_busy(ddev->dev);
                pm_runtime_put_autosuspend(ddev->dev);
-               ret = -EINVAL;
-               goto pro_end;
+               return -EINVAL;
        }
 
        if (is_support_sw_smu(adev)) {
@@ -434,8 +417,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
                if (ret) {
                        pm_runtime_mark_last_busy(ddev->dev);
                        pm_runtime_put_autosuspend(ddev->dev);
-                       ret = -EINVAL;
-                       goto pro_end;
+                       return -EINVAL;
                }
        } else if (adev->powerplay.pp_funcs->force_performance_level) {
                mutex_lock(&adev->pm.mutex);
@@ -443,16 +425,14 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
                        mutex_unlock(&adev->pm.mutex);
                        pm_runtime_mark_last_busy(ddev->dev);
                        pm_runtime_put_autosuspend(ddev->dev);
-                       ret = -EINVAL;
-                       goto pro_end;
+                       return -EINVAL;
                }
                ret = amdgpu_dpm_force_performance_level(adev, level);
                if (ret) {
                        mutex_unlock(&adev->pm.mutex);
                        pm_runtime_mark_last_busy(ddev->dev);
                        pm_runtime_put_autosuspend(ddev->dev);
-                       ret = -EINVAL;
-                       goto pro_end;
+                       return -EINVAL;
                } else {
                        adev->pm.dpm.forced_level = level;
                }
@@ -461,9 +441,7 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev,
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
-pro_end:
-       up_read(&adev->reset_sem);
-       return ret;
+       return count;
 }
 
 static ssize_t amdgpu_get_pp_num_states(struct device *dev,
@@ -475,7 +453,7 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev,
        struct pp_states_info data;
        int i, buf_len, ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -519,7 +497,7 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
        enum amd_pm_state_type pm = 0;
        int i = 0, ret = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -560,7 +538,7 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev,
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = ddev->dev_private;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        if (adev->pp_force_state_enabled)
@@ -580,7 +558,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
        unsigned long idx;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        if (strlen(buf) == 1)
@@ -606,7 +584,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
                        return ret;
                }
 
-               down_read(&adev->reset_sem);
                /* only set user selected power states */
                if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
                    state != POWER_STATE_TYPE_DEFAULT) {
@@ -614,8 +591,6 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
                                        AMD_PP_TASK_ENABLE_USER_STATE, &state);
                        adev->pp_force_state_enabled = true;
                }
-               up_read(&adev->reset_sem);
-
                pm_runtime_mark_last_busy(ddev->dev);
                pm_runtime_put_autosuspend(ddev->dev);
        }
@@ -643,7 +618,7 @@ static ssize_t amdgpu_get_pp_table(struct device *dev,
        char *table = NULL;
        int size, ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -687,7 +662,7 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
        struct amdgpu_device *adev = ddev->dev_private;
        int ret = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -696,21 +671,16 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                ret = smu_sys_set_pp_table(&adev->smu, (void *)buf, count);
                if (ret) {
                        pm_runtime_mark_last_busy(ddev->dev);
                        pm_runtime_put_autosuspend(ddev->dev);
-                       up_read(&adev->reset_sem);
                        return ret;
                }
        } else if (adev->powerplay.pp_funcs->set_pp_table)
                amdgpu_dpm_set_pp_table(adev, buf, count);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -845,7 +815,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
        const char delimiter[3] = {' ', '\n', '\0'};
        uint32_t type;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        if (count > 127)
@@ -889,10 +859,6 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
                return ret;
        }
 
-       ret = count;
-
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                ret = smu_od_edit_dpm_table(&adev->smu, type,
                                            parameter, parameter_size);
@@ -900,8 +866,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
                if (ret) {
                        pm_runtime_mark_last_busy(ddev->dev);
                        pm_runtime_put_autosuspend(ddev->dev);
-                       ret = -EINVAL;
-                       goto pro_end;
+                       return -EINVAL;
                }
        } else {
                if (adev->powerplay.pp_funcs->odn_edit_dpm_table) {
@@ -910,8 +875,7 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
                        if (ret) {
                                pm_runtime_mark_last_busy(ddev->dev);
                                pm_runtime_put_autosuspend(ddev->dev);
-                               ret = -EINVAL;
-                               goto pro_end;
+                               return -EINVAL;
                        }
                }
 
@@ -922,22 +886,18 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
                                                NULL);
                                pm_runtime_mark_last_busy(ddev->dev);
                                pm_runtime_put_autosuspend(ddev->dev);
-                               ret = count;
-                               goto pro_end;
+                               return count;
                        } else {
                                pm_runtime_mark_last_busy(ddev->dev);
                                pm_runtime_put_autosuspend(ddev->dev);
-                               ret = -EINVAL;
-                               goto pro_end;
+                               return -EINVAL;
                        }
                }
        }
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
-pro_end:
-       up_read(&adev->reset_sem);
-       return ret;
+       return count;
 }
 
 static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
@@ -949,7 +909,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
        ssize_t size;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1003,7 +963,7 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
        uint64_t featuremask;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = kstrtou64(buf, 0, &featuremask);
@@ -1018,13 +978,11 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
        if (is_support_sw_smu(adev)) {
                ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask);
                if (ret) {
                        pm_runtime_mark_last_busy(ddev->dev);
                        pm_runtime_put_autosuspend(ddev->dev);
-                       up_read(&adev->reset_sem);
                        return -EINVAL;
                }
        } else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
@@ -1032,12 +990,9 @@ static ssize_t amdgpu_set_pp_features(struct device *dev,
                if (ret) {
                        pm_runtime_mark_last_busy(ddev->dev);
                        pm_runtime_put_autosuspend(ddev->dev);
-                       up_read(&adev->reset_sem);
                        return -EINVAL;
                }
        }
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1053,7 +1008,7 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
        ssize_t size;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1062,8 +1017,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                size = smu_sys_get_pp_feature_mask(&adev->smu, buf);
        else if (adev->powerplay.pp_funcs->get_ppfeature_status)
@@ -1071,8 +1024,6 @@ static ssize_t amdgpu_get_pp_features(struct device *dev,
        else
                size = snprintf(buf, PAGE_SIZE, "\n");
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1118,7 +1069,7 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
        ssize_t size;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1127,8 +1078,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                size = smu_print_clk_levels(&adev->smu, SMU_SCLK, buf);
        else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1136,8 +1085,6 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
        else
                size = snprintf(buf, PAGE_SIZE, "\n");
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1190,7 +1137,7 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
        int ret;
        uint32_t mask = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = amdgpu_read_mask(buf, count, &mask);
@@ -1203,15 +1150,11 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                ret = smu_force_clk_levels(&adev->smu, SMU_SCLK, mask);
        else if (adev->powerplay.pp_funcs->force_clock_level)
                ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1230,7 +1173,7 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
        ssize_t size;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1239,8 +1182,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                size = smu_print_clk_levels(&adev->smu, SMU_MCLK, buf);
        else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1248,8 +1189,6 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
        else
                size = snprintf(buf, PAGE_SIZE, "\n");
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1266,7 +1205,7 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
        uint32_t mask = 0;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = amdgpu_read_mask(buf, count, &mask);
@@ -1279,15 +1218,11 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                ret = smu_force_clk_levels(&adev->smu, SMU_MCLK, mask);
        else if (adev->powerplay.pp_funcs->force_clock_level)
                ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1306,7 +1241,7 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
        ssize_t size;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1315,8 +1250,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                size = smu_print_clk_levels(&adev->smu, SMU_SOCCLK, buf);
        else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1324,8 +1257,6 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
        else
                size = snprintf(buf, PAGE_SIZE, "\n");
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1342,7 +1273,7 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
        int ret;
        uint32_t mask = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = amdgpu_read_mask(buf, count, &mask);
@@ -1355,8 +1286,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                ret = smu_force_clk_levels(&adev->smu, SMU_SOCCLK, mask);
        else if (adev->powerplay.pp_funcs->force_clock_level)
@@ -1364,8 +1293,6 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
        else
                ret = 0;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1384,7 +1311,7 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
        ssize_t size;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1393,8 +1320,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                size = smu_print_clk_levels(&adev->smu, SMU_FCLK, buf);
        else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1402,8 +1327,6 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
        else
                size = snprintf(buf, PAGE_SIZE, "\n");
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1420,7 +1343,7 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
        int ret;
        uint32_t mask = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = amdgpu_read_mask(buf, count, &mask);
@@ -1433,8 +1356,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                ret = smu_force_clk_levels(&adev->smu, SMU_FCLK, mask);
        else if (adev->powerplay.pp_funcs->force_clock_level)
@@ -1442,8 +1363,6 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
        else
                ret = 0;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1462,7 +1381,7 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
        ssize_t size;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1471,8 +1390,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                size = smu_print_clk_levels(&adev->smu, SMU_DCEFCLK, buf);
        else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1480,8 +1397,6 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
        else
                size = snprintf(buf, PAGE_SIZE, "\n");
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1498,7 +1413,7 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
        int ret;
        uint32_t mask = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = amdgpu_read_mask(buf, count, &mask);
@@ -1511,8 +1426,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                ret = smu_force_clk_levels(&adev->smu, SMU_DCEFCLK, mask);
        else if (adev->powerplay.pp_funcs->force_clock_level)
@@ -1520,8 +1433,6 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
        else
                ret = 0;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1540,7 +1451,7 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
        ssize_t size;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1549,8 +1460,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                size = smu_print_clk_levels(&adev->smu, SMU_PCIE, buf);
        else if (adev->powerplay.pp_funcs->print_clock_levels)
@@ -1558,8 +1467,6 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
        else
                size = snprintf(buf, PAGE_SIZE, "\n");
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1576,7 +1483,7 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
        int ret;
        uint32_t mask = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = amdgpu_read_mask(buf, count, &mask);
@@ -1589,8 +1496,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                ret = smu_force_clk_levels(&adev->smu, SMU_PCIE, mask);
        else if (adev->powerplay.pp_funcs->force_clock_level)
@@ -1598,8 +1503,6 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
        else
                ret = 0;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1618,7 +1521,7 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
        uint32_t value = 0;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1627,15 +1530,11 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                value = smu_get_od_percentage(&(adev->smu), SMU_OD_SCLK);
        else if (adev->powerplay.pp_funcs->get_sclk_od)
                value = amdgpu_dpm_get_sclk_od(adev);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1652,7 +1551,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
        int ret;
        long int value;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = kstrtol(buf, 0, &value);
@@ -1666,8 +1565,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                value = smu_set_od_percentage(&(adev->smu), SMU_OD_SCLK, (uint32_t)value);
        } else {
@@ -1682,8 +1579,6 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
                }
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1699,7 +1594,7 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
        uint32_t value = 0;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1708,15 +1603,11 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                value = smu_get_od_percentage(&(adev->smu), SMU_OD_MCLK);
        else if (adev->powerplay.pp_funcs->get_mclk_od)
                value = amdgpu_dpm_get_mclk_od(adev);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1733,7 +1624,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
        int ret;
        long int value;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = kstrtol(buf, 0, &value);
@@ -1747,8 +1638,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                value = smu_set_od_percentage(&(adev->smu), SMU_OD_MCLK, (uint32_t)value);
        } else {
@@ -1763,8 +1652,6 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
                }
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1800,7 +1687,7 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
        ssize_t size;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -1809,8 +1696,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                size = smu_get_power_profile_mode(&adev->smu, buf);
        else if (adev->powerplay.pp_funcs->get_power_profile_mode)
@@ -1818,8 +1703,6 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
        else
                size = snprintf(buf, PAGE_SIZE, "\n");
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1844,7 +1727,7 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
        long int profile_mode = 0;
        const char delimiter[3] = {' ', '\n', '\0'};
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        tmp[0] = *(buf);
@@ -1878,15 +1761,11 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                ret = smu_set_power_profile_mode(&adev->smu, parameter, parameter_size, true);
        else if (adev->powerplay.pp_funcs->set_power_profile_mode)
                ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1912,7 +1791,7 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
        struct amdgpu_device *adev = ddev->dev_private;
        int r, value, size = sizeof(value);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(ddev->dev);
@@ -1921,11 +1800,9 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
        /* read the IP busy sensor */
        r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
                                   (void *)&value, &size);
-       up_read(&adev->reset_sem);
 
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
@@ -1952,7 +1829,7 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
        struct amdgpu_device *adev = ddev->dev_private;
        int r, value, size = sizeof(value);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(ddev->dev);
@@ -1961,14 +1838,10 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
-
        /* read the IP busy sensor */
        r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
                                   (void *)&value, &size);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -1999,7 +1872,7 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
        uint64_t count0 = 0, count1 = 0;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        if (adev->flags & AMD_IS_APU)
@@ -2014,12 +1887,8 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(ddev->dev);
        pm_runtime_put_autosuspend(ddev->dev);
 
@@ -2044,7 +1913,7 @@ static ssize_t amdgpu_get_unique_id(struct device *dev,
        struct drm_device *ddev = dev_get_drvdata(dev);
        struct amdgpu_device *adev = ddev->dev_private;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        if (adev->unique_id)
@@ -2142,7 +2011,7 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
        ssize_t size = 0;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(ddev->dev);
@@ -2151,12 +2020,10 @@ static ssize_t amdgpu_get_gpu_metrics(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
        if (is_support_sw_smu(adev))
                size = smu_sys_get_gpu_metrics(&adev->smu, &gpu_metrics);
        else if (adev->powerplay.pp_funcs->get_gpu_metrics)
                size = amdgpu_dpm_get_gpu_metrics(adev, &gpu_metrics);
-       up_read(&adev->reset_sem);
 
        if (size <= 0)
                goto out;
@@ -2368,7 +2235,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
        int channel = to_sensor_dev_attr(attr)->index;
        int r, temp = 0, size = sizeof(temp);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        if (channel >= PP_TEMP_MAX)
@@ -2380,8 +2247,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
-
        switch (channel) {
        case PP_TEMP_JUNCTION:
                /* get current junction temperature */
@@ -2403,8 +2268,6 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
                break;
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2508,7 +2371,7 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
        u32 pwm_mode = 0;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(adev->ddev->dev);
@@ -2517,23 +2380,18 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                pwm_mode = smu_get_fan_control_mode(&adev->smu);
        } else {
                if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
-                       up_read(&adev->reset_sem);
                        return -EINVAL;
                }
 
                pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2549,7 +2407,7 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
        int err, ret;
        int value;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        err = kstrtoint(buf, 10, &value);
@@ -2562,23 +2420,18 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                smu_set_fan_control_mode(&adev->smu, value);
        } else {
                if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
-                       up_read(&adev->reset_sem);
                        return -EINVAL;
                }
 
                amdgpu_dpm_set_fan_control_mode(adev, value);
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2608,7 +2461,7 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
        u32 value;
        u32 pwm_mode;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2617,15 +2470,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
                return err;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                pwm_mode = smu_get_fan_control_mode(&adev->smu);
        else
                pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 
-       up_read(&adev->reset_sem);
-
        if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
                pr_info("manual fan speed control should be enabled first\n");
                pm_runtime_mark_last_busy(adev->ddev->dev);
@@ -2666,7 +2515,7 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
        int err;
        u32 speed = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2675,8 +2524,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
                return err;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                err = smu_get_fan_speed_percent(&adev->smu, &speed);
        else if (adev->powerplay.pp_funcs->get_fan_speed_percent)
@@ -2684,8 +2531,6 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
        else
                err = -EINVAL;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2705,7 +2550,7 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
        int err;
        u32 speed = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2714,8 +2559,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
                return err;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                err = smu_get_fan_speed_rpm(&adev->smu, &speed);
        else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
@@ -2723,8 +2566,6 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
        else
                err = -EINVAL;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2743,7 +2584,7 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
        u32 size = sizeof(min_rpm);
        int r;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(adev->ddev->dev);
@@ -2752,13 +2593,9 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
-
        r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM,
                                   (void *)&min_rpm, &size);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2777,7 +2614,7 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
        u32 size = sizeof(max_rpm);
        int r;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(adev->ddev->dev);
@@ -2786,13 +2623,9 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
-
        r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM,
                                   (void *)&max_rpm, &size);
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2810,7 +2643,7 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
        int err;
        u32 rpm = 0;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2819,8 +2652,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
                return err;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
        else if (adev->powerplay.pp_funcs->get_fan_speed_rpm)
@@ -2828,8 +2659,6 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev,
        else
                err = -EINVAL;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2848,7 +2677,7 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
        u32 value;
        u32 pwm_mode;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        err = pm_runtime_get_sync(adev->ddev->dev);
@@ -2857,15 +2686,11 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
                return err;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                pwm_mode = smu_get_fan_control_mode(&adev->smu);
        else
                pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
 
-       up_read(&adev->reset_sem);
-
        if (pwm_mode != AMD_FAN_CTRL_MANUAL) {
                pm_runtime_mark_last_busy(adev->ddev->dev);
                pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -2879,8 +2704,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
                return err;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                err = smu_set_fan_speed_rpm(&adev->smu, value);
        else if (adev->powerplay.pp_funcs->set_fan_speed_rpm)
@@ -2888,8 +2711,6 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev,
        else
                err = -EINVAL;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2907,7 +2728,7 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
        u32 pwm_mode = 0;
        int ret;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        ret = pm_runtime_get_sync(adev->ddev->dev);
@@ -2916,23 +2737,18 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev,
                return ret;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                pwm_mode = smu_get_fan_control_mode(&adev->smu);
        } else {
                if (!adev->powerplay.pp_funcs->get_fan_control_mode) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
-                       up_read(&adev->reset_sem);
                        return -EINVAL;
                }
 
                pwm_mode = amdgpu_dpm_get_fan_control_mode(adev);
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2949,7 +2765,7 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
        int value;
        u32 pwm_mode;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        err = kstrtoint(buf, 10, &value);
@@ -2969,22 +2785,17 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev,
                return err;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                smu_set_fan_control_mode(&adev->smu, pwm_mode);
        } else {
                if (!adev->powerplay.pp_funcs->set_fan_control_mode) {
                        pm_runtime_mark_last_busy(adev->ddev->dev);
                        pm_runtime_put_autosuspend(adev->ddev->dev);
-                       up_read(&adev->reset_sem);
                        return -EINVAL;
                }
                amdgpu_dpm_set_fan_control_mode(adev, pwm_mode);
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -2999,7 +2810,7 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
        u32 vddgfx;
        int r, size = sizeof(vddgfx);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3008,11 +2819,9 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
        /* get the voltage */
        r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
                                   (void *)&vddgfx, &size);
-       up_read(&adev->reset_sem);
 
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -3038,7 +2847,7 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
        u32 vddnb;
        int r, size = sizeof(vddnb);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        /* only APUs have vddnb */
@@ -3051,11 +2860,9 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
        /* get the voltage */
        r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
                                   (void *)&vddnb, &size);
-       up_read(&adev->reset_sem);
 
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -3082,7 +2889,7 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
        int r, size = sizeof(u32);
        unsigned uw;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3091,11 +2898,9 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
        /* get the voltage */
        r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
                                   (void *)&query, &size);
-       up_read(&adev->reset_sem);
 
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -3125,7 +2930,7 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
        ssize_t size;
        int r;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3134,8 +2939,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                smu_get_power_limit(&adev->smu, &limit, true);
                size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
@@ -3146,8 +2949,6 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
                size = snprintf(buf, PAGE_SIZE, "\n");
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -3163,7 +2964,7 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
        ssize_t size;
        int r;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3172,8 +2973,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev)) {
                smu_get_power_limit(&adev->smu, &limit, false);
                size = snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
@@ -3184,8 +2983,6 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
                size = snprintf(buf, PAGE_SIZE, "\n");
        }
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -3202,7 +2999,7 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
        int err;
        u32 value;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        if (amdgpu_sriov_vf(adev))
@@ -3221,8 +3018,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
                return err;
        }
 
-       down_read(&adev->reset_sem);
-
        if (is_support_sw_smu(adev))
                err = smu_set_power_limit(&adev->smu, value);
        else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit)
@@ -3230,8 +3025,6 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
        else
                err = -EINVAL;
 
-       up_read(&adev->reset_sem);
-
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
 
@@ -3249,7 +3042,7 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
        uint32_t sclk;
        int r, size = sizeof(sclk);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3258,11 +3051,9 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
        /* get the sclk */
        r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
                                   (void *)&sclk, &size);
-       up_read(&adev->reset_sem);
 
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -3288,7 +3079,7 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
        uint32_t mclk;
        int r, size = sizeof(mclk);
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(adev->ddev->dev);
@@ -3297,11 +3088,9 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
                return r;
        }
 
-       down_read(&adev->reset_sem);
        /* get the sclk */
        r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
                                   (void *)&mclk, &size);
-       up_read(&adev->reset_sem);
 
        pm_runtime_mark_last_busy(adev->ddev->dev);
        pm_runtime_put_autosuspend(adev->ddev->dev);
@@ -4188,7 +3977,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
        u32 flags = 0;
        int r;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EPERM;
 
        r = pm_runtime_get_sync(dev->dev);
@@ -4204,7 +3993,6 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
                return 0;
        }
 
-       down_read(&adev->reset_sem);
        if (!is_support_sw_smu(adev) &&
            adev->powerplay.pp_funcs->debugfs_print_current_performance_level) {
                mutex_lock(&adev->pm.mutex);
@@ -4217,13 +4005,10 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
        } else {
                r = amdgpu_debugfs_pm_info_pp(m, adev);
        }
-       up_read(&adev->reset_sem);
        if (r)
                goto out;
 
-       down_read(&adev->reset_sem);
        amdgpu_device_ip_get_clockgating_state(adev, &flags);
-       up_read(&adev->reset_sem);
 
        seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags);
        amdgpu_parse_cg_state(m, flags);
index 116a89990f39e0cedf2e0c6f9577b79495fcc15b..aa1e77c60c0a640259f708232bcce74fc1adc31d 100644 (file)
@@ -1869,7 +1869,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
                return 0;
 
 
-       if (amdgpu_in_reset(adev) && ras && ras->supported) {
+       if (adev->in_gpu_reset && ras && ras->supported) {
                ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD);
                if (ret) {
                        DRM_WARN("Failed to set MP1 state prepare for reload\n");
@@ -1984,7 +1984,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
        int ret;
        struct psp_context *psp = &adev->psp;
 
-       if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
+       if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
                psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
                goto skip_memalloc;
        }
index cd1403f83dcf88e9a3830e3e70c06c7f92d8944f..f09082578865e958572bbaeeff0974b73333055f 100644 (file)
@@ -2079,7 +2079,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
                        amdgpu_ras_request_reset_on_boot(adev,
                                        ras_block->block);
                        return 0;
-               } else if (adev->in_suspend || amdgpu_in_reset(adev)) {
+               } else if (adev->in_suspend || adev->in_gpu_reset) {
                        /* in resume phase, if fail to enable ras,
                         * clean up all ras fs nodes, and disable ras */
                        goto cleanup;
@@ -2088,7 +2088,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev,
        }
 
        /* in resume phase, no need to create ras fs node */
-       if (adev->in_suspend || amdgpu_in_reset(adev))
+       if (adev->in_suspend || adev->in_gpu_reset)
                return 0;
 
        if (ih_info->cb) {
index c7421aa32946cbd96338910aed1a6602e1f36686..2d502e98fad0f5c4c2b17759ca07c1e8728b95c0 100644 (file)
@@ -2098,7 +2098,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
        uint64_t size;
        int r;
 
-       if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
+       if (!adev->mman.initialized || adev->in_gpu_reset ||
            adev->mman.buffer_funcs_enabled == enable)
                return;
 
index 039245c98ff87de0daf16c67686a023d2871ac87..183743c5fb7bf7af69ac1350b69929f8190e6995 100644 (file)
@@ -628,8 +628,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
        struct amdgpu_firmware_info *ucode = NULL;
 
  /* for baremetal, the ucode is allocated in gtt, so don't need to fill the bo when reset/suspend */
-       if (!amdgpu_sriov_vf(adev) &&
-               (amdgpu_in_reset(adev) || adev->in_suspend))
+       if (!amdgpu_sriov_vf(adev) && (adev->in_gpu_reset || adev->in_suspend))
                return 0;
        /*
         * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
index 1e211544f2dcea2e1fc34e5e7e162611bf677366..ae720a6dc5a0ab8e7d0463ceb1e011e80abfb75d 100644 (file)
@@ -93,7 +93,7 @@ failed_undo:
        amdgpu_ring_undo(ring);
        spin_unlock_irqrestore(&kiq->ring_lock, flags);
 failed_kiq:
-       dev_warn(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
+       pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
 }
 
 /**
index b2046c3a404decd1c732d66b0d5af10cd975bb5e..f826945989c727a1a5b23fef67e7541e5d62c86f 100644 (file)
@@ -325,9 +325,9 @@ static inline bool is_virtual_machine(void)
 #define amdgpu_sriov_is_pp_one_vf(adev) \
        ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
 #define amdgpu_sriov_is_debug(adev) \
-       ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
+       ((!adev->in_gpu_reset) && adev->virt.tdr_debug)
 #define amdgpu_sriov_is_normal(adev) \
-       ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
+       ((!adev->in_gpu_reset) && (!adev->virt.tdr_debug))
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
index 67a756f4337bb4d422fbfa49e3302a006d29d751..cd6e6eb7d966b59b2092b1a423ac45b40aa39607 100644 (file)
@@ -372,7 +372,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
        tmp->hive_id = adev->gmc.xgmi.hive_id;
        INIT_LIST_HEAD(&tmp->device_list);
        mutex_init(&tmp->hive_lock);
-       atomic_set(&tmp->in_reset, 0);
+       mutex_init(&tmp->reset_lock);
        task_barrier_init(&tmp->tb);
 
        if (lock)
@@ -397,7 +397,6 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
                                                hive->hi_req_gpu : adev;
        bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
        bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
-       bool locked;
 
        /* fw bug so temporarily disable pstate switching */
        return 0;
@@ -405,9 +404,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
        if (!hive || adev->asic_type != CHIP_VEGA20)
                return 0;
 
-       locked = atomic_read(&hive->in_reset) ? false : true;
-       if (locked)
-               mutex_lock(&hive->hive_lock);
+       mutex_lock(&hive->hive_lock);
 
        if (is_hi_req)
                hive->hi_req_count++;
@@ -442,8 +439,7 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
                                                        adev : NULL;
        }
 out:
-       if (locked)
-               mutex_unlock(&hive->hive_lock);
+       mutex_unlock(&hive->hive_lock);
        return ret;
 }
 
@@ -598,6 +594,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
        if(!(--hive->number_devices)){
                amdgpu_xgmi_sysfs_destroy(adev, hive);
                mutex_destroy(&hive->hive_lock);
+               mutex_destroy(&hive->reset_lock);
        }
 
        return psp_xgmi_terminate(&adev->psp);
index 61720cd4a1ee243070f75df73729abdba2bafa88..6999eab16a72090c184736d432ffc857e4a935d6 100644 (file)
@@ -30,8 +30,7 @@ struct amdgpu_hive_info {
        uint64_t                hive_id;
        struct list_head        device_list;
        int number_devices;
-       struct mutex hive_lock;
-       atomic_t in_reset;
+       struct mutex hive_lock, reset_lock;
        struct kobject *kobj;
        struct device_attribute dev_attr;
        struct amdgpu_device *adev;
index 8341bd965202bfa277bbaf63235873588b99d06c..4cfc786699c7fcaac2b903c463ea85bbd9379812 100644 (file)
@@ -755,7 +755,6 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg)
                                /* jiffies wrap around we will just wait a little longer */
                                ctx->last_jump_jiffies = jiffies;
                        }
-                       schedule();
                } else {
                        ctx->last_jump = ctx->start + target;
                        ctx->last_jump_jiffies = jiffies;
index de6e6de418679e6474435a59c2c39543d8e75829..e87d43537013aa34069c99d56c85a50efd8b6a7a 100644 (file)
@@ -6201,7 +6201,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
        struct v10_gfx_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.gfx_ring[0];
 
-       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+       if (!adev->in_gpu_reset && !adev->in_suspend) {
                memset((void *)mqd, 0, sizeof(*mqd));
                mutex_lock(&adev->srbm_mutex);
                nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -6213,7 +6213,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring)
                mutex_unlock(&adev->srbm_mutex);
                if (adev->gfx.me.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
-       } else if (amdgpu_in_reset(adev)) {
+       } else if (adev->in_gpu_reset) {
                /* reset mqd with the backup copy */
                if (adev->gfx.me.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd));
@@ -6566,7 +6566,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
 
        gfx_v10_0_kiq_setting(ring);
 
-       if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+       if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
@@ -6602,7 +6602,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
        struct v10_compute_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
-       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+       if (!adev->in_gpu_reset && !adev->in_suspend) {
                memset((void *)mqd, 0, sizeof(*mqd));
                mutex_lock(&adev->srbm_mutex);
                nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -6612,7 +6612,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
 
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
-       } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+       } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
index 7df567a6656dd2933fc14f301fa12081aff07b47..14fd04b699da5533c2d84fe9919be3360fe7548d 100644 (file)
@@ -4633,7 +4633,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 
        gfx_v8_0_kiq_setting(ring);
 
-       if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+       if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
@@ -4670,7 +4670,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
        struct vi_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
-       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+       if (!adev->in_gpu_reset && !adev->in_suspend) {
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
@@ -4682,7 +4682,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
-       } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+       } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
index 93c63ff3b35ee59d6a5175688b4daf61ce47eef0..2c5bb282cc01ce0b16db736b2cdff1dd1e6461c9 100644 (file)
@@ -3686,7 +3686,7 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
 
        gfx_v9_0_kiq_setting(ring);
 
-       if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+       if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
@@ -3724,7 +3724,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
        struct v9_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
-       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+       if (!adev->in_gpu_reset && !adev->in_suspend) {
                memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
                ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
@@ -3736,7 +3736,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
 
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
-       } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
+       } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
@@ -3930,7 +3930,7 @@ static int gfx_v9_0_hw_fini(void *handle)
        /* Use deinitialize sequence from CAIL when unbinding device from driver,
         * otherwise KIQ is hanging when binding back
         */
-       if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
+       if (!adev->in_gpu_reset && !adev->in_suspend) {
                mutex_lock(&adev->srbm_mutex);
                soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
                                adev->gfx.kiq.ring.pipe,
@@ -4088,7 +4088,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
         *
         * also don't wait anymore for IRQ context
         * */
-       if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+       if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
                goto failed_kiq_read;
 
        might_sleep();
index 9d3b1245a339c378a764aee913b608d4d20ec783..ec8c0af395538cffe54b5d08fa147086d96099f3 100644 (file)
@@ -287,7 +287,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
         */
        if (adev->gfx.kiq.ring.sched.ready &&
            (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-           !amdgpu_in_reset(adev)) {
+           !adev->in_gpu_reset) {
 
                struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
                const unsigned eng = 17;
@@ -312,7 +312,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 
        if (!adev->mman.buffer_funcs_enabled ||
            !adev->ib_pool_ready ||
-           amdgpu_in_reset(adev) ||
+           adev->in_gpu_reset ||
            ring->sched.ready == false) {
                gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
                mutex_unlock(&adev->mman.gtt_window_lock);
index 80c146df338aaea3fc994a8f8f30f7c1519cffef..3ce5c1d2fdf2b50f35f873b2e1e19636afa49a9e 100644 (file)
@@ -434,7 +434,7 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        int vmid;
        unsigned int tmp;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EIO;
 
        for (vmid = 1; vmid < 16; vmid++) {
index 9ab65ca7df777f02c519b3c2d3b3a9cee45eac3f..3e6615f9d39c1fcfab0cc9e7f85c77906f4c7e58 100644 (file)
@@ -635,7 +635,7 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        int vmid;
        unsigned int tmp;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EIO;
 
        for (vmid = 1; vmid < 16; vmid++) {
index 773ee11b3d173235ef05da63fb022eb27ddd776b..6a780b674018ff6d9fbc040373fd78d83c9edd92 100644 (file)
@@ -501,7 +501,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
         */
        if (adev->gfx.kiq.ring.sched.ready &&
                        (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-                       !amdgpu_in_reset(adev)) {
+                       !adev->in_gpu_reset) {
                uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
                uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
 
@@ -596,7 +596,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
-       if (amdgpu_in_reset(adev))
+       if (adev->in_gpu_reset)
                return -EIO;
 
        if (ring->sched.ready) {
@@ -633,8 +633,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
                spin_unlock(&adev->gfx.kiq.ring_lock);
                r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
                if (r < 1) {
-                       dev_info(adev->dev,
-                               "wait for kiq fence error: %ld\n", r);
+                       DRM_ERROR("wait for kiq fence error: %ld.\n", r);
                        return -ETIME;
                }
 
index fe31cbeccfe91a20f55ebcc1fc870a8c284826f2..5fd67e1cc2a0465ef469434d29e2585ffcd042f0 100644 (file)
@@ -238,16 +238,20 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
        struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
        struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
        int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
+       int locked;
 
        /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
         * otherwise the mailbox msg will be ruined/reseted by
         * the VF FLR.
         *
-        * we can unlock the reset_sem to allow "amdgpu_job_timedout"
+        * we can unlock the lock_reset to allow "amdgpu_job_timedout"
         * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
         * which means host side had finished this VF's FLR.
         */
-       down_read(&adev->reset_sem);
+       locked = mutex_trylock(&adev->lock_reset);
+       if (locked)
+               adev->in_gpu_reset = true;
+
        do {
                if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
                        goto flr_done;
@@ -257,7 +261,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
        } while (timeout > 1);
 
 flr_done:
-       up_read(&adev->reset_sem);
+       if (locked) {
+               adev->in_gpu_reset = false;
+               mutex_unlock(&adev->lock_reset);
+       }
 
        /* Trigger recovery for world switch failure if no TDR */
        if (amdgpu_device_should_recover_gpu(adev)
index 6f55172e8337449c36687b23e37be3318ccbe30a..ce2bf1fb79ed12a2c7a11cb30eb5c8c9f49eebe0 100644 (file)
@@ -259,16 +259,20 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
        struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
        struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
        int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
+       int locked;
 
        /* block amdgpu_gpu_recover till msg FLR COMPLETE received,
         * otherwise the mailbox msg will be ruined/reseted by
         * the VF FLR.
         *
-        * we can unlock the reset_sem to allow "amdgpu_job_timedout"
+        * we can unlock the lock_reset to allow "amdgpu_job_timedout"
         * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
         * which means host side had finished this VF's FLR.
         */
-       down_read(&adev->reset_sem);
+       locked = mutex_trylock(&adev->lock_reset);
+       if (locked)
+               adev->in_gpu_reset = true;
+
        do {
                if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
                        goto flr_done;
@@ -278,7 +282,10 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
        } while (timeout > 1);
 
 flr_done:
-       up_read(&adev->reset_sem);
+       if (locked) {
+               adev->in_gpu_reset = false;
+               mutex_unlock(&adev->lock_reset);
+       }
 
        /* Trigger recovery for world switch failure if no TDR */
        if (amdgpu_device_should_recover_gpu(adev)
index 7ad1537820b54dd5a5f3a74f8ed0c06e84cb4756..e0e60b0d0669ebcfce6a33fba36691e2323c103b 100644 (file)
@@ -304,17 +304,15 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
                                struct qcm_process_device *qpd,
                                struct queue *q)
 {
-       if (!dqm->is_resetting) {
-               /* On GFX v7, CP doesn't flush TC at dequeue */
-               if (q->device->device_info->asic_family == CHIP_HAWAII)
-                       if (flush_texture_cache_nocpsch(q->device, qpd))
-                               pr_err("Failed to flush TC\n");
+       /* On GFX v7, CP doesn't flush TC at dequeue */
+       if (q->device->device_info->asic_family == CHIP_HAWAII)
+               if (flush_texture_cache_nocpsch(q->device, qpd))
+                       pr_err("Failed to flush TC\n");
 
-               kfd_flush_tlb(qpd_to_pdd(qpd));
+       kfd_flush_tlb(qpd_to_pdd(qpd));
 
-               /* Release the vmid mapping */
-               set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
-       }
+       /* Release the vmid mapping */
+       set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
        dqm->vmid_pasid[qpd->vmid] = 0;
 
        qpd->vmid = 0;
index ee2258404c8faf730e535d31af779294f92dada2..40695d52e9a8df98256cd82d57c9c5b4c45bf513 100644 (file)
@@ -1551,10 +1551,6 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 void kfd_flush_tlb(struct kfd_process_device *pdd)
 {
        struct kfd_dev *dev = pdd->dev;
-       struct device_queue_manager *dqm = dev->dqm;
-
-       if (dqm->is_resetting)
-               return;
 
        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
                /* Nothing to flush until a VMID is assigned, which
index 031b7eb55356962329b104dd1f1ebc9c149382c8..653b4a0d51d8c538cd11aec14d49f3d0796f1f35 100644 (file)
@@ -1658,7 +1658,7 @@ static int dm_suspend(void *handle)
        struct amdgpu_display_manager *dm = &adev->dm;
        int ret = 0;
 
-       if (amdgpu_in_reset(adev)) {
+       if (adev->in_gpu_reset) {
                mutex_lock(&dm->dc_lock);
                dm->cached_dc_state = dc_copy_state(dm->dc->current_state);
 
@@ -1844,7 +1844,7 @@ static int dm_resume(void *handle)
        struct dc_state *dc_state;
        int i, r, j;
 
-       if (amdgpu_in_reset(adev)) {
+       if (adev->in_gpu_reset) {
                dc_state = dm->cached_dc_state;
 
                r = dm_dmub_hw_init(adev);
index f87a73cb7ec5966c2e06a7ff9dce96d6c0c05f1e..7d17c4f1b4893d9ed906f8a9b63194bc7ceb671e 100644 (file)
@@ -1110,7 +1110,7 @@ static int smu_disable_dpms(struct smu_context *smu)
        struct amdgpu_device *adev = smu->adev;
        int ret = 0;
        bool use_baco = !smu->is_apu &&
-               ((amdgpu_in_reset(adev) &&
+               ((adev->in_gpu_reset &&
                  (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
                 ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
 
index da84012b7fd5167c5883fb57ad164883c9b0f639..c7216362b68da1e9805f648c0ab160e19654fb95 100644 (file)
@@ -489,7 +489,7 @@ static int vega20_setup_asic_task(struct pp_hwmgr *hwmgr)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
        int ret = 0;
-       bool use_baco = (amdgpu_in_reset(adev) &&
+       bool use_baco = (adev->in_gpu_reset &&
                         (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
                (adev->in_runpm && amdgpu_asic_supports_baco(adev));