drm/amdgpu: rework dma_resv handling v3

author Christian König <christian.koenig@amd.com>

Wed, 9 Jun 2021 11:51:36 +0000 (13:51 +0200)

committer Christian König <christian.koenig@amd.com>

Tue, 22 Jun 2021 09:05:05 +0000 (11:05 +0200)
author Christian König <christian.koenig@amd.com>
Wed, 9 Jun 2021 11:51:36 +0000 (13:51 +0200)
committer Christian König <christian.koenig@amd.com>
Tue, 22 Jun 2021 09:05:05 +0000 (11:05 +0200)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h

index a130e766cbdbe24de4b4b21cf678bd7c5acaa272..c905a4cfc173d8ba57b04803fab48135f910f76d 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -34,6 +34,7 @@ struct amdgpu_fpriv;
  struct amdgpu_bo_list_entry {
         struct ttm_validate_buffer      tv;
         struct amdgpu_bo_va             *bo_va;
+       struct dma_fence_chain          *chain;
         uint32_t                        priority;
         struct page                     **user_pages;
         bool                            user_invalidated;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

index 9ce649a1a8d37bf6326ba2f9dfc43dbea19ed35d..25655414e9c00d5b0b2851b98f4396512637b5ec 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                 goto out;
         }
  
+       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+               e->bo_va = amdgpu_vm_bo_find(vm, bo);
+
+               if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
+                       e->chain = dma_fence_chain_alloc();
+                       if (!e->chain) {
+                               r = -ENOMEM;
+                               goto error_validate;
+                       }
+               }
+       }
+
         amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
                                           &p->bytes_moved_vis_threshold);
         p->bytes_moved = 0;
@@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
         gws = p->bo_list->gws_obj;
         oa = p->bo_list->oa_obj;
  
-       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
-               /* Make sure we use the exclusive slot for shared BOs */
-               if (bo->prime_shared_count)
-                       e->tv.num_shared = 0;
-               e->bo_va = amdgpu_vm_bo_find(vm, bo);
-       }
-
         if (gds) {
                 p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
                 p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
@@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
         }
  
  error_validate:
-       if (r)
+       if (r) {
+               amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+                       dma_fence_chain_free(e->chain);
+                       e->chain = NULL;
+               }
                 ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+       }
  out:
         return r;
  }
@@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
  {
         unsigned i;
  
-       if (error && backoff)
+       if (error && backoff) {
+               struct amdgpu_bo_list_entry *e;
+
+               amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
+                       dma_fence_chain_free(e->chain);
+                       e->chain = NULL;
+               }
+
                 ttm_eu_backoff_reservation(&parser->ticket,
                                            &parser->validated);
+       }
  
         for (i = 0; i < parser->num_post_deps; i++) {
                 drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
  
         amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
  
+       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+               struct dma_resv *resv = e->tv.bo->base.resv;
+               struct dma_fence_chain *chain = e->chain;
+
+               if (!chain)
+                       continue;
+
+               /*
+                * Work around dma_resv shortcommings by wrapping up the
+                * submission in a dma_fence_chain and add it as exclusive
+                * fence, but first add the submission as shared fence to make
+                * sure that shared fences never signal before the exclusive
+                * one.
+                */
+               dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
+                                    dma_fence_get(p->fence), 1);
+
+               dma_resv_add_shared_fence(resv, p->fence);
+               rcu_assign_pointer(resv->fence_excl, &chain->base);
+               e->chain = NULL;
+       }
+
         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
         mutex_unlock(&p->adev->notifier_lock);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c

index c3053b83b80cdd47b5d6604a60fd9fa2b97d88cc..23219fc3b28cd3a26c4259afcceebe63a9cef54c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -42,48 +42,6 @@
  #include <linux/pci-p2pdma.h>
  #include <linux/pm_runtime.h>
  
-static int
-__dma_resv_make_exclusive(struct dma_resv *obj)
-{
-       struct dma_fence **fences;
-       unsigned int count;
-       int r;
-
-       if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
-               return 0;
-
-       r = dma_resv_get_fences(obj, NULL, &count, &fences);
-       if (r)
-               return r;
-
-       if (count == 0) {
-               /* Now that was unexpected. */
-       } else if (count == 1) {
-               dma_resv_add_excl_fence(obj, fences[0]);
-               dma_fence_put(fences[0]);
-               kfree(fences);
-       } else {
-               struct dma_fence_array *array;
-
-               array = dma_fence_array_create(count, fences,
-                                              dma_fence_context_alloc(1), 0,
-                                              false);
-               if (!array)
-                       goto err_fences_put;
-
-               dma_resv_add_excl_fence(obj, &array->base);
-               dma_fence_put(&array->base);
-       }
-
-       return 0;
-
-err_fences_put:
-       while (count--)
-               dma_fence_put(fences[count]);
-       kfree(fences);
-       return -ENOMEM;
-}
-
  /**
   * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
   *
@@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
         if (r < 0)
                 goto out;
  
-       r = amdgpu_bo_reserve(bo, false);
-       if (unlikely(r != 0))
-               goto out;
-
-       /*
-        * We only create shared fences for internal use, but importers
-        * of the dmabuf rely on exclusive fences for implicitly
-        * tracking write hazards. As any of the current fences may
-        * correspond to a write, we need to convert all existing
-        * fences on the reservation object into a single exclusive
-        * fence.
-        */
-       r = __dma_resv_make_exclusive(bo->tbo.base.resv);
-       if (r)
-               goto out;
-
-       bo->prime_shared_count++;
-       amdgpu_bo_unreserve(bo);
         return 0;
  
  out:
@@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
         struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
  
-       if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
-               bo->prime_shared_count--;
-
         pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
         pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
  }
@@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
         bo = gem_to_amdgpu_bo(gobj);
         bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
         bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-       if (dma_buf->ops != &amdgpu_dmabuf_ops)
-               bo->prime_shared_count = 1;
  
         dma_resv_unlock(resv);
         return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c

index 9cf4beaf646c7226c9d6f4454060f2f046d2e7c2..0780e8d189928cce74e337524b27c1afdda05b4c 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
                 break;
         }
         case AMDGPU_GEM_OP_SET_PLACEMENT:
-               if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
+               if (robj->tbo.base.import_attach &&
+                   args->value & AMDGPU_GEM_DOMAIN_VRAM) {
                         r = -EINVAL;
                         amdgpu_bo_unreserve(robj);
                         break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index b7a2070d90af2332e1164e4aca4c84633f20fe8a..d13490975ac37f07912f9ec6b08d6a79b7b38e55 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
                 return -EINVAL;
  
         /* A shared bo cannot be migrated to VRAM */
-       if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+       if (bo->tbo.base.import_attach) {
                 if (domain & AMDGPU_GEM_DOMAIN_GTT)
                         domain = AMDGPU_GEM_DOMAIN_GTT;
                 else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h

index 126df03a70666df6fa68e3d3c01e05f720054803..c03dfd298f453b45c5dbcd1b1c0219fca3ecbfa6 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -99,7 +99,6 @@ struct amdgpu_bo {
         struct ttm_buffer_object        tbo;
         struct ttm_bo_kmap_obj          kmap;
         u64                             flags;
-       unsigned                        prime_shared_count;
         /* per VM structure for page tables and with virtual addresses */
         struct amdgpu_vm_bo_base        *vm_bo;
         /* Constant after initialization */
author	Christian König <christian.koenig@amd.com>
	Wed, 9 Jun 2021 11:51:36 +0000 (13:51 +0200)
committer	Christian König <christian.koenig@amd.com>
	Tue, 22 Jun 2021 09:05:05 +0000 (11:05 +0200)
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h		patch \| blob \| history