drm/amdgpu: Add DMA mapping of GTT BOs
author     Felix Kuehling <Felix.Kuehling@amd.com>
           Sun, 11 Apr 2021 22:52:19 +0000 (18:52 -0400)
committer  Alex Deucher <alexander.deucher@amd.com>
           Thu, 20 May 2021 02:44:06 +0000 (22:44 -0400)
Use DMABufs with dynamic attachment to DMA-map GTT BOs on other GPUs.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oak Zeng <Oak.Zeng@amd.com>
Acked-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
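
Background: a GTT BO's backing pages live in system memory and are DMA-mapped only for the GPU that created the BO; before a peer GPU can access the same pages it needs its own DMA mapping. This patch obtains that mapping by exporting the BO as a DMA-buf and importing it on the peer GPU with a dynamic attachment. For reference, a minimal sketch of the generic dynamic-attachment pattern (not code from this patch; the example_* names and importer_dev are illustrative):

    #include <linux/dma-buf.h>
    #include <linux/dma-resv.h>
    #include <linux/err.h>

    /* Exporter calls this when the buffer moves; the importer must
     * rebuild its mapping on next use.
     */
    static void example_move_notify(struct dma_buf_attachment *attach)
    {
    }

    static const struct dma_buf_attach_ops example_attach_ops = {
            .allow_peer2peer = true,
            .move_notify = example_move_notify,
    };

    static struct sg_table *example_map(struct dma_buf *dmabuf,
                                        struct device *importer_dev)
    {
            struct dma_buf_attachment *attach;
            struct sg_table *sgt;

            attach = dma_buf_dynamic_attach(dmabuf, importer_dev,
                                            &example_attach_ops, NULL);
            if (IS_ERR(attach))
                    return ERR_CAST(attach);

            /* Dynamic importers map under the shared reservation lock */
            dma_resv_lock(dmabuf->resv, NULL);
            sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
            dma_resv_unlock(dmabuf->resv);
            if (IS_ERR(sgt))
                    dma_buf_detach(dmabuf, attach);
            return sgt;
    }

In the patch itself neither attach nor map is called directly; both happen inside amdgpu's PRIME import and TTM validation paths, as the hunks below show.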

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a83ac39afdd1d4a775bb0296b4e96f55569df829..81264517d532abbc99cd5bf2d7a956fb8d4b88b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -47,6 +47,7 @@ struct amdgpu_device;
 enum kfd_mem_attachment_type {
        KFD_MEM_ATT_SHARED,     /* Share kgd_mem->bo or another attachment's */
        KFD_MEM_ATT_USERPTR,    /* SG bo to DMA map pages from a userptr bo */
+       KFD_MEM_ATT_DMABUF,     /* DMAbuf to DMA map TTM BOs */
 };
 
 struct kfd_mem_attachment {
@@ -62,6 +63,7 @@ struct kfd_mem_attachment {
 struct kgd_mem {
        struct mutex lock;
        struct amdgpu_bo *bo;
+       struct dma_buf *dmabuf;
        struct list_head attachments;
        /* protected by amdkfd_process_info.lock */
        struct ttm_validate_buffer validate_list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 4fb180d1c758d30fedeccac69b85cdc76d8d8b74..bc838c319cb587f33df1929a738eefd6d64659ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -529,6 +529,16 @@ free_sg:
        return ret;
 }
 
+static int
+kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+       struct ttm_operation_ctx ctx = {.interruptible = true};
+       struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+       return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
 static int
 kfd_mem_dmamap_attachment(struct kgd_mem *mem,
                          struct kfd_mem_attachment *attachment)
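
Note that kfd_mem_dmamap_dmabuf above never calls the dma_buf API directly: validating the imported SG BO into the GTT domain makes TTM populate and bind it, and for a BO backed by a dynamic attachment amdgpu's TTM backend maps the attachment at that point. In effect (a simplified sketch of the result, not the exact call site in amdgpu_ttm.c):

    /* What GTT validation amounts to for a dma_buf-imported BO */
    struct dma_buf_attachment *attach = bo->tbo.base.import_attach;
    struct sg_table *sgt;

    sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
    if (IS_ERR(sgt))
            return PTR_ERR(sgt);
    bo->tbo.ttm->sg = sgt;  /* pages now DMA-mapped for this GPU */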
@@ -538,6 +548,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
                return 0;
        case KFD_MEM_ATT_USERPTR:
                return kfd_mem_dmamap_userptr(mem, attachment);
+       case KFD_MEM_ATT_DMABUF:
+               return kfd_mem_dmamap_dmabuf(attachment);
        default:
                WARN_ON_ONCE(1);
        }
@@ -567,6 +579,19 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
        ttm->sg = NULL;
 }
 
+static void
+kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
+{
+       struct ttm_operation_ctx ctx = {.interruptible = true};
+       struct amdgpu_bo *bo = attachment->bo_va->base.bo;
+
+       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+       ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       /* FIXME: This does not guarantee that amdgpu_ttm_tt_unpopulate is
+        * called
+        */
+}
+
 static void
 kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
                            struct kfd_mem_attachment *attachment)
@@ -577,6 +602,9 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
        case KFD_MEM_ATT_USERPTR:
                kfd_mem_dmaunmap_userptr(mem, attachment);
                break;
+       case KFD_MEM_ATT_DMABUF:
+               kfd_mem_dmaunmap_dmabuf(attachment);
+               break;
        default:
                WARN_ON_ONCE(1);
        }
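
The unmap path is the mirror image: moving the BO to the CPU domain is expected to make TTM unpopulate it, at which point the attachment is unmapped. Again as a simplified sketch of the intended effect:

    /* What CPU-domain validation should amount to on unmap */
    dma_buf_unmap_attachment(bo->tbo.base.import_attach,
                             bo->tbo.ttm->sg, DMA_BIDIRECTIONAL);
    bo->tbo.ttm->sg = NULL;

The FIXME in kfd_mem_dmaunmap_dmabuf flags exactly this gap: ttm_bo_validate moves the BO, but nothing guarantees that amdgpu_ttm_tt_unpopulate runs, so the actual unmap may be deferred.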
@@ -610,6 +638,40 @@ kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
        return 0;
 }
 
+static int
+kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
+                     struct amdgpu_bo **bo)
+{
+       struct drm_gem_object *gobj;
+
+       if (!mem->dmabuf) {
+               mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base,
+                       mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
+                               DRM_RDWR : 0);
+               if (IS_ERR(mem->dmabuf)) {
+                       int ret = PTR_ERR(mem->dmabuf);
+
+                       mem->dmabuf = NULL;
+                       return ret;
+               }
+       }
+
+       gobj = amdgpu_gem_prime_import(&adev->ddev, mem->dmabuf);
+       if (IS_ERR(gobj))
+               return PTR_ERR(gobj);
+
+       /* Import takes an extra reference on the dmabuf. Drop it now to
+        * avoid leaking it. We only need the one reference in
+        * kgd_mem->dmabuf.
+        */
+       dma_buf_put(mem->dmabuf);
+
+       *bo = gem_to_amdgpu_bo(gobj);
+       (*bo)->parent = amdgpu_bo_ref(mem->bo);
+
+       return 0;
+}
+
 /* kfd_mem_attach - Add a BO to a VM
  *
 * Everything that needs to be done only once when a BO is first added
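
Note the lazy export in kfd_mem_attach_dmabuf: mem->dmabuf is created on the first cross-GPU attachment and cached, so a BO shared with N peer GPUs is exported once and imported N times. Illustrative use only (the adev[] array and n_gpus are placeholders, not code from this patch):

    int i, ret;

    for (i = 0; i < n_gpus; i++) {
            struct amdgpu_bo *peer_bo;

            /* First call creates mem->dmabuf; later calls reuse it */
            ret = kfd_mem_attach_dmabuf(adev[i], mem, &peer_bo);
            if (ret)
                    break;
            /* peer_bo is an SG BO on adev[i] sharing mem->bo's pages */
    }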
@@ -667,8 +729,20 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
                        ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
                        if (ret)
                                goto unwind;
+               } else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
+                          mem->bo->tbo.type != ttm_bo_type_sg) {
+                       /* GTT BOs use DMA-mapping ability of dynamic-attach
+                        * DMA bufs. TODO: The same should work for VRAM on
+                        * large-BAR GPUs.
+                        */
+                       attachment[i]->type = KFD_MEM_ATT_DMABUF;
+                       ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
+                       if (ret)
+                               goto unwind;
                } else {
-                       /* FIXME: Need to DMA-map other BO types */
+                       /* FIXME: Need to DMA-map other BO types:
+                        * large-BAR VRAM, doorbells, MMIO remap
+                        */
                        attachment[i]->type = KFD_MEM_ATT_SHARED;
                        bo[i] = mem->bo;
                        drm_gem_object_get(&bo[i]->tbo.base);
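
Taken together, the branch added above slots DMA-buf attachment between the existing userptr case and the shared-BO fallback. As a hypothetical helper distilling the selection (the same-GPU and userptr tests are paraphrased from code outside this hunk):

    static enum kfd_mem_attachment_type
    kfd_mem_select_att_type(struct amdgpu_device *adev, struct kgd_mem *mem,
                            struct amdgpu_device *bo_adev)
    {
            if (adev == bo_adev)
                    return KFD_MEM_ATT_SHARED;  /* same GPU: share the BO */
            if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm))
                    return KFD_MEM_ATT_USERPTR; /* SG BO over user pages */
            if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
                mem->bo->tbo.type != ttm_bo_type_sg)
                    return KFD_MEM_ATT_DMABUF;  /* added by this patch */
            /* FIXME cases: large-BAR VRAM, doorbells, MMIO remap */
            return KFD_MEM_ATT_SHARED;
    }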
@@ -1527,6 +1601,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
        /* Free the BO */
        drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
+       if (mem->dmabuf)
+               dma_buf_put(mem->dmabuf);
        drm_gem_object_put(&mem->bo->tbo.base);
        mutex_destroy(&mem->lock);
        kfree(mem);