drm/amdgpu: allocate entities on demand
author     Nirmoy Das <nirmoy.das@amd.com>
           Tue, 21 Jan 2020 14:53:53 +0000 (15:53 +0100)
committer  Alex Deucher <alexander.deucher@amd.com>
           Thu, 30 Jan 2020 22:15:42 +0000 (17:15 -0500)
Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.

This patch avoids that waste by allocating an entity, together with its
fences, only when it is first needed.

v2: allocate memory for entity and fences together

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
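
The v2 note above is the key structural change: the fence ring becomes a
C99 flexible array member at the end of struct amdgpu_ctx_entity, sized
with offsetof() so that a single kcalloc() covers the entity and its
fences together. A minimal userspace sketch of that allocation pattern
(the struct and names here are hypothetical stand-ins, with calloc/free
in place of kcalloc/kfree):

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

/* Hypothetical stand-in for struct amdgpu_ctx_entity; the flexible
 * array member must be the last field of the struct. */
struct entity {
        unsigned long sequence;
        void *fences[];         /* allocated together with the struct */
};

int main(void)
{
        unsigned int njobs = 16;        /* plays the role of amdgpu_sched_jobs */

        /* One zeroed allocation covers the struct plus njobs fence
         * pointers, mirroring kcalloc(1, offsetof(typeof(*entity),
         * fences[amdgpu_sched_jobs]), GFP_KERNEL) in the patch. */
        struct entity *e = calloc(1, offsetof(struct entity, fences[njobs]));

        if (!e)
                return 1;
        e->sequence = 1;
        printf("one allocation of %zu bytes\n",
               offsetof(struct entity, fences[njobs]));
        free(e);        /* a single free releases entity and fences alike */
        return 0;
}

Note that sizeof(struct entity) would not account for the trailing
array, which is why the allocation is sized with offsetof() rather than
sizeof().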
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de93aff6004355597df30bc5ad008f..94a6c42f29ea59a42f62bc1ef1077e21566df3a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
        [AMDGPU_HW_IP_VCN_JPEG] =       1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
-{
-       unsigned i, num_entities = 0;
-
-       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-               num_entities += amdgpu_ctx_num_entities[i];
-
-       return num_entities;
-}
-
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
                                      enum drm_sched_priority priority)
 {
+       if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+               return -EINVAL;
+
        /* NORMAL and below are accessible by everyone */
        if (priority <= DRM_SCHED_PRIORITY_NORMAL)
                return 0;
@@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
        return -EACCES;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-                          enum drm_sched_priority priority,
-                          struct drm_file *filp,
-                          struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
 {
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
-       unsigned i, j;
+       struct amdgpu_device *adev = ctx->adev;
+       struct amdgpu_ctx_entity *entity;
+       struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+       unsigned num_scheds = 0;
+       enum drm_sched_priority priority;
        int r;
 
-       if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-               return -EINVAL;
-
-       r = amdgpu_ctx_priority_permit(filp, priority);
-       if (r)
-               return r;
-
-       memset(ctx, 0, sizeof(*ctx));
-       ctx->adev = adev;
-
-
-       ctx->entities[0] = kcalloc(num_entities,
-                                  sizeof(struct amdgpu_ctx_entity),
-                                  GFP_KERNEL);
-       if (!ctx->entities[0])
-               return -ENOMEM;
-
+       entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+                        GFP_KERNEL);
+       if (!entity)
+               return  -ENOMEM;
 
-       for (i = 0; i < num_entities; ++i) {
-               struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-               entity->sequence = 1;
-               entity->fences = kcalloc(amdgpu_sched_jobs,
-                                        sizeof(struct dma_fence*), GFP_KERNEL);
-               if (!entity->fences) {
-                       r = -ENOMEM;
-                       goto error_cleanup_memory;
-               }
-       }
-       for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-               ctx->entities[i] = ctx->entities[i - 1] +
-                       amdgpu_ctx_num_entities[i - 1];
-
-       kref_init(&ctx->refcount);
-       spin_lock_init(&ctx->ring_lock);
-       mutex_init(&ctx->lock);
-
-       ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-       ctx->reset_counter_query = ctx->reset_counter;
-       ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-       ctx->init_priority = priority;
-       ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
-       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-               struct drm_gpu_scheduler **scheds;
-               struct drm_gpu_scheduler *sched;
-               unsigned num_scheds = 0;
-
-               switch (i) {
+       entity->sequence = 1;
+       priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+                               ctx->init_priority : ctx->override_priority;
+       switch (hw_ip) {
                case AMDGPU_HW_IP_GFX:
                        sched = &adev->gfx.gfx_ring[0].sched;
                        scheds = &sched;
@@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
                        scheds = adev->jpeg.jpeg_sched;
                        num_scheds =  adev->jpeg.num_jpeg_sched;
                        break;
-               }
-
-               for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-                       r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-                                                 priority, scheds,
-                                                 num_scheds, &ctx->guilty);
-               if (r)
-                       goto error_cleanup_entities;
        }
 
+       r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+                                 &ctx->guilty);
+       if (r)
+               goto error_free_entity;
+
+       ctx->entities[hw_ip][ring] = entity;
        return 0;
 
-error_cleanup_entities:
-       for (i = 0; i < num_entities; ++i)
-               drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+       kfree(entity);
 
-error_cleanup_memory:
-       for (i = 0; i < num_entities; ++i) {
-               struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+       return r;
+}
 
-               kfree(entity->fences);
-               entity->fences = NULL;
-       }
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+                          enum drm_sched_priority priority,
+                          struct drm_file *filp,
+                          struct amdgpu_ctx *ctx)
+{
+       int r;
 
-       kfree(ctx->entities[0]);
-       ctx->entities[0] = NULL;
-       return r;
+       r = amdgpu_ctx_priority_permit(filp, priority);
+       if (r)
+               return r;
+
+       memset(ctx, 0, sizeof(*ctx));
+
+       ctx->adev = adev;
+
+       kref_init(&ctx->refcount);
+       spin_lock_init(&ctx->ring_lock);
+       mutex_init(&ctx->lock);
+
+       ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+       ctx->reset_counter_query = ctx->reset_counter;
+       ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+       ctx->init_priority = priority;
+       ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+       return 0;
+
+}
+
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+
+       int i;
+
+       if (!entity)
+               return;
+
+       for (i = 0; i < amdgpu_sched_jobs; ++i)
+               dma_fence_put(entity->fences[i]);
+
+       kfree(entity);
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
 {
        struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
        struct amdgpu_device *adev = ctx->adev;
        unsigned i, j;
 
        if (!adev)
                return;
 
-       for (i = 0; i < num_entities; ++i) {
-               struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-               for (j = 0; j < amdgpu_sched_jobs; ++j)
-                       dma_fence_put(entity->fences[j]);
-
-               kfree(entity->fences);
+       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+               for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+                       amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+                       ctx->entities[i][j] = NULL;
+               }
        }
 
-       kfree(ctx->entities[0]);
        mutex_destroy(&ctx->lock);
-
        kfree(ctx);
 }
 
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                          u32 ring, struct drm_sched_entity **entity)
 {
+       int r;
+
        if (hw_ip >= AMDGPU_HW_IP_NUM) {
                DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
                return -EINVAL;
@@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
                return -EINVAL;
        }
 
-       *entity = &ctx->entities[hw_ip][ring].entity;
+       if (ctx->entities[hw_ip][ring] == NULL) {
+               r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+               if (r)
+                       return r;
+       }
+
+       *entity = &ctx->entities[hw_ip][ring]->entity;
        return 0;
 }
 
@@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
        struct amdgpu_ctx *ctx;
-       unsigned num_entities;
-       u32 i;
+       u32 i, j;
 
        ctx = container_of(ref, struct amdgpu_ctx, refcount);
+       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+               for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                       if (!ctx->entities[i][j])
+                               continue;
 
-       num_entities = amdgpu_ctx_total_num_entities();
-       for (i = 0; i < num_entities; i++)
-               drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+                       drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+               }
+       }
 
        amdgpu_ctx_fini(ref);
 }
@@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
                                  enum drm_sched_priority priority)
 {
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
        enum drm_sched_priority ctx_prio;
-       unsigned i;
+       unsigned i, j;
 
        ctx->override_priority = priority;
 
        ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
                        ctx->init_priority : ctx->override_priority;
+       for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+               for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                       struct drm_sched_entity *entity;
 
-       for (i = 0; i < num_entities; i++) {
-               struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+                       if (!ctx->entities[i][j])
+                               continue;
 
-               drm_sched_entity_set_priority(entity, ctx_prio);
+                       entity = &ctx->entities[i][j]->entity;
+                       drm_sched_entity_set_priority(entity, ctx_prio);
+               }
        }
 }
 
@@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
        struct amdgpu_ctx *ctx;
        struct idr *idp;
-       uint32_t id, i;
+       uint32_t id, i, j;
 
        idp = &mgr->ctx_handles;
 
        mutex_lock(&mgr->lock);
        idr_for_each_entry(idp, ctx, id) {
-               for (i = 0; i < num_entities; i++) {
-                       struct drm_sched_entity *entity;
+               for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                       for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                               struct drm_sched_entity *entity;
+
+                               if (!ctx->entities[i][j])
+                                       continue;
 
-                       entity = &ctx->entities[0][i].entity;
-                       timeout = drm_sched_entity_flush(entity, timeout);
+                               entity = &ctx->entities[i][j]->entity;
+                               timeout = drm_sched_entity_flush(entity, timeout);
+                       }
                }
        }
        mutex_unlock(&mgr->lock);
@@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-       unsigned num_entities = amdgpu_ctx_total_num_entities();
        struct amdgpu_ctx *ctx;
        struct idr *idp;
-       uint32_t id, i;
+       uint32_t id, i, j;
 
        idp = &mgr->ctx_handles;
 
@@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
                        continue;
                }
 
-               for (i = 0; i < num_entities; i++)
-                       drm_sched_entity_fini(&ctx->entities[0][i].entity);
+               for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+                       for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+                               struct drm_sched_entity *entity;
+
+                               if (!ctx->entities[i][j])
+                                       continue;
+
+                               entity = &ctx->entities[i][j]->entity;
+                               drm_sched_entity_fini(entity);
+                       }
+               }
        }
 }
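
Before the header changes below, it is worth isolating the core pattern
of the .c changes above: amdgpu_ctx_get_entity() now performs a
get-or-create lookup, where every slot of the per-context table starts
out NULL and is only populated on first use. A minimal sketch of that
lazy initialization (hypothetical names and table sizes; locking and the
real scheduler setup are omitted):

#include <stdlib.h>

#define NUM_IP          9       /* stand-in for AMDGPU_HW_IP_NUM */
#define MAX_ENTITY      4       /* stand-in for AMDGPU_MAX_ENTITY_NUM */

struct entity { unsigned long sequence; };

/* All slots start out NULL; nothing is allocated at context creation. */
static struct entity *entities[NUM_IP][MAX_ENTITY];

/* Allocate and set up one slot, as amdgpu_ctx_init_entity() does. */
static int init_entity(unsigned int ip, unsigned int ring)
{
        struct entity *e = calloc(1, sizeof(*e));

        if (!e)
                return -1;
        e->sequence = 1;
        entities[ip][ring] = e;
        return 0;
}

/* Get-or-create, mirroring the NULL check added to
 * amdgpu_ctx_get_entity(). */
static int get_entity(unsigned int ip, unsigned int ring,
                      struct entity **out)
{
        if (ip >= NUM_IP || ring >= MAX_ENTITY)
                return -1;
        if (entities[ip][ring] == NULL && init_entity(ip, ring))
                return -1;
        *out = entities[ip][ring];
        return 0;
}

int main(void)
{
        struct entity *e;

        return get_entity(0, 0, &e);    /* first call allocates the slot */
}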
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index a6cd9d4b078c3c61e8606be2d32b29a794d20fec..de490f183af2bcf115182e25a24f8b9d8a34bf17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,10 +29,12 @@ struct drm_device;
 struct drm_file;
 struct amdgpu_fpriv;
 
+#define AMDGPU_MAX_ENTITY_NUM 4
+
 struct amdgpu_ctx_entity {
        uint64_t                sequence;
-       struct dma_fence        **fences;
        struct drm_sched_entity entity;
+       struct dma_fence        *fences[];
 };
 
 struct amdgpu_ctx {
@@ -42,7 +44,7 @@ struct amdgpu_ctx {
        unsigned                        reset_counter_query;
        uint32_t                        vram_lost_counter;
        spinlock_t                      ring_lock;
-       struct amdgpu_ctx_entity        *entities[AMDGPU_HW_IP_NUM];
+       struct amdgpu_ctx_entity        *entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
        bool                            preamble_presented;
        enum drm_sched_priority         init_priority;
        enum drm_sched_priority         override_priority;
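
One last point on teardown: because amdgpu_ctx_fini_entity() returns
early on NULL, the fini paths can walk the whole table without tracking
which slots were ever used. A small sketch of that NULL-tolerant cleanup
(hypothetical names; free() stands in for both dma_fence_put() and
kfree()):

#include <stdlib.h>

#define NUM_FENCES 4    /* stand-in for amdgpu_sched_jobs */

struct entity {
        void *fences[NUM_FENCES];
};

/* NULL-tolerant, like amdgpu_ctx_fini_entity(): slots that were never
 * populated are simply skipped. */
static void fini_entity(struct entity *e)
{
        int i;

        if (!e)
                return;
        for (i = 0; i < NUM_FENCES; ++i)
                free(e->fences[i]);     /* dma_fence_put() drops a ref here */
        free(e);        /* entity and fence storage are one block */
}

int main(void)
{
        struct entity *table[3] = { calloc(1, sizeof(struct entity)) };
        int i;

        /* Walk every slot unconditionally, as amdgpu_ctx_fini() now does. */
        for (i = 0; i < 3; ++i) {
                fini_entity(table[i]);
                table[i] = NULL;
        }
        return 0;
}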