drm/amdkfd: Enable over-subscription with >1 GWS queue
author Joseph Greathouse <Joseph.Greathouse@amd.com>
Wed, 18 Sep 2019 19:49:57 +0000 (14:49 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 28 Apr 2020 20:20:30 +0000 (16:20 -0400)
The current GWS usage model only allows a single GWS-enabled
process to be active on the GPU at once. This ensures that a
barrier-using kernel gets a known amount of GPU hardware, to
prevent deadlock due to inability to go beyond the GWS barrier.

The HWS watches how many GWS entries are assigned to each process,
and goes into over-subscription mode when two processes need more
than the 64 that are available. The current KFD method for working
with this is to allocate all 64 GWS entries to each GWS-capable
process.
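
To make the arithmetic concrete: since every GWS-capable process is
granted the full 64-entry pool, a second such process already pushes
total demand past what the hardware provides. A minimal standalone
sketch of that threshold (illustrative only, not driver code; the
macro and function names here are hypothetical):

#include <stdbool.h>

/* Each GWS-capable process is granted the full 64-entry pool, so
 * demand exceeds the pool as soon as a second such process appears
 * (2 * 64 = 128 > 64).
 */
#define GWS_POOL_SIZE 64	/* hypothetical name for the hardware pool */

static bool gws_demand_exceeds_pool(unsigned int gws_process_count)
{
	return gws_process_count * GWS_POOL_SIZE > GWS_POOL_SIZE;
}

This reduces to gws_process_count > 1, the same threshold the patch
encodes as gws_queue_count > 1 in pm_calc_rlib_size() below.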

When more than one GWS-enabled process is in the runlist, we must
make sure the runlist is in over-subscription mode, so that the
HWS gets a chained RUN_LIST packet and continues scheduling
kernels.
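
The bookkeeping that enforces this appears repeatedly in the
device_queue_manager changes below. As a condensed sketch (this
helper does not exist in the driver; it only names the repeated
pattern): whenever an active queue gains or loses GWS, the
device-wide counter and the per-process flag are updated together
under the DQM lock.

/* Condensed pattern from the DQM hunks below, not an actual driver
 * function: keep dqm->gws_queue_count and qpd->mapped_gws_queue in
 * sync whenever an active queue's GWS status changes.
 */
static void account_gws_queue(struct device_queue_manager *dqm,
			      struct qcm_process_device *qpd,
			      bool gws_active)
{
	if (gws_active) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	} else {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}

pm_calc_rlib_size() then treats gws_queue_count > 1 as an
over-subscription condition, which produces the chained RUN_LIST
packet described above.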

Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c

drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f8fa03a12add711f4ac76647a36656bb7f74a6ed..ff47b1f69b68be49cc0ec7b19bfc598d9de48f9c 100644
@@ -215,6 +215,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
        }
 
        q_properties->is_interop = false;
+       q_properties->is_gws = false;
        q_properties->queue_percent = args->queue_percentage;
        q_properties->priority = args->queue_priority;
        q_properties->queue_address = args->ring_base_address;
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 77ea0f0cb163b93d2819368214592b50e7466fe0..ae954779181303f0131a60a771e10b9b4626e33c 100644
@@ -505,8 +505,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
                deallocate_vmid(dqm, qpd, q);
        }
        qpd->queue_count--;
-       if (q->properties.is_active)
+       if (q->properties.is_active) {
                decrement_queue_count(dqm, q->properties.type);
+               if (q->properties.is_gws) {
+                       dqm->gws_queue_count--;
+                       qpd->mapped_gws_queue = false;
+               }
+       }
 
        return retval;
 }
@@ -583,6 +588,20 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
        else if (!q->properties.is_active && prev_active)
                decrement_queue_count(dqm, q->properties.type);
 
+       if (q->gws && !q->properties.is_gws) {
+               if (q->properties.is_active) {
+                       dqm->gws_queue_count++;
+                       pdd->qpd.mapped_gws_queue = true;
+               }
+               q->properties.is_gws = true;
+       } else if (!q->gws && q->properties.is_gws) {
+               if (q->properties.is_active) {
+                       dqm->gws_queue_count--;
+                       pdd->qpd.mapped_gws_queue = false;
+               }
+               q->properties.is_gws = false;
+       }
+
        if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
                retval = map_queues_cpsch(dqm);
        else if (q->properties.is_active &&
@@ -631,6 +650,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
                                q->properties.type)];
                q->properties.is_active = false;
                decrement_queue_count(dqm, q->properties.type);
+               if (q->properties.is_gws) {
+                       dqm->gws_queue_count--;
+                       qpd->mapped_gws_queue = false;
+               }
 
                if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
                        continue;
@@ -744,6 +767,10 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
                                q->properties.type)];
                q->properties.is_active = true;
                increment_queue_count(dqm, q->properties.type);
+               if (q->properties.is_gws) {
+                       dqm->gws_queue_count++;
+                       qpd->mapped_gws_queue = true;
+               }
 
                if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
                        continue;
@@ -913,6 +940,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
        INIT_LIST_HEAD(&dqm->queues);
        dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
        dqm->active_cp_queue_count = 0;
+       dqm->gws_queue_count = 0;
 
        for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
                int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -1082,7 +1110,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
        INIT_LIST_HEAD(&dqm->queues);
        dqm->active_queue_count = dqm->processes_count = 0;
        dqm->active_cp_queue_count = 0;
-
+       dqm->gws_queue_count = 0;
        dqm->active_runlist = false;
        dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
        dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1432,6 +1460,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
                if (retval == -ETIME)
                        qpd->reset_wavefronts = true;
+               if (q->properties.is_gws) {
+                       dqm->gws_queue_count--;
+                       qpd->mapped_gws_queue = false;
+               }
        }
 
        /*
@@ -1650,8 +1682,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
                else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                        deallocate_sdma_queue(dqm, q);
 
-               if (q->properties.is_active)
+               if (q->properties.is_active) {
                        decrement_queue_count(dqm, q->properties.type);
+                       if (q->properties.is_gws) {
+                               dqm->gws_queue_count--;
+                               qpd->mapped_gws_queue = false;
+                       }
+               }
 
                dqm->total_queue_count--;
        }
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 50d919f814e9add4e77c4747aef9bf16677e1694..4afa015c69b11cf5c9bf4ce114a33359745aba28 100644
@@ -182,6 +182,7 @@ struct device_queue_manager {
        unsigned int            processes_count;
        unsigned int            active_queue_count;
        unsigned int            active_cp_queue_count;
+       unsigned int            gws_queue_count;
        unsigned int            total_queue_count;
        unsigned int            next_pipe_to_allocate;
        unsigned int            *allocated_queues;
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index bae706462f962790be6ef3d463605c9f08a01ee6..a2b77d1df8540b516e2335b06544b07f9f70fda0 100644
@@ -126,6 +126,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 
        prop.queue_size = queue_size;
        prop.is_interop = false;
+       prop.is_gws = false;
        prop.priority = 1;
        prop.queue_percent = 100;
        prop.type = type;
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index efdb75e7677b393a3173c5803a6047c78bb5b9d5..685ca82d42fe8a81a231dc8902c318103d80c7ed 100644
@@ -41,7 +41,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
                                unsigned int *rlib_size,
                                bool *over_subscription)
 {
-       unsigned int process_count, queue_count, compute_queue_count;
+       unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
        unsigned int map_queue_size;
        unsigned int max_proc_per_quantum = 1;
        struct kfd_dev *dev = pm->dqm->dev;
@@ -49,6 +49,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
        process_count = pm->dqm->processes_count;
        queue_count = pm->dqm->active_queue_count;
        compute_queue_count = pm->dqm->active_cp_queue_count;
+       gws_queue_count = pm->dqm->gws_queue_count;
 
        /* check if there is over subscription
         * Note: the arbitration between the number of VMIDs and
@@ -61,7 +62,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
                max_proc_per_quantum = dev->max_proc_per_quantum;
 
        if ((process_count > max_proc_per_quantum) ||
-           compute_queue_count > get_cp_queues_num(pm->dqm)) {
+           compute_queue_count > get_cp_queues_num(pm->dqm) ||
+           gws_queue_count > 1) {
                *over_subscription = true;
                pr_debug("Over subscribed runlist\n");
        }
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 2de01009f1b6d6efc623b1800c8e60d260179589..bdca9dc5f1181bdeebd8467b980e17f5d18ec583 100644
@@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
        packet->bitfields2.pasid = qpd->pqm->process->pasid;
        packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
        packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
-       packet->bitfields14.num_gws = qpd->num_gws;
+       packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
        packet->bitfields14.num_oac = qpd->num_oac;
        packet->bitfields14.sdma_enable = 1;
        packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 43b888b311c7064ed4a1a8c13577f673ef22e9f2..d48b3344926762ec74a7dff812f3f985938c2b90 100644
@@ -411,6 +411,10 @@ enum KFD_QUEUE_PRIORITY {
  * @is_active: Defines if the queue is active or not. @is_active and
  * @is_evicted are protected by the DQM lock.
  *
+ * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
+ * @is_gws should be protected by the DQM lock, since changing it may
+ * require updating the DQM's count of mapped GWS queues.
+ *
  * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
  * of the queue.
  *
@@ -433,6 +437,7 @@ struct queue_properties {
        bool is_interop;
        bool is_evicted;
        bool is_active;
+       bool is_gws;
        /* Not relevant for user mode queues in cp scheduling */
        unsigned int vmid;
        /* Relevant only for sdma queues*/
@@ -564,6 +569,14 @@ struct qcm_process_device {
         */
        bool reset_wavefronts;
 
+       /* This flag tells us if this process has a GWS-capable
+        * queue that will be mapped into the runlist. It's
+        * possible to request a GWS BO, but not have the queue
+        * currently mapped, and this changes how the MAP_PROCESS
+        * PM4 packet is configured.
+        */
+       bool mapped_gws_queue;
+
        /*
         * All the memory management data should be here too
         */
drivers/gpu/drm/amd/amdkfd/kfd_process.c
index fe0cd49d4ea7ce1708dfc00e2388a4bd18dd8c9b..82b4c5a9382ab682278a32f44d0f994126d9f24f 100644
@@ -858,6 +858,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
        pdd->qpd.dqm = dev->dqm;
        pdd->qpd.pqm = &p->pqm;
        pdd->qpd.evicted = 0;
+       pdd->qpd.mapped_gws_queue = false;
        pdd->process = p;
        pdd->bound = PDD_UNBOUND;
        pdd->already_dequeued = false;