drm/amdkfd: Enable over-subscription with >1 GWS queue
author Joseph Greathouse <Joseph.Greathouse@amd.com>
Wed, 18 Sep 2019 19:49:57 +0000 (14:49 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 28 Apr 2020 20:20:30 +0000 (16:20 -0400)
The current GWS usage model only allows a single GWS-enabled
process to be active on the GPU at once. This ensures that a
barrier-using kernel gets a known amount of GPU hardware, to
prevent deadlock due to inability to go beyond the GWS barrier.

The HWS watches how many GWS entries are assigned to each process,
and goes into over-subscription mode when two processes need more
than the 64 that are available. The current KFD method for working
with this is to allocate all 64 GWS entries to each GWS-capable
process.
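
To make the arithmetic concrete: since every GWS-capable process is
granted the full 64-entry pool, a second such process already pushes
total demand past what the hardware provides. A minimal standalone
sketch of that threshold (illustrative only, not driver code; the
macro and function names here are hypothetical):

#include <stdbool.h>

/* Each GWS-capable process is granted the full 64-entry pool, so
 * demand exceeds the pool as soon as a second such process appears
 * (2 * 64 = 128 > 64).
 */
#define GWS_POOL_SIZE 64	/* hypothetical name for the hardware pool */

static bool gws_demand_exceeds_pool(unsigned int gws_process_count)
{
	return gws_process_count * GWS_POOL_SIZE > GWS_POOL_SIZE;
}

This reduces to gws_process_count > 1, the same threshold the patch
encodes as gws_queue_count > 1 in pm_calc_rlib_size() below.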

When more than one GWS-enabled process is in the runlist, we must
make sure the runlist is in over-subscription mode, so that the
HWS gets a chained RUN_LIST packet and continues scheduling
kernels.
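
The bookkeeping that enforces this appears repeatedly in the
device_queue_manager changes below. As a condensed sketch (this
helper does not exist in the driver; it only names the repeated
pattern): whenever an active queue gains or loses GWS, the
device-wide counter and the per-process flag are updated together
under the DQM lock.

/* Condensed pattern from the DQM hunks below, not an actual driver
 * function: keep dqm->gws_queue_count and qpd->mapped_gws_queue in
 * sync whenever an active queue's GWS status changes.
 */
static void account_gws_queue(struct device_queue_manager *dqm,
			      struct qcm_process_device *qpd,
			      bool gws_active)
{
	if (gws_active) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	} else {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}

pm_calc_rlib_size() then treats gws_queue_count > 1 as an
over-subscription condition, which produces the chained RUN_LIST
packet described above.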

Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c

drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f8fa03a12add711f4ac76647a36656bb7f74a6ed..ff47b1f69b68be49cc0ec7b19bfc598d9de48f9c 100644
@@ -215,6 +215,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
        }
 
        q_properties->is_interop = false;
+       q_properties->is_gws = false;
        q_properties->queue_percent = args->queue_percentage;
        q_properties->priority = args->queue_priority;
        q_properties->queue_address = args->ring_base_address;
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 77ea0f0cb163b93d2819368214592b50e7466fe0..ae954779181303f0131a60a771e10b9b4626e33c 100644
@@ -505,8 +505,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
                deallocate_vmid(dqm, qpd, q);
        }
        qpd->queue_count--;
-       if (q->properties.is_active)
+       if (q->properties.is_active) {
                decrement_queue_count(dqm, q->properties.type);
+               if (q->properties.is_gws) {
+                       dqm->gws_queue_count--;
+                       qpd->mapped_gws_queue = false;
+               }
+       }
 
        return retval;
 }
@@ -583,6 +588,20 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
        else if (!q->properties.is_active && prev_active)
                decrement_queue_count(dqm, q->properties.type);
 
+       if (q->gws && !q->properties.is_gws) {
+               if (q->properties.is_active) {
+                       dqm->gws_queue_count++;
+                       pdd->qpd.mapped_gws_queue = true;
+               }
+               q->properties.is_gws = true;
+       } else if (!q->gws && q->properties.is_gws) {
+               if (q->properties.is_active) {
+                       dqm->gws_queue_count--;
+                       pdd->qpd.mapped_gws_queue = false;
+               }
+               q->properties.is_gws = false;
+       }
+
        if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
                retval = map_queues_cpsch(dqm);
        else if (q->properties.is_active &&
@@ -631,6 +650,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
                                q->properties.type)];
                q->properties.is_active = false;
                decrement_queue_count(dqm, q->properties.type);
+               if (q->properties.is_gws) {
+                       dqm->gws_queue_count--;
+                       qpd->mapped_gws_queue = false;
+               }
 
                if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
                        continue;
@@ -744,6 +767,10 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
                                q->properties.type)];
                q->properties.is_active = true;
                increment_queue_count(dqm, q->properties.type);
+               if (q->properties.is_gws) {
+                       dqm->gws_queue_count++;
+                       qpd->mapped_gws_queue = true;
+               }
 
                if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
                        continue;
@@ -913,6 +940,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
        INIT_LIST_HEAD(&dqm->queues);
        dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
        dqm->active_cp_queue_count = 0;
+       dqm->gws_queue_count = 0;
 
        for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
                int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -1082,7 +1110,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
        INIT_LIST_HEAD(&dqm->queues);
        dqm->active_queue_count = dqm->processes_count = 0;
        dqm->active_cp_queue_count = 0;
-
+       dqm->gws_queue_count = 0;
        dqm->active_runlist = false;
        dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
        dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1432,6 +1460,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
                if (retval == -ETIME)
                        qpd->reset_wavefronts = true;
+               if (q->properties.is_gws) {
+                       dqm->gws_queue_count--;
+                       qpd->mapped_gws_queue = false;
+               }
        }
 
        /*
@@ -1650,8 +1682,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
                else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                        deallocate_sdma_queue(dqm, q);
 
-               if (q->properties.is_active)
+               if (q->properties.is_active) {
                        decrement_queue_count(dqm, q->properties.type);
+                       if (q->properties.is_gws) {
+                               dqm->gws_queue_count--;
+                               qpd->mapped_gws_queue = false;
+                       }
+               }
 
                dqm->total_queue_count--;
        }
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 50d919f814e9add4e77c4747aef9bf16677e1694..4afa015c69b11cf5c9bf4ce114a33359745aba28 100644
@@ -182,6 +182,7 @@ struct device_queue_manager {
        unsigned int            processes_count;
        unsigned int            active_queue_count;
        unsigned int            active_cp_queue_count;
+       unsigned int            gws_queue_count;
        unsigned int            total_queue_count;
        unsigned int            next_pipe_to_allocate;
        unsigned int            *allocated_queues;
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index bae706462f962790be6ef3d463605c9f08a01ee6..a2b77d1df8540b516e2335b06544b07f9f70fda0 100644
@@ -126,6 +126,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 
        prop.queue_size = queue_size;
        prop.is_interop = false;
+       prop.is_gws = false;
        prop.priority = 1;
        prop.queue_percent = 100;
        prop.type = type;
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index efdb75e7677b393a3173c5803a6047c78bb5b9d5..685ca82d42fe8a81a231dc8902c318103d80c7ed 100644
@@ -41,7 +41,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
                                unsigned int *rlib_size,
                                bool *over_subscription)
 {
-       unsigned int process_count, queue_count, compute_queue_count;
+       unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
        unsigned int map_queue_size;
        unsigned int max_proc_per_quantum = 1;
        struct kfd_dev *dev = pm->dqm->dev;
@@ -49,6 +49,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
        process_count = pm->dqm->processes_count;
        queue_count = pm->dqm->active_queue_count;
        compute_queue_count = pm->dqm->active_cp_queue_count;
+       gws_queue_count = pm->dqm->gws_queue_count;
 
        /* check if there is over subscription
         * Note: the arbitration between the number of VMIDs and
@@ -61,7 +62,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
                max_proc_per_quantum = dev->max_proc_per_quantum;
 
        if ((process_count > max_proc_per_quantum) ||
-           compute_queue_count > get_cp_queues_num(pm->dqm)) {
+           compute_queue_count > get_cp_queues_num(pm->dqm) ||
+           gws_queue_count > 1) {
                *over_subscription = true;
                pr_debug("Over subscribed runlist\n");
        }
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 2de01009f1b6d6efc623b1800c8e60d260179589..bdca9dc5f1181bdeebd8467b980e17f5d18ec583 100644
@@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
        packet->bitfields2.pasid = qpd->pqm->process->pasid;
        packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
        packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
-       packet->bitfields14.num_gws = qpd->num_gws;
+       packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
        packet->bitfields14.num_oac = qpd->num_oac;
        packet->bitfields14.sdma_enable = 1;
        packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 43b888b311c7064ed4a1a8c13577f673ef22e9f2..d48b3344926762ec74a7dff812f3f985938c2b90 100644
@@ -411,6 +411,10 @@ enum KFD_QUEUE_PRIORITY {
  * @is_active: Defines if the queue is active or not. @is_active and
  * @is_evicted are protected by the DQM lock.
  *
+ * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
+ * @is_gws should be protected by the DQM lock, since changing it may
+ * require updating the DQM's count of mapped GWS queues.
+ *
  * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
  * of the queue.
  *
@@ -433,6 +437,7 @@ struct queue_properties {
        bool is_interop;
        bool is_evicted;
        bool is_active;
+       bool is_gws;
        /* Not relevant for user mode queues in cp scheduling */
        unsigned int vmid;
        /* Relevant only for sdma queues*/
@@ -564,6 +569,14 @@ struct qcm_process_device {
         */
        bool reset_wavefronts;
 
+       /* This flag tells us if this process has a GWS-capable
+        * queue that will be mapped into the runlist. It's
+        * possible to request a GWS BO, but not have the queue
+        * currently mapped, and this changes how the MAP_PROCESS
+        * PM4 packet is configured.
+        */
+       bool mapped_gws_queue;
+
        /*
         * All the memory management data should be here too
         */
drivers/gpu/drm/amd/amdkfd/kfd_process.c
index fe0cd49d4ea7ce1708dfc00e2388a4bd18dd8c9b..82b4c5a9382ab682278a32f44d0f994126d9f24f 100644
@@ -858,6 +858,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
        pdd->qpd.dqm = dev->dqm;
        pdd->qpd.pqm = &p->pqm;
        pdd->qpd.evicted = 0;
+       pdd->qpd.mapped_gws_queue = false;
        pdd->process = p;
        pdd->bound = PDD_UNBOUND;
        pdd->already_dequeued = false;