git.baikalelectronics.ru Git - kernel.git/commitdiff
drm/nouveau/ga102-: support ttm buffer moves via copy engine
author Ben Skeggs <bskeggs@redhat.com>
Thu, 16 Sep 2021 22:04:06 +0000 (08:04 +1000)
committer Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Wed, 6 Oct 2021 09:05:45 +0000 (11:05 +0200)
We don't currently have any kind of real acceleration on Ampere GPUs,
but the TTM memcpy() fallback paths aren't really designed to handle
copies between different devices, such as on Optimus systems, and
result in a kernel OOPS.

A few options were investigated to try and fix this, but didn't work
out, and likely would have resulted in a very unpleasant experience
for users anyway.

This commit adds just enough support for setting up a single channel
connected to a copy engine, which the kernel can use to accelerate
the buffer copies between devices.  Userspace has no access to this
incomplete channel support, but it's suitable for TTM's needs.

A more complete implementation of host(fifo) for Ampere GPUs is in
the works, but the required changes are so invasive that they would
be unsuitable to backport to fix this issue on current kernels.

v2: fix GPFIFO length in RAMFC (reported by Karol)

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Karol Herbst <kherbst@redhat.com>
Cc: <stable@vger.kernel.org> # v5.12+
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Tested-by: Karol Herbst <kherbst@redhat.com>
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210916220406.666454-1-skeggsb@gmail.com
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
drivers/gpu/drm/nouveau/include/nvif/class.h
drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_chan.c
drivers/gpu/drm/nouveau/nouveau_drm.c
drivers/gpu/drm/nouveau/nv84_fence.c
drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c [new file with mode: 0644]
drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c

index c68cc957248e2b268f7fcdc2b0f77b2cf4d4a948..a582c0cb0cb0d2c5919a0b6a2cbbaf685df1651e 100644 (file)
@@ -71,6 +71,7 @@
 #define PASCAL_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000c06f
 #define VOLTA_CHANNEL_GPFIFO_A                        /* clc36f.h */ 0x0000c36f
 #define TURING_CHANNEL_GPFIFO_A                       /* clc36f.h */ 0x0000c46f
+#define AMPERE_CHANNEL_GPFIFO_B                       /* clc36f.h */ 0x0000c76f
 
 #define NV50_DISP                                     /* cl5070.h */ 0x00005070
 #define G82_DISP                                      /* cl5070.h */ 0x00008270
 #define PASCAL_DMA_COPY_B                                            0x0000c1b5
 #define VOLTA_DMA_COPY_A                                             0x0000c3b5
 #define TURING_DMA_COPY_A                                            0x0000c5b5
+#define AMPERE_DMA_COPY_B                                            0x0000c7b5
 
 #define FERMI_DECOMPRESS                                             0x000090b8
 
index 54fab7cc36c1b84ba925ffa78a846fe0918a02bc..64ee82c7c1be5936de87c8ab155b47920fbe91ff 100644 (file)
@@ -77,4 +77,5 @@ int gp100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct
 int gp10b_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int gv100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int tu102_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
+int ga102_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 #endif
index 6d07e653f82d5b74a330eadea83ea0a4074da7a8..c58bcdba2c7aa30d2423390b10020f750dd5df96 100644 (file)
@@ -844,6 +844,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
                            struct ttm_resource *, struct ttm_resource *);
                int (*init)(struct nouveau_channel *, u32 handle);
        } _methods[] = {
+               {  "COPY", 4, 0xc7b5, nve0_bo_move_copy, nve0_bo_move_init },
                {  "COPY", 4, 0xc5b5, nve0_bo_move_copy, nve0_bo_move_init },
                {  "GRCE", 0, 0xc5b5, nve0_bo_move_copy, nvc0_bo_move_init },
                {  "COPY", 4, 0xc3b5, nve0_bo_move_copy, nve0_bo_move_init },
index 80099ef7570226b65ce3fa98bc4b2e19999dedcb..ea7769135b0dcf0cd23aa43e4fb2f3fde31e41e7 100644 (file)
@@ -250,7 +250,8 @@ static int
 nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
                    u64 runlist, bool priv, struct nouveau_channel **pchan)
 {
-       static const u16 oclasses[] = { TURING_CHANNEL_GPFIFO_A,
+       static const u16 oclasses[] = { AMPERE_CHANNEL_GPFIFO_B,
+                                       TURING_CHANNEL_GPFIFO_A,
                                        VOLTA_CHANNEL_GPFIFO_A,
                                        PASCAL_CHANNEL_GPFIFO_A,
                                        MAXWELL_CHANNEL_GPFIFO_A,
@@ -386,7 +387,8 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart)
 
        nvif_object_map(&chan->user, NULL, 0);
 
-       if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) {
+       if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO &&
+           chan->user.oclass < AMPERE_CHANNEL_GPFIFO_B) {
                ret = nvif_notify_ctor(&chan->user, "abi16ChanKilled",
                                       nouveau_channel_killed,
                                       true, NV906F_V0_NTFY_KILLED,
index 1f828c9f691cd5e1e375087ff2a1a90dcc31c731..6109cd9e339918f5798787aad76cd3f1e69961f6 100644 (file)
@@ -345,6 +345,9 @@ nouveau_accel_gr_init(struct nouveau_drm *drm)
        u32 arg0, arg1;
        int ret;
 
+       if (device->info.family >= NV_DEVICE_INFO_V0_AMPERE)
+               return;
+
        /* Allocate channel that has access to the graphics engine. */
        if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
                arg0 = nvif_fifo_runlist(device, NV_DEVICE_HOST_RUNLIST_ENGINES_GR);
@@ -469,6 +472,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
                case PASCAL_CHANNEL_GPFIFO_A:
                case VOLTA_CHANNEL_GPFIFO_A:
                case TURING_CHANNEL_GPFIFO_A:
+               case AMPERE_CHANNEL_GPFIFO_B:
                        ret = nvc0_fence_create(drm);
                        break;
                default:
index 7c9c928c319668d48b11d0081f0c60153b36c3c6..c3526a8622e3e204adc453f6226cc2abeea2755f 100644 (file)
@@ -204,7 +204,7 @@ nv84_fence_create(struct nouveau_drm *drm)
        priv->base.context_new = nv84_fence_context_new;
        priv->base.context_del = nv84_fence_context_del;
 
-       priv->base.uevent = true;
+       priv->base.uevent = drm->client.device.info.family < NV_DEVICE_INFO_V0_AMPERE;
 
        mutex_init(&priv->mutex);
 
index 93ddf63d111408ecc354b54eb3e682278899b84e..ca75c5f6ecaf80a96429b5a6a81432183ccd02f5 100644 (file)
@@ -2602,6 +2602,7 @@ nv172_chipset = {
        .top      = { 0x00000001, ga100_top_new },
        .disp     = { 0x00000001, ga102_disp_new },
        .dma      = { 0x00000001, gv100_dma_new },
+       .fifo     = { 0x00000001, ga102_fifo_new },
 };
 
 static const struct nvkm_device_chip
@@ -2622,6 +2623,7 @@ nv174_chipset = {
        .top      = { 0x00000001, ga100_top_new },
        .disp     = { 0x00000001, ga102_disp_new },
        .dma      = { 0x00000001, gv100_dma_new },
+       .fifo     = { 0x00000001, ga102_fifo_new },
 };
 
 static const struct nvkm_device_chip
@@ -2642,6 +2644,7 @@ nv177_chipset = {
        .top      = { 0x00000001, ga100_top_new },
        .disp     = { 0x00000001, ga102_disp_new },
        .dma      = { 0x00000001, gv100_dma_new },
+       .fifo     = { 0x00000001, ga102_fifo_new },
 };
 
 static int
index 3209eb7af65fb35e4b4dd70b33f77cd592c726e4..5e831d347a95795f3b22217b532afd4ceeb81714 100644 (file)
@@ -18,6 +18,7 @@ nvkm-y += nvkm/engine/fifo/gp100.o
 nvkm-y += nvkm/engine/fifo/gp10b.o
 nvkm-y += nvkm/engine/fifo/gv100.o
 nvkm-y += nvkm/engine/fifo/tu102.o
+nvkm-y += nvkm/engine/fifo/ga102.o
 
 nvkm-y += nvkm/engine/fifo/chan.o
 nvkm-y += nvkm/engine/fifo/channv50.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
new file mode 100644 (file)
index 0000000..f897bef
--- /dev/null
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define ga102_fifo(p) container_of((p), struct ga102_fifo, base.engine)
+#define ga102_chan(p) container_of((p), struct ga102_chan, object)
+#include <engine/fifo.h>
+#include "user.h"
+
+#include <core/memory.h>
+#include <subdev/mmu.h>
+#include <subdev/timer.h>
+#include <subdev/top.h>
+
+#include <nvif/cl0080.h>
+#include <nvif/clc36f.h>
+#include <nvif/class.h>
+
+struct ga102_fifo {
+       struct nvkm_fifo base;
+};
+
+struct ga102_chan {
+       struct nvkm_object object;
+
+       struct {
+               u32 runl;
+               u32 chan;
+       } ctrl;
+
+       struct nvkm_memory *mthd;
+       struct nvkm_memory *inst;
+       struct nvkm_memory *user;
+       struct nvkm_memory *runl;
+
+       struct nvkm_vmm *vmm;
+};
+
+static int
+ga102_chan_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *oclass)
+{
+       if (index == 0) {
+               oclass->ctor = nvkm_object_new;
+               oclass->base = (struct nvkm_sclass) { -1, -1, AMPERE_DMA_COPY_B };
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int
+ga102_chan_map(struct nvkm_object *object, void *argv, u32 argc,
+              enum nvkm_object_map *type, u64 *addr, u64 *size)
+{
+       struct ga102_chan *chan = ga102_chan(object);
+       struct nvkm_device *device = chan->object.engine->subdev.device;
+       u64 bar2 = nvkm_memory_bar2(chan->user);
+
+       if (bar2 == ~0ULL)
+               return -EFAULT;
+
+       *type = NVKM_OBJECT_MAP_IO;
+       *addr = device->func->resource_addr(device, 3) + bar2;
+       *size = 0x1000;
+       return 0;
+}
+
+static int
+ga102_chan_fini(struct nvkm_object *object, bool suspend)
+{
+       struct ga102_chan *chan = ga102_chan(object);
+       struct nvkm_device *device = chan->object.engine->subdev.device;
+
+       nvkm_wr32(device, chan->ctrl.chan, 0x00000003);
+
+       nvkm_wr32(device, chan->ctrl.runl + 0x098, 0x01000000);
+       nvkm_msec(device, 2000,
+               if (!(nvkm_rd32(device, chan->ctrl.runl + 0x098) & 0x00100000))
+                       break;
+       );
+
+       nvkm_wr32(device, chan->ctrl.runl + 0x088, 0);
+
+       nvkm_wr32(device, chan->ctrl.chan, 0xffffffff);
+       return 0;
+}
+
+static int
+ga102_chan_init(struct nvkm_object *object)
+{
+       struct ga102_chan *chan = ga102_chan(object);
+       struct nvkm_device *device = chan->object.engine->subdev.device;
+
+       nvkm_mask(device, chan->ctrl.runl + 0x300, 0x80000000, 0x80000000);
+
+       nvkm_wr32(device, chan->ctrl.runl + 0x080, lower_32_bits(nvkm_memory_addr(chan->runl)));
+       nvkm_wr32(device, chan->ctrl.runl + 0x084, upper_32_bits(nvkm_memory_addr(chan->runl)));
+       nvkm_wr32(device, chan->ctrl.runl + 0x088, 2);
+
+       nvkm_wr32(device, chan->ctrl.chan, 0x00000002);
+       nvkm_wr32(device, chan->ctrl.runl + 0x0090, 0);
+       return 0;
+}
+
+static void *
+ga102_chan_dtor(struct nvkm_object *object)
+{
+       struct ga102_chan *chan = ga102_chan(object);
+
+       if (chan->vmm) {
+               nvkm_vmm_part(chan->vmm, chan->inst);
+               nvkm_vmm_unref(&chan->vmm);
+       }
+
+       nvkm_memory_unref(&chan->runl);
+       nvkm_memory_unref(&chan->user);
+       nvkm_memory_unref(&chan->inst);
+       nvkm_memory_unref(&chan->mthd);
+       return chan;
+}
+
+static const struct nvkm_object_func
+ga102_chan = {
+       .dtor = ga102_chan_dtor,
+       .init = ga102_chan_init,
+       .fini = ga102_chan_fini,
+       .map = ga102_chan_map,
+       .sclass = ga102_chan_sclass,
+};
+
+static int
+ga102_chan_new(struct nvkm_device *device,
+              const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject)
+{
+       struct volta_channel_gpfifo_a_v0 *args = argv;
+       struct nvkm_top_device *tdev;
+       struct nvkm_vmm *vmm;
+       struct ga102_chan *chan;
+       int ret;
+
+       if (argc != sizeof(*args))
+               return -ENOSYS;
+
+       vmm = nvkm_uvmm_search(oclass->client, args->vmm);
+       if (IS_ERR(vmm))
+               return PTR_ERR(vmm);
+
+       if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
+               return -ENOMEM;
+
+       nvkm_object_ctor(&ga102_chan, oclass, &chan->object);
+       *pobject = &chan->object;
+
+       list_for_each_entry(tdev, &device->top->device, head) {
+               if (tdev->type == NVKM_ENGINE_CE) {
+                       chan->ctrl.runl = tdev->runlist;
+                       break;
+               }
+       }
+
+       if (!chan->ctrl.runl)
+               return -ENODEV;
+
+       chan->ctrl.chan = nvkm_rd32(device, chan->ctrl.runl + 0x004) & 0xfffffff0;
+       args->token = nvkm_rd32(device, chan->ctrl.runl + 0x008) & 0xffff0000;
+
+       ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->mthd);
+       if (ret)
+               return ret;
+
+       ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->inst);
+       if (ret)
+               return ret;
+
+       nvkm_kmap(chan->inst);
+       nvkm_wo32(chan->inst, 0x010, 0x0000face);
+       nvkm_wo32(chan->inst, 0x030, 0x7ffff902);
+       nvkm_wo32(chan->inst, 0x048, lower_32_bits(args->ioffset));
+       nvkm_wo32(chan->inst, 0x04c, upper_32_bits(args->ioffset) |
+                                    (order_base_2(args->ilength / 8) << 16));
+       nvkm_wo32(chan->inst, 0x084, 0x20400000);
+       nvkm_wo32(chan->inst, 0x094, 0x30000001);
+       nvkm_wo32(chan->inst, 0x0ac, 0x00020000);
+       nvkm_wo32(chan->inst, 0x0e4, 0x00000000);
+       nvkm_wo32(chan->inst, 0x0e8, 0);
+       nvkm_wo32(chan->inst, 0x0f4, 0x00001000);
+       nvkm_wo32(chan->inst, 0x0f8, 0x10003080);
+       nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000);
+       nvkm_wo32(chan->inst, 0x220, lower_32_bits(nvkm_memory_bar2(chan->mthd)));
+       nvkm_wo32(chan->inst, 0x224, upper_32_bits(nvkm_memory_bar2(chan->mthd)));
+       nvkm_done(chan->inst);
+
+       ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->user);
+       if (ret)
+               return ret;
+
+       ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->runl);
+       if (ret)
+               return ret;
+
+       nvkm_kmap(chan->runl);
+       nvkm_wo32(chan->runl, 0x00, 0x80030001);
+       nvkm_wo32(chan->runl, 0x04, 1);
+       nvkm_wo32(chan->runl, 0x08, 0);
+       nvkm_wo32(chan->runl, 0x0c, 0x00000000);
+       nvkm_wo32(chan->runl, 0x10, lower_32_bits(nvkm_memory_addr(chan->user)));
+       nvkm_wo32(chan->runl, 0x14, upper_32_bits(nvkm_memory_addr(chan->user)));
+       nvkm_wo32(chan->runl, 0x18, lower_32_bits(nvkm_memory_addr(chan->inst)));
+       nvkm_wo32(chan->runl, 0x1c, upper_32_bits(nvkm_memory_addr(chan->inst)));
+       nvkm_done(chan->runl);
+
+       ret = nvkm_vmm_join(vmm, chan->inst);
+       if (ret)
+               return ret;
+
+       chan->vmm = nvkm_vmm_ref(vmm);
+       return 0;
+}
+
+static const struct nvkm_device_oclass
+ga102_chan_oclass = {
+       .ctor = ga102_chan_new,
+};
+
+static int
+ga102_user_new(struct nvkm_device *device,
+              const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject)
+{
+       return tu102_fifo_user_new(oclass, argv, argc, pobject);
+}
+
+static const struct nvkm_device_oclass
+ga102_user_oclass = {
+       .ctor = ga102_user_new,
+};
+
+static int
+ga102_fifo_sclass(struct nvkm_oclass *oclass, int index, const struct nvkm_device_oclass **class)
+{
+       if (index == 0) {
+               oclass->base = (struct nvkm_sclass) { -1, -1, VOLTA_USERMODE_A };
+               *class = &ga102_user_oclass;
+               return 0;
+       } else
+       if (index == 1) {
+               oclass->base = (struct nvkm_sclass) { 0, 0, AMPERE_CHANNEL_GPFIFO_B };
+               *class = &ga102_chan_oclass;
+               return 0;
+       }
+
+       return 2;
+}
+
+static int
+ga102_fifo_info(struct nvkm_engine *engine, u64 mthd, u64 *data)
+{
+       switch (mthd) {
+       case NV_DEVICE_HOST_CHANNELS: *data = 1; return 0;
+       default:
+               break;
+       }
+
+       return -ENOSYS;
+}
+
+static void *
+ga102_fifo_dtor(struct nvkm_engine *engine)
+{
+       return ga102_fifo(engine);
+}
+
+static const struct nvkm_engine_func
+ga102_fifo = {
+       .dtor = ga102_fifo_dtor,
+       .info = ga102_fifo_info,
+       .base.sclass = ga102_fifo_sclass,
+};
+
+int
+ga102_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+              struct nvkm_fifo **pfifo)
+{
+       struct ga102_fifo *fifo;
+
+       if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
+               return -ENOMEM;
+
+       nvkm_engine_ctor(&ga102_fifo, device, type, inst, true, &fifo->base.engine);
+       *pfifo = &fifo->base;
+       return 0;
+}
index 31933f3e5a076b36e14fadf75cbef8f39a7815fc..c982d834c8d98579dd436eb632a6086c84f594d0 100644 (file)
@@ -54,7 +54,7 @@ ga100_top_oneinit(struct nvkm_top *top)
                        info->reset   = (data & 0x0000001f);
                        break;
                case 2:
-                       info->runlist = (data & 0x0000fc00) >> 10;
+                       info->runlist = (data & 0x00fffc00);
                        info->engine  = (data & 0x00000003);
                        break;
                default:
@@ -85,9 +85,10 @@ ga100_top_oneinit(struct nvkm_top *top)
                }
 
                nvkm_debug(subdev, "%02x.%d (%8s): addr %06x fault %2d "
-                                  "runlist %2d engine %2d reset %2d\n", type, inst,
+                                  "runlist %6x engine %2d reset %2d\n", type, inst,
                           info->type == NVKM_SUBDEV_NR ? "????????" : nvkm_subdev_type[info->type],
-                          info->addr, info->fault, info->runlist, info->engine, info->reset);
+                          info->addr, info->fault, info->runlist < 0 ? 0 : info->runlist,
+                          info->engine, info->reset);
                info = NULL;
        }