From: Oded Gabbay Date: Fri, 24 Jun 2022 10:38:57 +0000 (+0300) Subject: uapi: habanalabs: add gaudi2 defines X-Git-Tag: baikal/mips/sdk6.1~5150^2~27^2~37 X-Git-Url: https://git.baikalelectronics.ru/sdk/?a=commitdiff_plain;h=d028b4f4cc5c7bc9f607647bcce1eb9584631589;p=kernel.git uapi: habanalabs: add gaudi2 defines Add the new defines for GAUDI2 uapi interface. It includes the following: 1. Enums of engines and PLLs. 2. New information in the info IOCTL that is retrieved by the driver. 3. Update comments regarding the CB/CS/wait for CS ioctls. 4. New fields in the debug IOCTL for configuring the profiler for Gaudi2. There is no new IOCTL. Some of the changes are also relevant for Greco (which will be upstreamed later this year). When ever it says "Greco and onwards", it means it is also for Gaudi2. Signed-off-by: Oded Gabbay --- diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 41a0fa345e4e4..77b89c537ee8b 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -184,6 +184,285 @@ enum gaudi_queue_id { GAUDI_QUEUE_ID_SIZE }; +/* + * In GAUDI2 we have two modes of operation in regard to queues: + * 1. Legacy mode, where each QMAN exposes 4 streams to the user + * 2. F/W mode, where we use F/W to schedule the JOBS to the different queues. + * + * When in legacy mode, the user sends the queue id per JOB according to + * enum gaudi2_queue_id below. + * + * When in F/W mode, the user sends a stream id per Command Submission. The + * stream id is a running number from 0 up to (N-1), where N is the number + * of streams the F/W exposes and is passed to the user in + * struct hl_info_hw_ip_info + */ + +enum gaudi2_queue_id { + GAUDI2_QUEUE_ID_PDMA_0_0 = 0, + GAUDI2_QUEUE_ID_PDMA_0_1 = 1, + GAUDI2_QUEUE_ID_PDMA_0_2 = 2, + GAUDI2_QUEUE_ID_PDMA_0_3 = 3, + GAUDI2_QUEUE_ID_PDMA_1_0 = 4, + GAUDI2_QUEUE_ID_PDMA_1_1 = 5, + GAUDI2_QUEUE_ID_PDMA_1_2 = 6, + GAUDI2_QUEUE_ID_PDMA_1_3 = 7, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0 = 8, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1 = 9, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2 = 10, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3 = 11, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0 = 12, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1 = 13, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2 = 14, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3 = 15, + GAUDI2_QUEUE_ID_DCORE0_MME_0_0 = 16, + GAUDI2_QUEUE_ID_DCORE0_MME_0_1 = 17, + GAUDI2_QUEUE_ID_DCORE0_MME_0_2 = 18, + GAUDI2_QUEUE_ID_DCORE0_MME_0_3 = 19, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 = 20, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_1 = 21, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_2 = 22, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_3 = 23, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_0 = 24, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_1 = 25, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_2 = 26, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_3 = 27, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_0 = 28, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_1 = 29, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_2 = 30, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_3 = 31, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_0 = 32, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_1 = 33, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_2 = 34, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_3 = 35, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_0 = 36, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_1 = 37, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_2 = 38, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_3 = 39, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_0 = 40, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_1 = 41, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_2 = 42, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_3 = 43, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 = 44, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_1 = 45, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_2 = 46, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_3 = 47, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0 = 48, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1 = 49, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2 = 50, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3 = 51, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0 = 52, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1 = 53, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2 = 54, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3 = 55, + GAUDI2_QUEUE_ID_DCORE1_MME_0_0 = 56, + GAUDI2_QUEUE_ID_DCORE1_MME_0_1 = 57, + GAUDI2_QUEUE_ID_DCORE1_MME_0_2 = 58, + GAUDI2_QUEUE_ID_DCORE1_MME_0_3 = 59, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 = 60, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_1 = 61, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_2 = 62, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_3 = 63, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_0 = 64, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_1 = 65, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_2 = 66, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_3 = 67, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_0 = 68, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_1 = 69, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_2 = 70, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_3 = 71, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_0 = 72, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_1 = 73, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_2 = 74, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_3 = 75, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_0 = 76, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_1 = 77, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_2 = 78, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_3 = 79, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_0 = 80, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_1 = 81, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_2 = 82, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_3 = 83, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0 = 84, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1 = 85, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2 = 86, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3 = 87, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0 = 88, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1 = 89, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2 = 90, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3 = 91, + GAUDI2_QUEUE_ID_DCORE2_MME_0_0 = 92, + GAUDI2_QUEUE_ID_DCORE2_MME_0_1 = 93, + GAUDI2_QUEUE_ID_DCORE2_MME_0_2 = 94, + GAUDI2_QUEUE_ID_DCORE2_MME_0_3 = 95, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 = 96, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_1 = 97, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_2 = 98, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_3 = 99, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_0 = 100, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_1 = 101, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_2 = 102, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_3 = 103, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_0 = 104, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_1 = 105, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_2 = 106, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_3 = 107, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_0 = 108, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_1 = 109, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_2 = 110, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_3 = 111, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_0 = 112, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_1 = 113, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_2 = 114, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_3 = 115, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_0 = 116, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_1 = 117, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_2 = 118, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_3 = 119, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0 = 120, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1 = 121, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2 = 122, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3 = 123, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0 = 124, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1 = 125, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2 = 126, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3 = 127, + GAUDI2_QUEUE_ID_DCORE3_MME_0_0 = 128, + GAUDI2_QUEUE_ID_DCORE3_MME_0_1 = 129, + GAUDI2_QUEUE_ID_DCORE3_MME_0_2 = 130, + GAUDI2_QUEUE_ID_DCORE3_MME_0_3 = 131, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 = 132, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_1 = 133, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_2 = 134, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_3 = 135, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_0 = 136, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_1 = 137, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_2 = 138, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_3 = 139, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_0 = 140, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_1 = 141, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_2 = 142, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_3 = 143, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_0 = 144, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_1 = 145, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_2 = 146, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_3 = 147, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_0 = 148, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_1 = 149, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_2 = 150, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_3 = 151, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_0 = 152, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_1 = 153, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_2 = 154, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_3 = 155, + GAUDI2_QUEUE_ID_NIC_0_0 = 156, + GAUDI2_QUEUE_ID_NIC_0_1 = 157, + GAUDI2_QUEUE_ID_NIC_0_2 = 158, + GAUDI2_QUEUE_ID_NIC_0_3 = 159, + GAUDI2_QUEUE_ID_NIC_1_0 = 160, + GAUDI2_QUEUE_ID_NIC_1_1 = 161, + GAUDI2_QUEUE_ID_NIC_1_2 = 162, + GAUDI2_QUEUE_ID_NIC_1_3 = 163, + GAUDI2_QUEUE_ID_NIC_2_0 = 164, + GAUDI2_QUEUE_ID_NIC_2_1 = 165, + GAUDI2_QUEUE_ID_NIC_2_2 = 166, + GAUDI2_QUEUE_ID_NIC_2_3 = 167, + GAUDI2_QUEUE_ID_NIC_3_0 = 168, + GAUDI2_QUEUE_ID_NIC_3_1 = 169, + GAUDI2_QUEUE_ID_NIC_3_2 = 170, + GAUDI2_QUEUE_ID_NIC_3_3 = 171, + GAUDI2_QUEUE_ID_NIC_4_0 = 172, + GAUDI2_QUEUE_ID_NIC_4_1 = 173, + GAUDI2_QUEUE_ID_NIC_4_2 = 174, + GAUDI2_QUEUE_ID_NIC_4_3 = 175, + GAUDI2_QUEUE_ID_NIC_5_0 = 176, + GAUDI2_QUEUE_ID_NIC_5_1 = 177, + GAUDI2_QUEUE_ID_NIC_5_2 = 178, + GAUDI2_QUEUE_ID_NIC_5_3 = 179, + GAUDI2_QUEUE_ID_NIC_6_0 = 180, + GAUDI2_QUEUE_ID_NIC_6_1 = 181, + GAUDI2_QUEUE_ID_NIC_6_2 = 182, + GAUDI2_QUEUE_ID_NIC_6_3 = 183, + GAUDI2_QUEUE_ID_NIC_7_0 = 184, + GAUDI2_QUEUE_ID_NIC_7_1 = 185, + GAUDI2_QUEUE_ID_NIC_7_2 = 186, + GAUDI2_QUEUE_ID_NIC_7_3 = 187, + GAUDI2_QUEUE_ID_NIC_8_0 = 188, + GAUDI2_QUEUE_ID_NIC_8_1 = 189, + GAUDI2_QUEUE_ID_NIC_8_2 = 190, + GAUDI2_QUEUE_ID_NIC_8_3 = 191, + GAUDI2_QUEUE_ID_NIC_9_0 = 192, + GAUDI2_QUEUE_ID_NIC_9_1 = 193, + GAUDI2_QUEUE_ID_NIC_9_2 = 194, + GAUDI2_QUEUE_ID_NIC_9_3 = 195, + GAUDI2_QUEUE_ID_NIC_10_0 = 196, + GAUDI2_QUEUE_ID_NIC_10_1 = 197, + GAUDI2_QUEUE_ID_NIC_10_2 = 198, + GAUDI2_QUEUE_ID_NIC_10_3 = 199, + GAUDI2_QUEUE_ID_NIC_11_0 = 200, + GAUDI2_QUEUE_ID_NIC_11_1 = 201, + GAUDI2_QUEUE_ID_NIC_11_2 = 202, + GAUDI2_QUEUE_ID_NIC_11_3 = 203, + GAUDI2_QUEUE_ID_NIC_12_0 = 204, + GAUDI2_QUEUE_ID_NIC_12_1 = 205, + GAUDI2_QUEUE_ID_NIC_12_2 = 206, + GAUDI2_QUEUE_ID_NIC_12_3 = 207, + GAUDI2_QUEUE_ID_NIC_13_0 = 208, + GAUDI2_QUEUE_ID_NIC_13_1 = 209, + GAUDI2_QUEUE_ID_NIC_13_2 = 210, + GAUDI2_QUEUE_ID_NIC_13_3 = 211, + GAUDI2_QUEUE_ID_NIC_14_0 = 212, + GAUDI2_QUEUE_ID_NIC_14_1 = 213, + GAUDI2_QUEUE_ID_NIC_14_2 = 214, + GAUDI2_QUEUE_ID_NIC_14_3 = 215, + GAUDI2_QUEUE_ID_NIC_15_0 = 216, + GAUDI2_QUEUE_ID_NIC_15_1 = 217, + GAUDI2_QUEUE_ID_NIC_15_2 = 218, + GAUDI2_QUEUE_ID_NIC_15_3 = 219, + GAUDI2_QUEUE_ID_NIC_16_0 = 220, + GAUDI2_QUEUE_ID_NIC_16_1 = 221, + GAUDI2_QUEUE_ID_NIC_16_2 = 222, + GAUDI2_QUEUE_ID_NIC_16_3 = 223, + GAUDI2_QUEUE_ID_NIC_17_0 = 224, + GAUDI2_QUEUE_ID_NIC_17_1 = 225, + GAUDI2_QUEUE_ID_NIC_17_2 = 226, + GAUDI2_QUEUE_ID_NIC_17_3 = 227, + GAUDI2_QUEUE_ID_NIC_18_0 = 228, + GAUDI2_QUEUE_ID_NIC_18_1 = 229, + GAUDI2_QUEUE_ID_NIC_18_2 = 230, + GAUDI2_QUEUE_ID_NIC_18_3 = 231, + GAUDI2_QUEUE_ID_NIC_19_0 = 232, + GAUDI2_QUEUE_ID_NIC_19_1 = 233, + GAUDI2_QUEUE_ID_NIC_19_2 = 234, + GAUDI2_QUEUE_ID_NIC_19_3 = 235, + GAUDI2_QUEUE_ID_NIC_20_0 = 236, + GAUDI2_QUEUE_ID_NIC_20_1 = 237, + GAUDI2_QUEUE_ID_NIC_20_2 = 238, + GAUDI2_QUEUE_ID_NIC_20_3 = 239, + GAUDI2_QUEUE_ID_NIC_21_0 = 240, + GAUDI2_QUEUE_ID_NIC_21_1 = 241, + GAUDI2_QUEUE_ID_NIC_21_2 = 242, + GAUDI2_QUEUE_ID_NIC_21_3 = 243, + GAUDI2_QUEUE_ID_NIC_22_0 = 244, + GAUDI2_QUEUE_ID_NIC_22_1 = 245, + GAUDI2_QUEUE_ID_NIC_22_2 = 246, + GAUDI2_QUEUE_ID_NIC_22_3 = 247, + GAUDI2_QUEUE_ID_NIC_23_0 = 248, + GAUDI2_QUEUE_ID_NIC_23_1 = 249, + GAUDI2_QUEUE_ID_NIC_23_2 = 250, + GAUDI2_QUEUE_ID_NIC_23_3 = 251, + GAUDI2_QUEUE_ID_ROT_0_0 = 252, + GAUDI2_QUEUE_ID_ROT_0_1 = 253, + GAUDI2_QUEUE_ID_ROT_0_2 = 254, + GAUDI2_QUEUE_ID_ROT_0_3 = 255, + GAUDI2_QUEUE_ID_ROT_1_0 = 256, + GAUDI2_QUEUE_ID_ROT_1_1 = 257, + GAUDI2_QUEUE_ID_ROT_1_2 = 258, + GAUDI2_QUEUE_ID_ROT_1_3 = 259, + GAUDI2_QUEUE_ID_CPU_PQ = 260, + GAUDI2_QUEUE_ID_SIZE +}; + /* * Engine Numbering * @@ -242,6 +521,85 @@ enum gaudi_engine_id { GAUDI_ENGINE_ID_SIZE }; +enum gaudi2_engine_id { + GAUDI2_DCORE0_ENGINE_ID_EDMA_0 = 0, + GAUDI2_DCORE0_ENGINE_ID_EDMA_1, + GAUDI2_DCORE0_ENGINE_ID_MME, + GAUDI2_DCORE0_ENGINE_ID_TPC_0, + GAUDI2_DCORE0_ENGINE_ID_TPC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_2, + GAUDI2_DCORE0_ENGINE_ID_TPC_3, + GAUDI2_DCORE0_ENGINE_ID_TPC_4, + GAUDI2_DCORE0_ENGINE_ID_TPC_5, + GAUDI2_DCORE0_ENGINE_ID_DEC_0, + GAUDI2_DCORE0_ENGINE_ID_DEC_1, + GAUDI2_DCORE1_ENGINE_ID_EDMA_0, + GAUDI2_DCORE1_ENGINE_ID_EDMA_1, + GAUDI2_DCORE1_ENGINE_ID_MME, + GAUDI2_DCORE1_ENGINE_ID_TPC_0, + GAUDI2_DCORE1_ENGINE_ID_TPC_1, + GAUDI2_DCORE1_ENGINE_ID_TPC_2, + GAUDI2_DCORE1_ENGINE_ID_TPC_3, + GAUDI2_DCORE1_ENGINE_ID_TPC_4, + GAUDI2_DCORE1_ENGINE_ID_TPC_5, + GAUDI2_DCORE1_ENGINE_ID_DEC_0, + GAUDI2_DCORE1_ENGINE_ID_DEC_1, + GAUDI2_DCORE2_ENGINE_ID_EDMA_0, + GAUDI2_DCORE2_ENGINE_ID_EDMA_1, + GAUDI2_DCORE2_ENGINE_ID_MME, + GAUDI2_DCORE2_ENGINE_ID_TPC_0, + GAUDI2_DCORE2_ENGINE_ID_TPC_1, + GAUDI2_DCORE2_ENGINE_ID_TPC_2, + GAUDI2_DCORE2_ENGINE_ID_TPC_3, + GAUDI2_DCORE2_ENGINE_ID_TPC_4, + GAUDI2_DCORE2_ENGINE_ID_TPC_5, + GAUDI2_DCORE2_ENGINE_ID_DEC_0, + GAUDI2_DCORE2_ENGINE_ID_DEC_1, + GAUDI2_DCORE3_ENGINE_ID_EDMA_0, + GAUDI2_DCORE3_ENGINE_ID_EDMA_1, + GAUDI2_DCORE3_ENGINE_ID_MME, + GAUDI2_DCORE3_ENGINE_ID_TPC_0, + GAUDI2_DCORE3_ENGINE_ID_TPC_1, + GAUDI2_DCORE3_ENGINE_ID_TPC_2, + GAUDI2_DCORE3_ENGINE_ID_TPC_3, + GAUDI2_DCORE3_ENGINE_ID_TPC_4, + GAUDI2_DCORE3_ENGINE_ID_TPC_5, + GAUDI2_DCORE3_ENGINE_ID_DEC_0, + GAUDI2_DCORE3_ENGINE_ID_DEC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_6, + GAUDI2_ENGINE_ID_PDMA_0, + GAUDI2_ENGINE_ID_PDMA_1, + GAUDI2_ENGINE_ID_ROT_0, + GAUDI2_ENGINE_ID_ROT_1, + GAUDI2_PCIE_ENGINE_ID_DEC_0, + GAUDI2_PCIE_ENGINE_ID_DEC_1, + GAUDI2_ENGINE_ID_NIC0_0, + GAUDI2_ENGINE_ID_NIC0_1, + GAUDI2_ENGINE_ID_NIC1_0, + GAUDI2_ENGINE_ID_NIC1_1, + GAUDI2_ENGINE_ID_NIC2_0, + GAUDI2_ENGINE_ID_NIC2_1, + GAUDI2_ENGINE_ID_NIC3_0, + GAUDI2_ENGINE_ID_NIC3_1, + GAUDI2_ENGINE_ID_NIC4_0, + GAUDI2_ENGINE_ID_NIC4_1, + GAUDI2_ENGINE_ID_NIC5_0, + GAUDI2_ENGINE_ID_NIC5_1, + GAUDI2_ENGINE_ID_NIC6_0, + GAUDI2_ENGINE_ID_NIC6_1, + GAUDI2_ENGINE_ID_NIC7_0, + GAUDI2_ENGINE_ID_NIC7_1, + GAUDI2_ENGINE_ID_NIC8_0, + GAUDI2_ENGINE_ID_NIC8_1, + GAUDI2_ENGINE_ID_NIC9_0, + GAUDI2_ENGINE_ID_NIC9_1, + GAUDI2_ENGINE_ID_NIC10_0, + GAUDI2_ENGINE_ID_NIC10_1, + GAUDI2_ENGINE_ID_NIC11_0, + GAUDI2_ENGINE_ID_NIC11_1, + GAUDI2_ENGINE_ID_SIZE +}; + /* * ASIC specific PLL index * @@ -275,6 +633,22 @@ enum hl_gaudi_pll_index { HL_GAUDI_PLL_MAX }; +enum hl_gaudi2_pll_index { + HL_GAUDI2_CPU_PLL = 0, + HL_GAUDI2_PCI_PLL, + HL_GAUDI2_SRAM_PLL, + HL_GAUDI2_HBM_PLL, + HL_GAUDI2_NIC_PLL, + HL_GAUDI2_DMA_PLL, + HL_GAUDI2_MESH_PLL, + HL_GAUDI2_MME_PLL, + HL_GAUDI2_TPC_PLL, + HL_GAUDI2_IF_PLL, + HL_GAUDI2_VID_PLL, + HL_GAUDI2_MSS_PLL, + HL_GAUDI2_PLL_MAX +}; + /** * enum hl_goya_dma_direction - Direction of DMA operation inside a LIN_DMA packet that is * submitted to the GOYA's DMA QMAN. This attribute is not relevant @@ -326,7 +700,8 @@ enum hl_server_type { HL_SERVER_GAUDI_HLS1 = 1, HL_SERVER_GAUDI_HLS1H = 2, HL_SERVER_GAUDI_TYPE1 = 3, - HL_SERVER_GAUDI_TYPE2 = 4 + HL_SERVER_GAUDI_TYPE2 = 4, + HL_SERVER_GAUDI2_HLS2 = 5 }; /* Opcode for management ioctl @@ -428,8 +803,10 @@ enum hl_server_type { * @device_id: PCI device ID of the ASIC. * @module_id: Module ID of the ASIC for mezzanine cards in servers * (From OCP spec). + * @decoder_enabled_mask: Bit-mask that represents which decoders are enabled. * @first_available_interrupt_id: The first available interrupt ID for the user * to be used when it works with user interrupts. + * Relevant for Gaudi2 and later. * @server_type: Server type that the Gaudi ASIC is currently installed in. * The value is according to enum hl_server_type * @cpld_version: CPLD version on the board. @@ -441,9 +818,15 @@ enum hl_server_type { * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant * for Goya/Gaudi only. * @dram_enabled: Whether the DRAM is enabled. + * @mme_master_slave_mode: Indicate whether the MME is working in master/slave + * configuration. Relevant for Greco and later. * @cpucp_version: The CPUCP f/w version. * @card_name: The card name as passed by the f/w. + * @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled. + * Relevant for Greco and later. * @dram_page_size: The DRAM physical page size. + * @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled. + * Relevant for Gaudi2 and later. * @number_of_user_interrupts: The number of interrupts that are available to the userspace * application to use. Relevant for Gaudi2 and later. * @device_mem_alloc_default_page_size: default page size used in device memory allocation. @@ -456,7 +839,7 @@ struct hl_info_hw_ip_info { __u32 num_of_events; __u32 device_id; __u32 module_id; - __u32 reserved; + __u32 decoder_enabled_mask; __u16 first_available_interrupt_id; __u16 server_type; __u32 cpld_version; @@ -466,12 +849,13 @@ struct hl_info_hw_ip_info { __u32 psoc_pci_pll_div_factor; __u8 tpc_enabled_mask; __u8 dram_enabled; - __u8 pad[2]; + __u8 reserved; + __u8 mme_master_slave_mode; __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; - __u64 reserved2; + __u64 tpc_enabled_mask_ext; __u64 dram_page_size; - __u32 reserved3; + __u32 edma_enabled_mask; __u16 number_of_user_interrupts; __u16 pad2; __u64 reserved4; @@ -821,16 +1205,16 @@ union hl_cb_args { /* HL_CS_CHUNK_FLAGS_ values * * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: - * Indicates if the CB was allocated and mapped by userspace. - * User allocated CB is a command buffer allocated by the user, via malloc - * (or similar). After allocating the CB, the user invokes “memory ioctl” - * to map the user memory into a device virtual address. The user provides - * this address via the cb_handle field. The interface provides the - * ability to create a large CBs, Which aren’t limited to - * “HL_MAX_CB_SIZE”. Therefore, it increases the PCI-DMA queues - * throughput. This CB allocation method also reduces the use of Linux - * DMA-able memory pool. Which are limited and used by other Linux - * sub-systems. + * Indicates if the CB was allocated and mapped by userspace + * (relevant to greco and above). User allocated CB is a command buffer, + * allocated by the user, via malloc (or similar). After allocating the + * CB, the user invokes - “memory ioctl” to map the user memory into a + * device virtual address. The user provides this address via the + * cb_handle field. The interface provides the ability to create a + * large CBs, Which aren’t limited to “HL_MAX_CB_SIZE”. Therefore, it + * increases the PCI-DMA queues throughput. This CB allocation method + * also reduces the use of Linux DMA-able memory pool. Which are limited + * and used by other Linux sub-systems. */ #define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1 @@ -840,12 +1224,17 @@ union hl_cb_args { */ struct hl_cs_chunk { union { - /* For external queue, this represents a Handle of CB on the + /* Goya/Gaudi: + * For external queue, this represents a Handle of CB on the * Host. * For internal queue in Goya, this represents an SRAM or * a DRAM address of the internal CB. In Gaudi, this might also * represent a mapped host address of the CB. * + * Greco onwards: + * For H/W queue, this represents either a Handle of CB on the + * Host, or an SRAM, a DRAM, or a mapped host address of the CB. + * * A mapped host address is in the device address space, after * a host address was mapped by the device MMU. */ @@ -910,11 +1299,12 @@ struct hl_cs_chunk { __u32 pad[10]; }; -/* SIGNAL and WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ +/* SIGNAL/WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ #define HL_CS_FLAGS_FORCE_RESTORE 0x1 #define HL_CS_FLAGS_SIGNAL 0x2 #define HL_CS_FLAGS_WAIT 0x4 #define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8 + #define HL_CS_FLAGS_TIMESTAMP 0x20 #define HL_CS_FLAGS_STAGED_SUBMISSION 0x40 #define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80 @@ -1174,14 +1564,19 @@ union hl_wait_cs_args { /* Opcode to allocate device memory */ #define HL_MEM_OP_ALLOC 0 + /* Opcode to free previously allocated device memory */ #define HL_MEM_OP_FREE 1 + /* Opcode to map host and device memory */ #define HL_MEM_OP_MAP 2 + /* Opcode to unmap previously mapped host and device memory */ #define HL_MEM_OP_UNMAP 3 + /* Opcode to map a hw block */ #define HL_MEM_OP_MAP_BLOCK 4 + /* Opcode to create DMA-BUF object for an existing device memory allocation * and to export an FD of that DMA-BUF back to the caller */ @@ -1400,7 +1795,16 @@ struct hl_debug_params_bmon { /* Trace source ID */ __u32 id; - __u32 pad; + + /* Control register */ + __u32 control; + + /* Two more address ranges that the user can request to filter */ + __u64 start_addr2; + __u64 end_addr2; + + __u64 start_addr3; + __u64 end_addr3; }; struct hl_debug_params_spmu { @@ -1409,7 +1813,11 @@ struct hl_debug_params_spmu { /* Number of event types selection */ __u32 event_types_num; - __u32 pad; + + /* TRC configuration register values */ + __u32 pmtrc_val; + __u32 trc_ctrl_host_val; + __u32 trc_en_host_val; }; /* Opcode for ETR component */ @@ -1524,16 +1932,23 @@ struct hl_debug_args { * (or if its the first CS for this context). The user can also order the * driver to run the "restore" phase explicitly * + * Goya/Gaudi: * There are two types of queues - external and internal. External queues * are DMA queues which transfer data from/to the Host. All other queues are * internal. The driver will get completion notifications from the device only * on JOBS which are enqueued in the external queues. * + * Greco onwards: + * There is a single type of queue for all types of engines, either DMA engines + * for transfers from/to the host or inside the device, or compute engines. + * The driver will get completion notifications from the device for all queues. + * * For jobs on external queues, the user needs to create command buffers * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on * internal queues, the user needs to prepare a "command buffer" with packets * on either the device SRAM/DRAM or the host, and give the device address of * that buffer to the CS ioctl. + * For jobs on H/W queues both options of command buffers are valid. * * This IOCTL is asynchronous in regard to the actual execution of the CS. This * means it returns immediately after ALL the JOBS were enqueued on their @@ -1542,7 +1957,7 @@ struct hl_debug_args { * * Upon successful enqueue, the IOCTL returns a sequence number which the user * can use with the "Wait for CS" IOCTL to check whether the handle's CS - * external JOBS have been completed. Note that if the CS has internal JOBS + * non-internal JOBS have been completed. Note that if the CS has internal JOBS * which can execute AFTER the external JOBS have finished, the driver might * report that the CS has finished executing BEFORE the internal JOBS have * actually finished executing.