#include <linux/uaccess.h>
#include <linux/slab.h>
+#define HL_CS_FLAGS_SIG_WAIT (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)
+
static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
struct hl_ctx *ctx, u64 timeout_us, u64 seq);
union hl_cs_args *args = data;
struct hl_ctx *ctx = hpriv->ctx;
void __user *chunks_execute, *chunks_restore;
- u32 num_chunks_execute, num_chunks_restore;
+ u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
u64 cs_seq = ULONG_MAX;
int rc, do_ctx_switch;
bool need_soft_reset = false;
goto out;
}
+ sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;
+
+ if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
+ (!hdev->supports_sync_stream))) {
+ dev_err(hdev->dev, "Sync stream CS is not supported\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
num_chunks_execute = args->in.num_chunks_execute;
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2020 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
* compatibility
*/
struct hl_cs_chunk {
- /*
- * For external queue, this represents a Handle of CB on the Host
- * For internal queue, this represents an SRAM or DRAM address of the
- * internal CB
- */
- __u64 cb_handle;
+ union {
+ /* For external queue, this represents a Handle of CB on the
+ * Host.
+ * For internal queue in Goya, this represents an SRAM or
+ * a DRAM address of the internal CB. In Gaudi, this might also
+ * represent a mapped host address of the CB.
+ *
+ * A mapped host address is in the device address space, after
+ * a host address was mapped by the device MMU.
+ */
+ __u64 cb_handle;
+
+ /* Relevant only when HL_CS_FLAGS_WAIT is set.
+ * This holds address of array of u64 values that contain
+ * signal CS sequence numbers. The wait described by this job
+ * will listen on all those signals (wait event per signal)
+ */
+ __u64 signal_seq_arr;
+ };
+
/* Index of queue to put the CB on */
__u32 queue_index;
- /*
- * Size of command buffer with valid packets
- * Can be smaller then actual CB size
- */
- __u32 cb_size;
+
+ union {
+ /*
+ * Size of command buffer with valid packets
+ * Can be smaller then actual CB size
+ */
+ __u32 cb_size;
+
+ /* Relevant only when HL_CS_FLAGS_WAIT is set.
+ * Number of entries in signal_seq_arr
+ */
+ __u32 num_signal_seq_arr;
+ };
+
/* HL_CS_CHUNK_FLAGS_* */
__u32 cs_chunk_flags;
+
/* Align structure to 64 bytes */
__u32 pad[11];
};
+/* SIGNAL and WAIT flags are mutually exclusive */
#define HL_CS_FLAGS_FORCE_RESTORE 0x1
+#define HL_CS_FLAGS_SIGNAL 0x2
+#define HL_CS_FLAGS_WAIT 0x4
#define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512
struct hl_cs_in {
+
/* this holds address of array of hl_cs_chunk for restore phase */
__u64 chunks_restore;
- /* this holds address of array of hl_cs_chunk for execution phase */
+
+ /* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
+
/* this holds address of array of hl_cs_chunk for store phase -
* Currently not in use
*/
__u64 chunks_store;
+
/* Number of chunks in restore phase array. Maximum number is
* HL_MAX_JOBS_PER_CS
*/
__u32 num_chunks_restore;
+
/* Number of chunks in execution array. Maximum number is
* HL_MAX_JOBS_PER_CS
*/
__u32 num_chunks_execute;
+
/* Number of chunks in restore phase array - Currently not in use */
__u32 num_chunks_store;
+
/* HL_CS_FLAGS_* */
__u32 cs_flags;
+
/* Context ID - Currently not in use */
__u32 ctx_id;
};
* For jobs on external queues, the user needs to create command buffers
* through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on
* internal queues, the user needs to prepare a "command buffer" with packets
- * on either the SRAM or DRAM, and give the device address of that buffer to
- * the CS ioctl.
+ * on either the device SRAM/DRAM or the host, and give the device address of
+ * that buffer to the CS ioctl.
*
* This IOCTL is asynchronous in regard to the actual execution of the CS. This
* means it returns immediately after ALL the JOBS were enqueued on their
* external JOBS have been completed. Note that if the CS has internal JOBS
* which can execute AFTER the external JOBS have finished, the driver might
* report that the CS has finished executing BEFORE the internal JOBS have
- * actually finish executing.
+ * actually finished executing.
*
* Even though the sequence number increments per CS, the user can NOT
* automatically assume that if CS with sequence number N finished, then CS