Diffstat (limited to 'drivers/misc/habanalabs/habanalabs.h'):
 drivers/misc/habanalabs/habanalabs.h | 275 +++++++++++++++++++++++++++++++++
 1 file changed, 275 insertions(+), 0 deletions(-)
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 744e37bbc2a6..9adc7c6ec08b 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -16,6 +16,9 @@
#include <linux/cdev.h>
#include <linux/iopoll.h>
#include <linux/irqreturn.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
#define HL_NAME "habanalabs"
@@ -31,6 +34,11 @@
#define HL_MAX_QUEUES 128
+#define HL_MAX_JOBS_PER_CS 64
+
+/* MUST BE POWER OF 2 and larger than 1 */
+#define HL_MAX_PENDING_CS 64
+
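Because HL_MAX_PENDING_CS must be a power of 2, a 64-bit sequence number can be folded into a slot of the pending-CS ring with a single mask instead of a modulo. A minimal sketch of that identity (the helper name is illustrative, not part of this patch):

    /* valid only because HL_MAX_PENDING_CS is a power of 2 */
    static inline u32 hl_cs_seq_to_slot(u64 seq)
    {
        return (u32) (seq & (HL_MAX_PENDING_CS - 1));
    }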
struct hl_device;
struct hl_fpriv;
@@ -62,6 +70,16 @@ struct hw_queue_properties {
};
/**
+ * enum vm_type_t - virtual memory mapping request information.
+ * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
+ * @VM_TYPE_PHYS_LIST: mapping of DRAM memory to device virtual address.
+ */
+enum vm_type_t {
+ VM_TYPE_USERPTR,
+ VM_TYPE_PHYS_LIST
+};
+
+/**
* enum hl_device_hw_state - H/W device state. Use this to understand whether
* to do a reset before hw_init or not
* @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset
@@ -147,6 +165,19 @@ struct asic_fixed_properties {
u8 tpc_enabled_mask;
};
+/**
+ * struct hl_dma_fence - wrapper for fence object used by command submissions.
+ * @base_fence: kernel fence object.
+ * @lock: spinlock to protect fence.
+ * @hdev: habanalabs device structure.
+ * @cs_seq: command submission sequence number.
+ */
+struct hl_dma_fence {
+ struct dma_fence base_fence;
+ spinlock_t lock;
+ struct hl_device *hdev;
+ u64 cs_seq;
+};
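The struct dma_fence is embedded as the first member, so driver code can recover the wrapper from a generic fence pointer with container_of(). A hedged sketch of how such a wrapper is typically set up; the hl_fence_ops table and the context/seq values are assumptions for illustration:

    struct hl_dma_fence *fence;

    fence = kzalloc(sizeof(*fence), GFP_KERNEL);
    if (!fence)
        return -ENOMEM;

    fence->hdev = hdev;
    fence->cs_seq = seq;
    spin_lock_init(&fence->lock);

    /* the embedded fence borrows the wrapper's spinlock */
    dma_fence_init(&fence->base_fence, &hl_fence_ops, &fence->lock,
                   context, seq);

    /* and going the other way, from a bare dma_fence pointer: */
    struct hl_dma_fence *wrapper =
        container_of(base, struct hl_dma_fence, base_fence);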
/*
* Command Buffers
@@ -175,6 +206,7 @@ struct hl_cb_mgr {
* @mmap_size: Holds the CB's size that was mmaped.
* @size: holds the CB's size.
* @id: the CB's ID.
+ * @cs_cnt: holds number of CS that this CB participates in.
* @ctx_id: holds the ID of the owner's context.
* @mmap: true if the CB is currently mmaped to user.
* @is_pool: true if CB was acquired from the pool, false otherwise.
@@ -189,6 +221,7 @@ struct hl_cb {
u32 mmap_size;
u32 size;
u32 id;
+ u32 cs_cnt;
u32 ctx_id;
u8 mmap;
u8 is_pool;
@@ -313,6 +346,8 @@ enum hl_asic_type {
ASIC_INVALID
};
+struct hl_cs_parser;
+
/**
* enum hl_pm_mng_profile - power management profile.
* @PM_AUTO: internal clock is set by KMD.
@@ -372,7 +407,14 @@ enum hl_pll_frequency {
* @dma_pool_free: free small DMA allocation from pool.
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
+ * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
+ * @cs_parser: parse Command Submission.
+ * @asic_dma_map_sg: DMA map scatter-gather list.
+ * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
+ * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
* @update_eq_ci: update event queue CI.
+ * @context_switch: called upon ASID context switch.
+ * @restore_phase_topology: clear all SOBs and MONs.
* @add_device_attr: add ASIC specific device attributes.
* @handle_eqe: handle event queue entry (IRQ) from ArmCP.
* @set_pll_profile: change PLL profile (manual/automatic).
@@ -380,6 +422,7 @@ enum hl_pll_frequency {
* @send_heartbeat: send is-alive packet to ArmCP and verify response.
* @enable_clock_gating: enable clock gating for reducing power consumption.
* @disable_clock_gating: disable clock gating to allow register access on HBW.
+ * @is_device_idle: return true if device is idle, false otherwise.
* @soft_reset_late_init: perform certain actions needed after soft reset.
* @hw_queues_lock: acquire H/W queues lock.
* @hw_queues_unlock: release H/W queues lock.
@@ -419,7 +462,20 @@ struct hl_asic_funcs {
size_t size, dma_addr_t *dma_handle);
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
size_t size, void *vaddr);
+ void (*hl_dma_unmap_sg)(struct hl_device *hdev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction dir);
+ int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
+ int (*asic_dma_map_sg)(struct hl_device *hdev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction dir);
+ u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
+ struct sg_table *sgt);
+ void (*add_end_of_cb_packets)(u64 kernel_address, u32 len, u64 cq_addr,
+ u32 cq_val, u32 msix_num);
void (*update_eq_ci)(struct hl_device *hdev, u32 val);
+ int (*context_switch)(struct hl_device *hdev, u32 asid);
+ void (*restore_phase_topology)(struct hl_device *hdev);
void (*add_device_attr)(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
void (*handle_eqe)(struct hl_device *hdev,
@@ -430,6 +486,7 @@ struct hl_asic_funcs {
int (*send_heartbeat)(struct hl_device *hdev);
void (*enable_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
+ bool (*is_device_idle)(struct hl_device *hdev);
int (*soft_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
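Each ASIC back-end supplies these hooks through its own ops table. A hedged sketch of how the callbacks added here might be wired up; the goya_* function names are assumptions for illustration, not declarations from this header:

    static const struct hl_asic_funcs goya_funcs = {
        /* ... pre-existing hooks ... */
        .hl_dma_unmap_sg = goya_dma_unmap_sg,
        .cs_parser = goya_cs_parser,
        .asic_dma_map_sg = goya_dma_map_sg,
        .get_dma_desc_list_size = goya_get_dma_desc_list_size,
        .add_end_of_cb_packets = goya_add_end_of_cb_packets,
        .context_switch = goya_context_switch,
        .restore_phase_topology = goya_restore_phase_topology,
        .is_device_idle = goya_is_device_idle,
    };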
@@ -453,12 +510,28 @@ struct hl_asic_funcs {
* @hdev: pointer to the device structure.
* @refcount: reference counter for the context. Context is released only when
* this hits 0. It is incremented on CS and CS_WAIT.
+ * @cs_pending: array of DMA fence objects representing pending CS.
+ * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
+ * to the user so the user can inquire about the CS. It is used as
+ * an index into the cs_pending array.
+ * @cs_lock: spinlock to protect cs_sequence.
+ * @thread_restore_token: token to prevent multiple threads of the same context
+ * from running the restore phase. Only one thread
+ * should run it.
+ * @thread_restore_wait_token: token to prevent the threads that didn't run
+ * the restore phase from moving to their execution
+ * phase before the restore phase has finished.
* @asid: context's unique address space ID in the device's MMU.
*/
struct hl_ctx {
struct hl_fpriv *hpriv;
struct hl_device *hdev;
struct kref refcount;
+ struct dma_fence *cs_pending[HL_MAX_PENDING_CS];
+ u64 cs_sequence;
+ spinlock_t cs_lock;
+ atomic_t thread_restore_token;
+ u32 thread_restore_wait_token;
u32 asid;
};
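Together, cs_sequence, cs_lock and the power-of-2 cs_pending ring make fence lookup cheap: a sequence at or above cs_sequence was never issued, and one more than HL_MAX_PENDING_CS behind it has had its slot recycled. A hedged sketch of the lookup this documentation implies (the error conventions are assumptions):

    struct dma_fence *fence;

    spin_lock(&ctx->cs_lock);

    if (seq >= ctx->cs_sequence) {
        /* this CS was never submitted */
        spin_unlock(&ctx->cs_lock);
        return ERR_PTR(-EINVAL);
    }

    if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
        /* slot already recycled; the fence is gone */
        spin_unlock(&ctx->cs_lock);
        return NULL;
    }

    fence = dma_fence_get(ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
    spin_unlock(&ctx->cs_lock);

    return fence;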
@@ -473,14 +546,129 @@ struct hl_ctx_mgr {
};
+
+/*
+ * COMMAND SUBMISSIONS
+ */
+
+/**
+ * struct hl_userptr - memory mapping chunk information
+ * @vm_type: type of the VM.
+ * @job_node: linked-list node for hanging the object on the Job's list.
+ * @vec: pointer to the frame vector.
+ * @sgt: pointer to the scatter-gather table that holds the pages.
+ * @dir: for DMA unmapping, the direction must be supplied, so save it.
+ * @debugfs_list: node in debugfs list of command submissions.
+ * @addr: user-space virtual pointer to the start of the memory area.
+ * @size: size of the memory area to pin & map.
+ * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
+ */
+struct hl_userptr {
+ enum vm_type_t vm_type; /* must be first */
+ struct list_head job_node;
+ struct frame_vector *vec;
+ struct sg_table *sgt;
+ enum dma_data_direction dir;
+ struct list_head debugfs_list;
+ u64 addr;
+ u32 size;
+ u8 dma_mapped;
+};
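The "must be first" note on vm_type is load-bearing: code that tracks mappings of different kinds can be handed an opaque pointer and dispatch on the leading field. A hedged sketch of that pattern (the unmap_* helpers are assumptions for illustration):

    /* works only because every mappable object puts vm_type first */
    enum vm_type_t *vm_type = (enum vm_type_t *) obj;

    switch (*vm_type) {
    case VM_TYPE_USERPTR:
        unmap_userptr(hdev, (struct hl_userptr *) obj);
        break;
    case VM_TYPE_PHYS_LIST:
        unmap_phys_list(hdev, obj);
        break;
    }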
+
+/**
+ * struct hl_cs - command submission.
+ * @jobs_in_queue_cnt: per-queue counter of submitted jobs.
+ * @ctx: the context this CS belongs to.
+ * @job_list: list of the CS's jobs in the various queues.
+ * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
+ * @refcount: reference counter for usage of the CS.
+ * @fence: pointer to the fence object of this CS.
+ * @work_tdr: delayed work node for TDR.
+ * @mirror_node: node in the device's mirror list of command submissions.
+ * @sequence: the sequence number of this CS.
+ * @submitted: true if CS was submitted to H/W.
+ * @completed: true if CS was completed by device.
+ * @timedout: true if the CS timed out.
+ * @tdr_active: true if TDR was activated for this CS (to prevent
+ * double TDR activation).
+ * @aborted: true if CS was aborted due to some device error.
+ */
+struct hl_cs {
+ u8 jobs_in_queue_cnt[HL_MAX_QUEUES];
+ struct hl_ctx *ctx;
+ struct list_head job_list;
+ spinlock_t job_lock;
+ struct kref refcount;
+ struct dma_fence *fence;
+ struct delayed_work work_tdr;
+ struct list_head mirror_node;
+ u64 sequence;
+ u8 submitted;
+ u8 completed;
+ u8 timedout;
+ u8 tdr_active;
+ u8 aborted;
+};
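work_tdr and the status flags describe a timeout-detect-and-recover (TDR) cycle: arm a delayed work when the CS reaches H/W, cancel it on completion, and if it ever fires, mark the CS timed out and optionally reset the device (see reset_on_lockup below). A hedged sketch under those assumptions:

    /* on submission to H/W, in process context */
    cs->submitted = true;
    schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
    cs->tdr_active = true;

    /* on completion, again in process context */
    if (cs->tdr_active)
        cancel_delayed_work_sync(&cs->work_tdr);
    cs->completed = true;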
+
/**
* struct hl_cs_job - command submission job.
+ * @cs_node: the node to hang on the CS jobs list.
+ * @cs: the CS this job belongs to.
+ * @user_cb: the CB we got from the user.
+ * @patched_cb: in case of patching, this is an internal CB which is submitted
+ * on the queue instead of the CB we got from the IOCTL.
* @finish_work: workqueue object to run when job is completed.
+ * @userptr_list: linked-list of userptr mappings that belong to this job and
+ * wait for completion.
* @id: the id of this job inside a CS.
+ * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ * @user_cb_size: the actual size of the CB we got from the user.
+ * @job_cb_size: the actual size of the CB that we put on the queue.
+ * @ext_queue: whether the job is for an external or an internal queue.
*/
struct hl_cs_job {
+ struct list_head cs_node;
+ struct hl_cs *cs;
+ struct hl_cb *user_cb;
+ struct hl_cb *patched_cb;
struct work_struct finish_work;
+ struct list_head userptr_list;
u32 id;
+ u32 hw_queue_id;
+ u32 user_cb_size;
+ u32 job_cb_size;
+ u8 ext_queue;
+};
+
+/**
+ * struct hl_cs_parser - command submission parser properties.
+ * @user_cb: the CB we got from the user.
+ * @patched_cb: in case of patching, this is an internal CB which is submitted
+ * on the queue instead of the CB we got from the IOCTL.
+ * @job_userptr_list: linked-list of userptr mappings that belong to the related
+ * job and wait for completion.
+ * @cs_sequence: the sequence number of the related CS.
+ * @ctx_id: the ID of the context the related CS belongs to.
+ * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ * @user_cb_size: the actual size of the CB we got from the user.
+ * @patched_cb_size: the size of the CB after parsing.
+ * @ext_queue: whether the job is for an external or an internal queue.
+ * @job_id: the id of the related job inside the related CS.
+ * @use_virt_addr: whether to treat the addresses in the CB as virtual during
+ * parsing.
+ */
+struct hl_cs_parser {
+ struct hl_cb *user_cb;
+ struct hl_cb *patched_cb;
+ struct list_head *job_userptr_list;
+ u64 cs_sequence;
+ u32 ctx_id;
+ u32 hw_queue_id;
+ u32 user_cb_size;
+ u32 patched_cb_size;
+ u8 ext_queue;
+ u8 job_id;
+ u8 use_virt_addr;
};
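This structure is the contract between the common CS code and the ASIC's cs_parser hook: the caller describes the user CB and target queue, the hook validates and (where needed) patches it, reporting the patched CB and its size back. A hedged sketch of the call site, with field usage following the documentation above (the job variable and surrounding code are assumed):

    struct hl_cs_parser parser = {};
    int rc;

    parser.ctx_id = ctx_id;
    parser.cs_sequence = job->cs->sequence;
    parser.job_id = job->id;
    parser.hw_queue_id = job->hw_queue_id;
    parser.user_cb = job->user_cb;
    parser.user_cb_size = job->user_cb_size;
    parser.job_userptr_list = &job->userptr_list;
    parser.ext_queue = job->ext_queue;

    rc = hdev->asic_funcs->cs_parser(hdev, &parser);
    if (!rc) {
        /* on success the hook hands back the CB to actually submit */
        job->patched_cb = parser.patched_cb;
        job->job_cb_size = parser.patched_cb_size;
    }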
@@ -497,6 +685,7 @@ struct hl_cs_job {
* @ctx_mgr: context manager to handle multiple context for this FD.
* @cb_mgr: command buffer manager to handle multiple buffers for this FD.
* @refcount: number of related contexts.
+ * @restore_phase_mutex: lock for context switch and restore phase.
*/
struct hl_fpriv {
struct hl_device *hdev;
@@ -506,6 +695,7 @@ struct hl_fpriv {
struct hl_ctx_mgr ctx_mgr;
struct hl_cb_mgr cb_mgr;
struct kref refcount;
+ struct mutex restore_phase_mutex;
};
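restore_phase_mutex works with the restore tokens documented in hl_ctx above: the mutex serializes the context-switch decision per FD, the atomic token elects exactly one thread to run the restore phase, and the wait token holds the others back until it finishes. A heavily hedged sketch of that election (run_restore_phase is an assumed helper, expected to set thread_restore_wait_token when done):

    mutex_lock(&hpriv->restore_phase_mutex);

    /* only one thread wins the token and runs the restore phase */
    if (atomic_cmpxchg(&ctx->thread_restore_token, 1, 0) == 1)
        run_restore_phase(hdev, ctx);

    mutex_unlock(&hpriv->restore_phase_mutex);

    /* the rest wait for the winner to release them */
    while (!READ_ONCE(ctx->thread_restore_wait_token))
        usleep_range(100, 200);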
@@ -577,6 +767,8 @@ struct hl_device_reset_work {
* @eq_wq: work queue of event queue for executing work in process context.
* @kernel_ctx: KMD context structure.
* @kernel_queues: array of hl_hw_queue.
+ * @hw_queues_mirror_list: CS mirror list for TDR.
+ * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
* @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs.
* @event_queue: event queue for IRQ from ArmCP.
* @dma_pool: DMA pool for small allocations.
@@ -604,6 +796,7 @@ struct hl_device_reset_work {
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @fd_open_cnt: number of open user processes.
+ * @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This
* value is saved so in case of hard-reset, KMD will restore this
* value and update the F/W after the re-initialization
@@ -617,7 +810,10 @@ struct hl_device_reset_work {
* @hwmon_initialized: were the H/W monitor sensors initialized.
* @hard_reset_pending: is there a hard reset work pending.
* @heartbeat: is heartbeat sanity check towards ArmCP enabled.
+ * @reset_on_lockup: true if a reset should be done in case of a stuck CS,
+ * false otherwise.
* @init_done: is the initialization of the device done.
+ * @mmu_enable: is MMU enabled.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -634,6 +830,8 @@ struct hl_device {
struct workqueue_struct *eq_wq;
struct hl_ctx *kernel_ctx;
struct hl_hw_queue *kernel_queues;
+ struct list_head hw_queues_mirror_list;
+ spinlock_t hw_queues_mirror_lock;
struct hl_cb_mgr kernel_cb_mgr;
struct hl_eq event_queue;
struct dma_pool *dma_pool;
@@ -661,6 +859,7 @@ struct hl_device {
atomic_t in_reset;
atomic_t curr_pll_profile;
atomic_t fd_open_cnt;
+ u64 timeout_jiffies;
u64 max_power;
u32 major;
u32 high_pll;
@@ -672,9 +871,11 @@ struct hl_device {
u8 hwmon_initialized;
u8 hard_reset_pending;
u8 heartbeat;
+ u8 reset_on_lockup;
u8 init_done;
/* Parameters for bring-up */
+ u8 mmu_enable;
u8 cpu_enable;
u8 reset_pcilink;
u8 cpu_queues_enable;
@@ -712,6 +913,58 @@ struct hl_ioctl_desc {
* Kernel module functions that can be accessed by entire module
*/
+/**
+ * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @range_start_address: The start address of the valid range.
+ * @range_end_address: The end address of the valid range.
+ *
+ * Return: true if the area is inside the valid range, false otherwise.
+ */
+static inline bool hl_mem_area_inside_range(u64 address, u32 size,
+ u64 range_start_address, u64 range_end_address)
+{
+ u64 end_address = address + size;
+
+ if ((address >= range_start_address) &&
+ (end_address <= range_end_address) &&
+ (end_address > address))
+ return true;
+
+ return false;
+}
+
+/**
+ * hl_mem_area_crosses_range() - Checks whether address+size crosses a range.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @range_start_address: The start address of the valid range.
+ * @range_end_address: The end address of the valid range.
+ *
+ * Return: true if the area overlaps part or all of the valid range,
+ * false otherwise.
+ */
+static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
+ u64 range_start_address, u64 range_end_address)
+{
+ u64 end_address = address + size;
+
+ if ((address >= range_start_address) &&
+ (address < range_end_address))
+ return true;
+
+ if ((end_address >= range_start_address) &&
+ (end_address < range_end_address))
+ return true;
+
+ if ((address < range_start_address) &&
+ (end_address >= range_end_address))
+ return true;
+
+ return false;
+}
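A typical validation path uses both helpers back to back: require the buffer to sit fully inside an allowed window, and reject anything that touches a reserved one. The property and range names below are assumptions for illustration:

    if (!hl_mem_area_inside_range(addr, size,
                prop->dram_user_base_address, prop->dram_end_address))
        return -EFAULT;

    if (hl_mem_area_crosses_range(addr, size,
                reserved_start, reserved_end))
        return -EFAULT;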
+
int hl_device_open(struct inode *inode, struct file *filp);
bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
@@ -725,8 +978,10 @@ int hl_hw_queues_create(struct hl_device *hdev);
void hl_hw_queues_destroy(struct hl_device *hdev);
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
u32 cb_size, u64 cb_ptr);
+int hl_hw_queue_schedule_cs(struct hl_cs *cs);
u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
+void hl_int_hw_queue_update_ci(struct hl_cs *cs);
void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1)
@@ -740,6 +995,8 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
+u32 hl_cq_inc_ptr(u32 ptr);
+
int hl_asid_init(struct hl_device *hdev);
void hl_asid_fini(struct hl_device *hdev);
unsigned long hl_asid_alloc(struct hl_device *hdev);
@@ -748,9 +1005,13 @@ void hl_asid_free(struct hl_device *hdev, unsigned long asid);
int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
+void hl_ctx_do_release(struct kref *ref);
+void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
int hl_ctx_put(struct hl_ctx *ctx);
+struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
+
int hl_device_init(struct hl_device *hdev, struct class *hclass);
void hl_device_fini(struct hl_device *hdev);
int hl_device_suspend(struct hl_device *hdev);
@@ -782,8 +1043,20 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
int hl_cb_pool_init(struct hl_device *hdev);
int hl_cb_pool_fini(struct hl_device *hdev);
+void hl_cs_rollback_all(struct hl_device *hdev);
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue);
+
void goya_set_asic_funcs(struct hl_device *hdev);
+int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u32 size,
+ struct hl_userptr *userptr);
+int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
+void hl_userptr_delete_list(struct hl_device *hdev,
+ struct list_head *userptr_list);
+bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
+ struct list_head *userptr_list,
+ struct hl_userptr **userptr);
+
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
@@ -799,5 +1072,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value);
/* IOCTLs */
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
#endif /* HABANALABSP_H_ */