summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-01-09 17:09:31 -0500
committerDavid S. Miller <davem@davemloft.net>2017-01-09 17:09:31 -0500
commitbda65b4255ac983ce36a6c0ea6a7794f8e8fcc86 (patch)
treead280b2ecc1266329416f043dd20cea515b3dc48 /include
parentb369e7fd41f7cdbe2488cb736ef4f958bb94b5e2 (diff)
parentf502d834950a28e02651bb7e2cc7111ddd352644 (diff)
Merge tag 'mlx5-4kuar-for-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
Saeed Mahameed says: ==================== mlx5 4K UAR The following series of patches optimizes the usage of the UAR area which is contained within the BAR 0-1. Previous versions of the firmware and the driver assumed each system page contains a single UAR. This patch set will query the firmware for a new capability that if published, means that the firmware can support UARs of fixed 4K regardless of system page size. In the case of powerpc, where page size equals 64KB, this means we can utilize 16 UARs per system page. Since user space processes by default consume eight UARs per context this means that with this change a process will need a single system page to fulfill that requirement and in fact make use of more UARs which is better in terms of performance. In addition to optimizing user-space processes, we introduce an allocator that can be used by kernel consumers to allocate blue flame registers (which are areas within a UAR that are used to write doorbells). This provides further optimization on using the UAR area since the Ethernet driver makes use of a single blue flame register per system page and now it will use two blue flame registers per 4K. The series also makes changes to naming conventions and now the terms used in the driver code match the terms used in the PRM (programmers reference manual). Thus, what used to be called UUAR (micro UAR) is now called BFREG (blue flame register). In order to support compatibility between different versions of library/driver/firmware, the library has now means to notify the kernel driver that it supports the new scheme and the kernel can notify the library if it supports this extension. So mixed versions of libraries can run concurrently without any issues. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/mlx5/cq.h5
-rw-r--r--include/linux/mlx5/device.h23
-rw-r--r--include/linux/mlx5/doorbell.h6
-rw-r--r--include/linux/mlx5/driver.h81
-rw-r--r--include/linux/mlx5/mlx5_ifc.h7
-rw-r--r--include/uapi/rdma/mlx5-abi.h19
6 files changed, 81 insertions, 60 deletions
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 7c3c0d3aca37..95898847c7d4 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -42,13 +42,13 @@ struct mlx5_core_cq {
int cqe_sz;
__be32 *set_ci_db;
__be32 *arm_db;
+ struct mlx5_uars_page *uar;
atomic_t refcount;
struct completion free;
unsigned vector;
unsigned int irqn;
void (*comp) (struct mlx5_core_cq *);
void (*event) (struct mlx5_core_cq *, enum mlx5_event);
- struct mlx5_uar *uar;
u32 cons_index;
unsigned arm_sn;
struct mlx5_rsc_debug *dbg;
@@ -144,7 +144,6 @@ enum {
static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
void __iomem *uar_page,
- spinlock_t *doorbell_lock,
u32 cons_index)
{
__be32 doorbell[2];
@@ -164,7 +163,7 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
doorbell[1] = cpu_to_be32(cq->cqn);
- mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock);
+ mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, NULL);
}
int mlx5_init_cq_table(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 7a79a20c54d2..7c5265db02a9 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -212,10 +212,20 @@ enum {
};
enum {
- MLX5_BF_REGS_PER_PAGE = 4,
- MLX5_MAX_UAR_PAGES = 1 << 8,
- MLX5_NON_FP_BF_REGS_PER_PAGE = 2,
- MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE,
+ MLX5_ADAPTER_PAGE_SHIFT = 12,
+ MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT,
+};
+
+enum {
+ MLX5_BFREGS_PER_UAR = 4,
+ MLX5_MAX_UARS = 1 << 8,
+ MLX5_NON_FP_BFREGS_PER_UAR = 2,
+ MLX5_FP_BFREGS_PER_UAR = MLX5_BFREGS_PER_UAR -
+ MLX5_NON_FP_BFREGS_PER_UAR,
+ MLX5_MAX_BFREGS = MLX5_MAX_UARS *
+ MLX5_NON_FP_BFREGS_PER_UAR,
+ MLX5_UARS_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
+ MLX5_NON_FP_BFREGS_IN_PAGE = MLX5_NON_FP_BFREGS_PER_UAR * MLX5_UARS_IN_PAGE,
};
enum {
@@ -389,11 +399,6 @@ enum {
};
enum {
- MLX5_ADAPTER_PAGE_SHIFT = 12,
- MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT,
-};
-
-enum {
MLX5_CAP_OFF_CMDIF_CSUM = 46,
};
diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h
index afc78a3f4462..0787de28f2fc 100644
--- a/include/linux/mlx5/doorbell.h
+++ b/include/linux/mlx5/doorbell.h
@@ -68,10 +68,12 @@ static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
{
unsigned long flags;
- spin_lock_irqsave(doorbell_lock, flags);
+ if (doorbell_lock)
+ spin_lock_irqsave(doorbell_lock, flags);
__raw_writel((__force u32) val[0], dest);
__raw_writel((__force u32) val[1], dest + 4);
- spin_unlock_irqrestore(doorbell_lock, flags);
+ if (doorbell_lock)
+ spin_unlock_irqrestore(doorbell_lock, flags);
}
#endif
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index cdd2bd62f86d..3a309f6a4a15 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -187,36 +187,18 @@ enum mlx5_eq_type {
#endif
};
-struct mlx5_uuar_info {
- struct mlx5_uar *uars;
- int num_uars;
- int num_low_latency_uuars;
- unsigned long *bitmap;
+struct mlx5_bfreg_info {
+ u32 *sys_pages;
+ int num_low_latency_bfregs;
unsigned int *count;
- struct mlx5_bf *bfs;
/*
- * protect uuar allocation data structs
+ * protect bfreg allocation data structs
*/
struct mutex lock;
u32 ver;
-};
-
-struct mlx5_bf {
- void __iomem *reg;
- void __iomem *regreg;
- int buf_size;
- struct mlx5_uar *uar;
- unsigned long offset;
- int need_lock;
- /* protect blue flame buffer selection when needed
- */
- spinlock_t lock;
-
- /* serialize 64 bit writes when done as two 32 bit accesses
- */
- spinlock_t lock32;
- int uuarn;
+ bool lib_uar_4k;
+ u32 num_sys_pages;
};
struct mlx5_cmd_first {
@@ -451,14 +433,38 @@ struct mlx5_eq_table {
spinlock_t lock;
};
-struct mlx5_uar {
- u32 index;
- struct list_head bf_list;
- unsigned free_bf_bmap;
- void __iomem *bf_map;
+struct mlx5_uars_page {
void __iomem *map;
+ bool wc;
+ u32 index;
+ struct list_head list;
+ unsigned int bfregs;
+ unsigned long *reg_bitmap; /* for non fast path bf regs */
+ unsigned long *fp_bitmap;
+ unsigned int reg_avail;
+ unsigned int fp_avail;
+ struct kref ref_count;
+ struct mlx5_core_dev *mdev;
};
+struct mlx5_bfreg_head {
+ /* protect blue flame registers allocations */
+ struct mutex lock;
+ struct list_head list;
+};
+
+struct mlx5_bfreg_data {
+ struct mlx5_bfreg_head reg_head;
+ struct mlx5_bfreg_head wc_head;
+};
+
+struct mlx5_sq_bfreg {
+ void __iomem *map;
+ struct mlx5_uars_page *up;
+ bool wc;
+ u32 index;
+ unsigned int offset;
+};
struct mlx5_core_health {
struct health_buffer __iomem *health;
@@ -578,8 +584,6 @@ struct mlx5_priv {
struct mlx5_eq_table eq_table;
struct msix_entry *msix_arr;
struct mlx5_irq_info *irq_info;
- struct mlx5_uuar_info uuari;
- MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock);
/* pages stuff */
struct workqueue_struct *pg_wq;
@@ -644,6 +648,8 @@ struct mlx5_priv {
void *pfault_ctx;
struct srcu_struct pfault_srcu;
#endif
+ struct mlx5_bfreg_data bfregs;
+ struct mlx5_uars_page *uar;
};
enum mlx5_device_state {
@@ -712,7 +718,6 @@ struct mlx5_td {
};
struct mlx5e_resources {
- struct mlx5_uar cq_uar;
u32 pdn;
struct mlx5_td td;
struct mlx5_core_mkey mkey;
@@ -902,11 +907,6 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
-int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
-int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
-int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar,
- bool map_wc);
-void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
@@ -972,7 +972,7 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
int nent, u64 mask, const char *name,
- struct mlx5_uar *uar, enum mlx5_eq_type type);
+ enum mlx5_eq_type type);
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
int mlx5_start_eqs(struct mlx5_core_dev *dev);
int mlx5_stop_eqs(struct mlx5_core_dev *dev);
@@ -1021,6 +1021,9 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index);
void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate);
bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate);
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+ bool map_wc, bool fast_path);
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
static inline int fw_initializing(struct mlx5_core_dev *dev)
{
@@ -1080,6 +1083,8 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
+void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
struct mlx5_profile {
u64 mask;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index d0f090884617..37327f6ba9cb 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -905,7 +905,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 uc[0x1];
u8 rc[0x1];
- u8 reserved_at_240[0xa];
+ u8 uar_4k[0x1];
+ u8 reserved_at_241[0x9];
u8 uar_sz[0x6];
u8 reserved_at_250[0x8];
u8 log_pg_sz[0x8];
@@ -997,7 +998,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 device_frequency_mhz[0x20];
u8 device_frequency_khz[0x20];
- u8 reserved_at_500[0x80];
+ u8 reserved_at_500[0x20];
+ u8 num_of_uars_per_page[0x20];
+ u8 reserved_at_540[0x40];
u8 reserved_at_580[0x3f];
u8 cqe_compression[0x1];
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index fae6cdaeb56d..85dc966ea70b 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -61,19 +61,24 @@ enum {
*/
struct mlx5_ib_alloc_ucontext_req {
- __u32 total_num_uuars;
- __u32 num_low_latency_uuars;
+ __u32 total_num_bfregs;
+ __u32 num_low_latency_bfregs;
+};
+
+enum mlx5_lib_caps {
+ MLX5_LIB_CAP_4K_UAR = (u64)1 << 0,
};
struct mlx5_ib_alloc_ucontext_req_v2 {
- __u32 total_num_uuars;
- __u32 num_low_latency_uuars;
+ __u32 total_num_bfregs;
+ __u32 num_low_latency_bfregs;
__u32 flags;
__u32 comp_mask;
__u8 max_cqe_version;
__u8 reserved0;
__u16 reserved1;
__u32 reserved2;
+ __u64 lib_caps;
};
enum mlx5_ib_alloc_ucontext_resp_mask {
@@ -88,7 +93,7 @@ enum mlx5_user_cmds_supp_uhw {
struct mlx5_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 bf_reg_size;
- __u32 tot_uuars;
+ __u32 tot_bfregs;
__u32 cache_line_size;
__u16 max_sq_desc_sz;
__u16 max_rq_desc_sz;
@@ -103,6 +108,8 @@ struct mlx5_ib_alloc_ucontext_resp {
__u8 cmds_supp_uhw;
__u16 reserved2;
__u64 hca_core_clock_offset;
+ __u32 log_uar_size;
+ __u32 num_uars_per_page;
};
struct mlx5_ib_alloc_pd_resp {
@@ -241,7 +248,7 @@ struct mlx5_ib_create_qp_rss {
};
struct mlx5_ib_create_qp_resp {
- __u32 uuar_index;
+ __u32 bfreg_index;
};
struct mlx5_ib_alloc_mw {