diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-27 10:17:28 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-27 10:17:28 -0800 |
commit | d76886972823ce456c0c61cd2284e85668e2131e (patch) | |
tree | 2171359a7aeb2539c327f6d2604b1ad3aa21f588 /drivers/infiniband/hw | |
parent | 0e45384cecccaa950783e67e7a29ed470133f19d (diff) | |
parent | f295e4cece5cb4c60715fed539abcd62468f9ef1 (diff) | |
download | linux-d76886972823ce456c0c61cd2284e85668e2131e.tar.gz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"Again another fairly quiet cycle with few notable core code changes
and the usual variety of driver bug fixes and small improvements.
- Various driver updates and bug fixes for siw, bnxt_re, hns, qedr,
iw_cxgb4, vmw_pvrdma, mlx5
- Improvements in SRPT from working with iWarp
- SRIOV VF support for bnxt_re
- Skeleton kernel-doc files for drivers/infiniband
- User visible counters for events related to ODP
- Common code for tracking of mmap lifetimes so that drivers can link
HW object lifetime to a VMA
- ODP bug fixes and rework
- RDMA READ support for efa
- Removal of the very old cxgb3 driver"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (168 commits)
RDMA/hns: Delete unnecessary callback functions for cq
RDMA/hns: Rename the functions used inside creating cq
RDMA/hns: Redefine the member of hns_roce_cq struct
RDMA/hns: Redefine interfaces used in creating cq
RDMA/efa: Expose RDMA read related attributes
RDMA/efa: Support remote read access in MR registration
RDMA/efa: Store network attributes in device attributes
IB/hfi1: remove redundant assignment to variable ret
RDMA/bnxt_re: Fix missing le16_to_cpu
RDMA/bnxt_re: Fix stat push into dma buffer on gen p5 devices
RDMA/bnxt_re: Fix chip number validation Broadcom's Gen P5 series
RDMA/bnxt_re: Fix Kconfig indentation
IB/mlx5: Implement callbacks for getting VFs GUID attributes
IB/ipoib: Add ndo operation for getting VFs GUID attributes
IB/core: Add interfaces to get VF node and port GUIDs
net/core: Add support for getting VF GUIDs
RDMA/qedr: Fix null-pointer dereference when calling rdma_user_mmap_get_offset
RDMA/cm: Use refcount_t type for refcount variable
IB/mlx5: Support extended number of strides for Striding RQ
IB/mlx4: Update HW GID table while adding vlan GID
...
Diffstat (limited to 'drivers/infiniband/hw')
107 files changed, 2666 insertions, 11674 deletions
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 433fca59febd..0aeccd984889 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/ diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig index ab8779d23382..b83f1cc38c52 100644 --- a/drivers/infiniband/hw/bnxt_re/Kconfig +++ b/drivers/infiniband/hw/bnxt_re/Kconfig @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_BNXT_RE - tristate "Broadcom Netxtreme HCA support" - depends on 64BIT - depends on ETHERNET && NETDEVICES && PCI && INET && DCB - select NET_VENDOR_BROADCOM - select BNXT - ---help--- + tristate "Broadcom Netxtreme HCA support" + depends on 64BIT + depends on ETHERNET && NETDEVICES && PCI && INET && DCB + select NET_VENDOR_BROADCOM + select BNXT + ---help--- This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit RoCE HCAs. To compile this driver as a module, choose M here: the module will be called bnxt_re. 
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index e55a1666c0cd..725b2350e349 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -108,6 +108,7 @@ struct bnxt_re_sqp_entries { #define BNXT_RE_MAX_MSIX 9 #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_NQ_IDX 1 +#define BNXT_RE_GEN_P5_MAX_VF 64 struct bnxt_re_dev { struct ib_device ibdev; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b4149dc9e824..9b6ca15a183c 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -191,24 +191,6 @@ int bnxt_re_query_device(struct ib_device *ibdev, return 0; } -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify) -{ - switch (device_modify_mask) { - case IB_DEVICE_MODIFY_SYS_IMAGE_GUID: - /* Modify the GUID requires the modification of the GID table */ - /* GUID should be made as READ-ONLY */ - break; - case IB_DEVICE_MODIFY_NODE_DESC: - /* Node Desc should be made as READ-ONLY */ - break; - default: - break; - } - return 0; -} - /* Port */ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr) @@ -855,7 +837,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes += (qplib_qp->sq.max_wqe * psn_sz); } bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -869,7 +851,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get(udata, ureq.qprva, bytes, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; @@ 
-1322,7 +1304,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE); bytes = PAGE_ALIGN(bytes); - umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1); + umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) return PTR_ERR(umem); @@ -2565,7 +2547,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->umem = ib_umem_get(udata, req.cq_va, entries * sizeof(struct cq_base), - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { rc = PTR_ERR(cq->umem); goto fail; @@ -3530,7 +3512,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, /* The fixed portion of the rkey is the same as the lkey */ mr->ib_mr.rkey = mr->qplib_mr.rkey; - umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(umem)) { dev_err(rdev_to_dev(rdev), "Failed to get umem"); rc = -EFAULT; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 31662b1ee35a..23d972da5652 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -145,9 +145,6 @@ struct bnxt_re_ucontext { int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata); -int bnxt_re_modify_device(struct ib_device *ibdev, - int device_modify_mask, - struct ib_device_modify *device_modify); int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr); int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 30a54f8aa42c..e7e8a0f49464 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -119,61 +119,76 @@ static void bnxt_re_get_sriov_func_type(struct 
bnxt_re_dev *rdev) * reserved for the function. The driver may choose to allocate fewer * resources than the firmware maximum. */ -static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) { - u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0; - u32 i; - u32 vf_pct; - u32 num_vfs; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *attr; + struct bnxt_qplib_ctx *ctx; + int i; - rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, - dev_attr->max_qp); + attr = &rdev->dev_attr; + ctx = &rdev->qplib_ctx; - rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; + ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, + attr->max_qp); + ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K; /* Use max_mr from fw since max_mrw does not get set */ - rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count, - dev_attr->max_mr); - rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, - dev_attr->max_srq); - rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, - dev_attr->max_cq); - - for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) - rdev->qplib_ctx.tqm_count[i] = - rdev->dev_attr.tqm_alloc_reqs[i]; - - if (rdev->num_vfs) { - /* - * Reserve a set of resources for the PF. Divide the remaining - * resources among the VFs - */ - vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; - num_vfs = 100 * rdev->num_vfs; - vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs; - vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs; - vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs; - /* - * The driver allows many more MRs than other resources. If the - * firmware does also, then reserve a fixed amount for the PF - * and divide the rest among VFs. VFs may use many MRs for NFS - * mounts, ISER, NVME applications, etc. 
If the firmware - * severely restricts the number of MRs, then let PF have - * half and divide the rest among VFs, as for the other - * resource types. - */ - if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) - vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs; - else - vf_mrws = (rdev->qplib_ctx.mrw_count - - BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs; - vf_gids = BNXT_RE_MAX_GID_PER_VF; + ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr); + ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT, + attr->max_srq); + ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) + for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) + rdev->qplib_ctx.tqm_count[i] = + rdev->dev_attr.tqm_alloc_reqs[i]; +} + +static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) +{ + struct bnxt_qplib_vf_res *vf_res; + u32 mrws = 0; + u32 vf_pct; + u32 nvfs; + + vf_res = &qplib_ctx->vf_res; + /* + * Reserve a set of resources for the PF. Divide the remaining + * resources among the VFs + */ + vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF; + nvfs = num_vf; + num_vf = 100 * num_vf; + vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf; + vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf; + vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf; + /* + * The driver allows many more MRs than other resources. If the + * firmware does also, then reserve a fixed amount for the PF and + * divide the rest among VFs. VFs may use many MRs for NFS + * mounts, ISER, NVME applications, etc. If the firmware severely + * restricts the number of MRs, then let PF have half and divide + * the rest among VFs, as for the other resource types. 
+ */ + if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) { + mrws = qplib_ctx->mrw_count * vf_pct; + nvfs = num_vf; + } else { + mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF; } - rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws; - rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids; - rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps; - rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs; - rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs; + vf_res->max_mrw_per_vf = (mrws / nvfs); + vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF; +} + +static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) +{ + u32 num_vfs; + + memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); + bnxt_re_limit_pf_res(rdev); + + num_vfs = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? + BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs; + if (num_vfs) + bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs); } /* for handling bnxt_en callbacks later */ @@ -193,9 +208,11 @@ static void bnxt_re_sriov_config(void *p, int num_vfs) return; rdev->num_vfs = num_vfs; - bnxt_re_set_resource_limits(rdev); - bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, - &rdev->qplib_ctx); + if (!bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)) { + bnxt_re_set_resource_limits(rdev); + bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw, + &rdev->qplib_ctx); + } } static void bnxt_re_shutdown(void *p) @@ -477,6 +494,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); + req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext)); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); @@ -625,7 +643,6 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .map_mr_sg = bnxt_re_map_mr_sg, .mmap = bnxt_re_mmap, 
.modify_ah = bnxt_re_modify_ah, - .modify_device = bnxt_re_modify_device, .modify_qp = bnxt_re_modify_qp, .modify_srq = bnxt_re_modify_srq, .poll_cq = bnxt_re_poll_cq, @@ -895,10 +912,14 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, return 0; } +#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000 +#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000 static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx) { return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ? - 0x10000 : rdev->msix_entries[indx].db_offset; + (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB : + BNXT_RE_GEN_P5_PF_NQ_DB) : + rdev->msix_entries[indx].db_offset; } static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) @@ -1270,10 +1291,10 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) return; } rdev->qplib_ctx.hwrm_intf_ver = - (u64)resp.hwrm_intf_major << 48 | - (u64)resp.hwrm_intf_minor << 32 | - (u64)resp.hwrm_intf_build << 16 | - resp.hwrm_intf_patch; + (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 | + (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 | + (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 | + le16_to_cpu(resp.hwrm_intf_patch); } static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) @@ -1408,8 +1429,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) rdev->is_virtfn); if (rc) goto disable_rcfw; - if (!rdev->is_virtfn) - bnxt_re_set_resource_limits(rdev); + + bnxt_re_set_resource_limits(rdev); rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0, bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx)); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 60c8f76aab33..5cdfa84faf85 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -494,8 +494,10 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, * shall setup this area for VF. 
Skipping the * HW programming */ - if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + if (is_virtfn) goto skip_ctx_setup; + if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx)) + goto config_vf_res; level = ctx->qpc_tbl.level; req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) | @@ -540,6 +542,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements); req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements); +config_vf_res: req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf); req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf); req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index fbda11a7ab1a..aaa76d792185 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -186,7 +186,9 @@ struct bnxt_qplib_chip_ctx { u8 chip_metal; }; -#define CHIP_NUM_57500 0x1750 +#define CHIP_NUM_57508 0x1750 +#define CHIP_NUM_57504 0x1751 +#define CHIP_NUM_57502 0x1752 struct bnxt_qplib_res { struct pci_dev *pdev; @@ -203,7 +205,9 @@ struct bnxt_qplib_res { static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) { - return (cctx->chip_num == CHIP_NUM_57500); + return (cctx->chip_num == CHIP_NUM_57508 || + cctx->chip_num == CHIP_NUM_57504 || + cctx->chip_num == CHIP_NUM_57502); } static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res) diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig deleted file mode 100644 index 8c1a72bff447..000000000000 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config INFINIBAND_CXGB3 - tristate "Chelsio RDMA Driver" - depends on CHELSIO_T3 - select GENERIC_ALLOCATOR - ---help--- - This is an iWARP/RDMA driver for the Chelsio T3 1GbE and - 10GbE adapters. 
- - For general information about Chelsio and our products, visit - our website at <http://www.chelsio.com>. - - For customer support, please visit our customer support page at - <http://www.chelsio.com/support.html>. - - Please send feedback to <linux-bugs@chelsio.com>. - - To compile this driver as a module, choose M here: the module - will be called iw_cxgb3. diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile deleted file mode 100644 index 34bb86a6ae3a..000000000000 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3 - -obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o - -iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ - iwch_provider.o iwch.o cxio_hal.o cxio_resource.o diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c deleted file mode 100644 index 95b22a651673..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ /dev/null @@ -1,1312 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <asm/delay.h> - -#include <linux/mutex.h> -#include <linux/netdevice.h> -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/pci.h> -#include <linux/dma-mapping.h> -#include <linux/slab.h> -#include <net/net_namespace.h> - -#include "cxio_resource.h" -#include "cxio_hal.h" -#include "cxgb3_offload.h" -#include "sge_defs.h" - -static LIST_HEAD(rdev_list); -static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL; - -static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (!strcmp(rdev->dev_name, dev_name)) - return rdev; - return NULL; -} - -static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev) -{ - struct cxio_rdev *rdev; - - list_for_each_entry(rdev, &rdev_list, entry) - if (rdev->t3cdev_p == tdev) - return rdev; - return NULL; -} - -int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit) -{ - int ret; - struct t3_cqe *cqe; - u32 rptr; - - struct rdma_cq_op setup; - setup.id = cq->cqid; - setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0; - setup.op = op; - ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup); - - if ((ret < 0) || (op == CQ_CREDIT_UPDATE)) - return ret; - - /* - * If the rearm returned an index other than our current index, - * then there might be CQE's in flight (being DMA'd). 
We must wait - * here for them to complete or the consumer can miss a notification. - */ - if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) { - int i=0; - - rptr = cq->rptr; - - /* - * Keep the generation correct by bumping rptr until it - * matches the index returned by the rearm - 1. - */ - while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret) - rptr++; - - /* - * Now rptr is the index for the (last) cqe that was - * in-flight at the time the HW rearmed the CQ. We - * spin until that CQE is valid. - */ - cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2); - while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { - udelay(1); - if (i++ > 1000000) { - pr_err("%s: stalled rnic\n", rdev_p->dev_name); - return -EIO; - } - } - - return 1; - } - - return 0; -} - -static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) -{ - struct rdma_cq_setup setup; - setup.id = cqid; - setup.base_addr = 0; /* NULL address */ - setup.size = 0; /* disaable the CQ */ - setup.credits = 0; - setup.credit_thres = 0; - setup.ovfl_mode = 0; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) -{ - u64 sge_cmd; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, - T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, - T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = qpid << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) -{ - struct rdma_cq_setup setup; - int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); - - size += 1; /* one extra page for storing cq-in-err state */ - cq->cqid = 
cxio_hal_get_cqid(rdev_p->rscp); - if (!cq->cqid) - return -ENOMEM; - if (kernel) { - cq->sw_queue = kzalloc(size, GFP_KERNEL); - if (!cq->sw_queue) - return -ENOMEM; - } - cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size, - &(cq->dma_addr), GFP_KERNEL); - if (!cq->queue) { - kfree(cq->sw_queue); - return -ENOMEM; - } - dma_unmap_addr_set(cq, mapping, cq->dma_addr); - setup.id = cq->cqid; - setup.base_addr = (u64) (cq->dma_addr); - setup.size = 1UL << cq->size_log2; - setup.credits = 65535; - setup.credit_thres = 1; - if (rdev_p->t3cdev_p->type != T3A) - setup.ovfl_mode = 0; - else - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - u32 qpid; - int i; - - mutex_lock(&uctx->lock); - if (!list_empty(&uctx->qpids)) { - entry = list_entry(uctx->qpids.next, struct cxio_qpid_list, - entry); - list_del(&entry->entry); - qpid = entry->qpid; - kfree(entry); - } else { - qpid = cxio_hal_get_qpid(rdev_p->rscp); - if (!qpid) - goto out; - for (i = qpid+1; i & rdev_p->qpmask; i++) { - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - entry->qpid = i; - list_add_tail(&entry->entry, &uctx->qpids); - } - } -out: - mutex_unlock(&uctx->lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid, - struct cxio_ucontext *uctx) -{ - struct cxio_qpid_list *entry; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return; - pr_debug("%s qpid 0x%x\n", __func__, qpid); - entry->qpid = qpid; - mutex_lock(&uctx->lock); - list_add_tail(&entry->entry, &uctx->qpids); - mutex_unlock(&uctx->lock); -} - -void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - struct list_head *pos, *nxt; - struct cxio_qpid_list *entry; - - mutex_lock(&uctx->lock); - list_for_each_safe(pos, nxt, 
&uctx->qpids) { - entry = list_entry(pos, struct cxio_qpid_list, entry); - list_del_init(&entry->entry); - if (!(entry->qpid & rdev_p->qpmask)) - cxio_hal_put_qpid(rdev_p->rscp, entry->qpid); - kfree(entry); - } - mutex_unlock(&uctx->lock); -} - -void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) -{ - INIT_LIST_HEAD(&uctx->qpids); - mutex_init(&uctx->lock); -} - -int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, - struct t3_wq *wq, struct cxio_ucontext *uctx) -{ - int depth = 1UL << wq->size_log2; - int rqsize = 1UL << wq->rq_size_log2; - - wq->qpid = get_qpid(rdev_p, uctx); - if (!wq->qpid) - return -ENOMEM; - - wq->rq = kcalloc(depth, sizeof(struct t3_swrq), GFP_KERNEL); - if (!wq->rq) - goto err1; - - wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize); - if (!wq->rq_addr) - goto err2; - - wq->sq = kcalloc(depth, sizeof(struct t3_swsq), GFP_KERNEL); - if (!wq->sq) - goto err3; - - wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), - depth * sizeof(union t3_wr), - &(wq->dma_addr), GFP_KERNEL); - if (!wq->queue) - goto err4; - - dma_unmap_addr_set(wq, mapping, wq->dma_addr); - wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - if (!kernel_domain) - wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + - (wq->qpid << rdev_p->qpshift); - wq->rdev = rdev_p; - pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", - __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb); - return 0; -err4: - kfree(wq->sq); -err3: - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize); -err2: - kfree(wq->rq); -err1: - put_qpid(rdev_p, wq->qpid, uctx); - return -ENOMEM; -} - -void cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) -{ - cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); - kfree(cq->sw_queue); - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (cq->size_log2)) - * sizeof(struct t3_cqe) + 1, cq->queue, - dma_unmap_addr(cq, mapping)); - cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); -} - -int 
cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, - struct cxio_ucontext *uctx) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (wq->size_log2)) - * sizeof(union t3_wr), wq->queue, - dma_unmap_addr(wq, mapping)); - kfree(wq->sq); - cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2)); - kfree(wq->rq); - put_qpid(rdev_p, wq->qpid, uctx); - return 0; -} - -static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(T3_SEND) | - V_CQE_TYPE(0) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - u32 ptr; - int flushed = 0; - - pr_debug("%s wq %p cq %p\n", __func__, wq, cq); - - /* flush RQ */ - pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, - wq->rq_rptr, wq->rq_wptr, count); - ptr = wq->rq_rptr + count; - while (ptr++ != wq->rq_wptr) { - insert_recv_cqe(wq, cq); - flushed++; - } - return flushed; -} - -static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, - struct t3_swsq *sqp) -{ - struct t3_cqe cqe; - - pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, - wq, cq, cq->sw_rptr, cq->sw_wptr); - memset(&cqe, 0, sizeof(cqe)); - cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | - V_CQE_OPCODE(sqp->opcode) | - V_CQE_TYPE(1) | - V_CQE_SWCQE(1) | - V_CQE_QPID(wq->qpid) | - V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, - cq->size_log2))); - cqe.u.scqe.wrid_hi = sqp->sq_wptr; - - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; - cq->sw_wptr++; -} - -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) -{ - __u32 ptr = wq->sq_rptr + 
count; - int flushed = 0; - struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - - while (ptr != wq->sq_wptr) { - sqp->signaled = 0; - insert_sq_cqe(wq, cq, sqp); - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - flushed++; - } - return flushed; -} - -/* - * Move all CQEs from the HWCQ into the SWCQ. - */ -void cxio_flush_hw_cq(struct t3_cq *cq) -{ - struct t3_cqe *cqe, *swcqe; - - pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); - cqe = cxio_next_hw_cqe(cq); - while (cqe) { - pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n", - __func__, cq->rptr, cq->sw_wptr); - swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2); - *swcqe = *cqe; - swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); - cq->sw_wptr++; - cq->rptr++; - cqe = cxio_next_hw_cqe(cq); - } -} - -static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) -{ - if (CQE_OPCODE(*cqe) == T3_TERMINATE) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe)) - return 0; - - if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) - return 0; - - if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) - return 0; - - return 1; -} - -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if ((SQ_TYPE(*cqe) || - ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) && - (CQE_QPID(*cqe) == wq->qpid)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) -{ - struct t3_cqe *cqe; - u32 ptr; - - *count = 0; - pr_debug("%s count zero %d\n", __func__, *count); - ptr = cq->sw_rptr; - while (!Q_EMPTY(ptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) && - 
(CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq)) - (*count)++; - ptr++; - } - pr_debug("%s cq %p count %d\n", __func__, cq, *count); -} - -static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p) -{ - struct rdma_cq_setup setup; - setup.id = 0; - setup.base_addr = 0; /* NULL address */ - setup.size = 1; /* enable the CQ */ - setup.credits = 0; - - /* force SGE to redirect to RspQ and interrupt */ - setup.credit_thres = 0; - setup.ovfl_mode = 1; - return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); -} - -static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) -{ - int err; - u64 sge_cmd, ctx0, ctx1; - u64 base_addr; - struct t3_modify_qp_wr *wqe; - struct sk_buff *skb; - - skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); - if (!skb) { - pr_debug("%s alloc_skb failed\n", __func__); - return -ENOMEM; - } - err = cxio_hal_init_ctrl_cq(rdev_p); - if (err) { - pr_debug("%s err %d initializing ctrl_cq\n", __func__, err); - goto err; - } - rdev_p->ctrl_qp.workq = dma_alloc_coherent( - &(rdev_p->rnic_info.pdev->dev), - (1 << T3_CTRL_QP_SIZE_LOG2) * - sizeof(union t3_wr), - &(rdev_p->ctrl_qp.dma_addr), - GFP_KERNEL); - if (!rdev_p->ctrl_qp.workq) { - pr_debug("%s dma_alloc_coherent failed\n", __func__); - err = -ENOMEM; - goto err; - } - dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping, - rdev_p->ctrl_qp.dma_addr); - rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; - - mutex_init(&rdev_p->ctrl_qp.lock); - init_waitqueue_head(&rdev_p->ctrl_qp.waitq); - - /* update HW Ctrl QP context */ - base_addr = rdev_p->ctrl_qp.dma_addr; - base_addr >>= 12; - ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) | - V_EC_BASE_LO((u32) base_addr & 0xffff)); - ctx0 <<= 32; - ctx0 |= V_EC_CREDITS(FW_WR_NUM); - base_addr >>= 16; - ctx1 = (u32) base_addr; - base_addr >>= 32; - ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) | - V_EC_TYPE(0) | V_EC_GEN(1) | - V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; - wqe = skb_put_zero(skb, 
sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, - T3_CTL_QP_TID, 7, T3_SOPEOP); - wqe->flags = cpu_to_be32(MODQP_WRITE_EC); - sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; - wqe->sge_cmd = cpu_to_be64(sge_cmd); - wqe->ctx1 = cpu_to_be64(ctx1); - wqe->ctx0 = cpu_to_be64(ctx0); - pr_debug("CtrlQP dma_addr %pad workq %p size %d\n", - &rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq, - 1 << T3_CTRL_QP_SIZE_LOG2); - skb->priority = CPL_PRIORITY_CONTROL; - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -err: - kfree_skb(skb); - return err; -} - -static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) -{ - dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << T3_CTRL_QP_SIZE_LOG2) - * sizeof(union t3_wr), rdev_p->ctrl_qp.workq, - dma_unmap_addr(&rdev_p->ctrl_qp, mapping)); - return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID); -} - -/* write len bytes of data into addr (32B aligned address) - * If data is NULL, clear len byte of memory to zero. - * caller acquires the ctrl_qp lock before the call - */ -static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, - u32 len, void *data) -{ - u32 i, nr_wqe, copy_len; - u8 *copy_data; - u8 wr_len, utx_len; /* length in 8 byte flit */ - enum t3_wr_flags flag; - __be64 *wqe; - u64 utx_cmd; - addr &= 0x7FFFFFF; - nr_wqe = len % 96 ? 
len / 96 + 1 : len / 96; /* 96B max per WQE */ - pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n", - __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, - nr_wqe, data, addr); - utx_len = 3; /* in 32B unit */ - for (i = 0; i < nr_wqe; i++) { - if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2)) { - pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n", - __func__, - rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i); - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - !Q_FULL(rdev_p->ctrl_qp.rptr, - rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2))) { - pr_debug("%s ctrl_qp workq interrupted\n", - __func__); - return -ERESTARTSYS; - } - pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n", - __func__, i); - } - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - flag = 0; - if (i == (nr_wqe - 1)) { - /* last WQE */ - flag = T3_COMPLETION_FLAG; - if (len % 32) - utx_len = len / 32 + 1; - else - utx_len = len / 32; - } - - /* - * Force a CQE to return the credit to the workq in case - * we posted more than half the max QP size of WRs - */ - if ((i != 0) && - (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) { - flag = T3_COMPLETION_FLAG; - pr_debug("%s force completion at i %d\n", __func__, i); - } - - /* build the utx mem command */ - wqe += (sizeof(struct t3_bypass_wr) >> 3); - utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3); - utx_cmd <<= 32; - utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1); - *wqe = cpu_to_be64(utx_cmd); - wqe++; - copy_data = (u8 *) data + i * 96; - copy_len = len > 96 ? 
96 : len; - - /* clear memory content if data is NULL */ - if (data) - memcpy(wqe, copy_data, copy_len); - else - memset(wqe, 0, copy_len); - if (copy_len % 32) - memset(((u8 *) wqe) + copy_len, 0, - 32 - (copy_len % 32)); - wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 + - (utx_len << 2); - wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % - (1 << T3_CTRL_QP_SIZE_LOG2))); - - /* wptr in the WRID[31:0] */ - ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr; - - /* - * This must be the last write with a memory barrier - * for the genbit - */ - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, - Q_GENBIT(rdev_p->ctrl_qp.wptr, - T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, - wr_len, T3_SOPEOP); - if (flag == T3_COMPLETION_FLAG) - ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); - len -= 96; - rdev_p->ctrl_qp.wptr++; - } - return 0; -} - -/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr - * OUT: stag index - * TBD: shared memory region support - */ -static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, - u32 *stag, u8 stag_state, u32 pdid, - enum tpt_mem_type type, enum tpt_mem_perm perm, - u32 zbva, u64 to, u32 len, u8 page_size, - u32 pbl_size, u32 pbl_addr) -{ - int err; - struct tpt_entry tpt; - u32 stag_idx; - u32 wptr; - - if (cxio_fatal_error(rdev_p)) - return -EIO; - - stag_state = stag_state > 0; - stag_idx = (*stag) >> 8; - - if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) { - stag_idx = cxio_hal_get_stag(rdev_p->rscp); - if (!stag_idx) - return -ENOMEM; - *stag = (stag_idx << 8) | ((*stag) & 0xFF); - } - pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", - __func__, stag_state, type, pdid, stag_idx); - - mutex_lock(&rdev_p->ctrl_qp.lock); - - /* write TPT entry */ - if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); - else { - tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID | - V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) | - V_TPT_STAG_STATE(stag_state) | - 
V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); - BUG_ON(page_size >= 28); - tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) | - ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | - V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | - V_TPT_PAGE_SIZE(page_size)); - tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); - tpt.len = cpu_to_be32(len); - tpt.va_hi = cpu_to_be32((u32) (to >> 32)); - tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); - tpt.rsvd_bind_cnt_or_pstag = 0; - tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); - } - err = cxio_hal_ctrl_qp_write_mem(rdev_p, - stag_idx + - (rdev_p->rnic_info.tpt_base >> 5), - sizeof(tpt), &tpt); - - /* release the stag index to free pool */ - if (reset_tpt_entry) - cxio_hal_put_stag(rdev_p->rscp, stag_idx); - - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (!err) - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - return err; -} - -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size) -{ - u32 wptr; - int err; - - pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", - __func__, pbl_addr, rdev_p->rnic_info.pbl_base, - pbl_size); - - mutex_lock(&rdev_p->ctrl_qp.lock); - err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, - pbl); - wptr = rdev_p->ctrl_qp.wptr; - mutex_unlock(&rdev_p->ctrl_qp.lock); - if (err) - return err; - - if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, - SEQ32_GE(rdev_p->ctrl_qp.rptr, - wptr))) - return -ERESTARTSYS; - - return 0; -} - -int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int 
cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl_size, pbl_addr); -} - -int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, - u32 pbl_addr) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - pbl_size, pbl_addr); -} - -int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) -{ - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, - 0, 0); -} - -int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) -{ - *stag = T3_STAG_UNSET; - return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, - 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); -} - -int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) -{ - struct t3_rdma_init_wr *wqe; - struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC); - if (!skb) - return -ENOMEM; - pr_debug("%s rdev_p %p\n", __func__, rdev_p); - wqe = __skb_put(skb, sizeof(*wqe)); - wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT)); - wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) | - V_FW_RIWR_LEN(sizeof(*wqe) >> 3)); - wqe->wrid.id1 = 0; - wqe->qpid = cpu_to_be32(attr->qpid); - wqe->pdid = cpu_to_be32(attr->pdid); - wqe->scqid = cpu_to_be32(attr->scqid); - wqe->rcqid = cpu_to_be32(attr->rcqid); - wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base); - wqe->rq_size = cpu_to_be32(attr->rq_size); - wqe->mpaattrs = attr->mpaattrs; - wqe->qpcaps = attr->qpcaps; - wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); - wqe->rqe_count = cpu_to_be16(attr->rqe_count); - 
wqe->flags_rtr_type = cpu_to_be16(attr->flags | - V_RTR_TYPE(attr->rtr_type) | - V_CHAN(attr->chan)); - wqe->ord = cpu_to_be32(attr->ord); - wqe->ird = cpu_to_be32(attr->ird); - wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); - wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); - wqe->irs = cpu_to_be32(attr->irs); - skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ - return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); -} - -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = ev_cb; -} - -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb) -{ - cxio_ev_cb = NULL; -} - -static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb) -{ - static int cnt; - struct cxio_rdev *rdev_p = NULL; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n", - cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), - RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg), - RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg), - RSPQ_CREDIT_THRESH(rsp_msg)); - pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - rdev_p = (struct cxio_rdev *)t3cdev_p->ulp; - if (!rdev_p) { - pr_debug("%s called by t3cdev %p with null ulp\n", __func__, - t3cdev_p); - return 0; - } - if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) { - rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1; - wake_up_interruptible(&rdev_p->ctrl_qp.waitq); - dev_kfree_skb_irq(skb); - } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8) - dev_kfree_skb_irq(skb); - else if (cxio_ev_cb) - (*cxio_ev_cb) (rdev_p, skb); - else - dev_kfree_skb_irq(skb); - cnt++; - return 
0; -} - -/* Caller takes care of locking if needed */ -int cxio_rdev_open(struct cxio_rdev *rdev_p) -{ - struct net_device *netdev_p = NULL; - int err = 0; - if (strlen(rdev_p->dev_name)) { - if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) { - return -EBUSY; - } - netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name); - if (!netdev_p) { - return -EINVAL; - } - dev_put(netdev_p); - } else if (rdev_p->t3cdev_p) { - if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) { - return -EBUSY; - } - netdev_p = rdev_p->t3cdev_p->lldev; - strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name, - T3_MAX_DEV_NAME_LEN); - } else { - pr_debug("%s t3cdev_p or dev_name must be set\n", __func__); - return -EINVAL; - } - - list_add_tail(&rdev_p->entry, &rdev_list); - - pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name); - memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp)); - if (!rdev_p->t3cdev_p) - rdev_p->t3cdev_p = dev2t3cdev(netdev_p); - rdev_p->t3cdev_p->ulp = (void *) rdev_p; - - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO, - &(rdev_p->fw_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) { - pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n", - CXIO_FW_MAJ, - G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers)); - err = -EINVAL; - goto err1; - } - - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS, - &(rdev_p->rnic_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS, - &(rdev_p->port_info)); - if (err) { - pr_err("%s t3cdev_p(%p)->ctl returned error %d\n", - __func__, rdev_p->t3cdev_p, err); - goto err1; - } - - /* - * qpshift is the number of bits to shift the qpid left in order - * to get the correct address of the doorbell for that 
qp. - */ - cxio_init_ucontext(rdev_p, &rdev_p->uctx); - rdev_p->qpshift = PAGE_SHIFT - - ilog2(65536 >> - ilog2(rdev_p->rnic_info.udbell_len >> - PAGE_SHIFT)); - rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT; - rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1; - pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n", - __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base, - rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p), - rdev_p->rnic_info.pbl_base, - rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base, - rdev_p->rnic_info.rqt_top); - pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n", - rdev_p->rnic_info.udbell_len, - rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr, - rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask); - - err = cxio_hal_init_ctrl_qp(rdev_p); - if (err) { - pr_err("%s error %d initializing ctrl_qp\n", __func__, err); - goto err1; - } - err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0, - 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ, - T3_MAX_NUM_PD); - if (err) { - pr_err("%s error %d initializing hal resources\n", - __func__, err); - goto err2; - } - err = cxio_hal_pblpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing pbl mem pool\n", - __func__, err); - goto err3; - } - err = cxio_hal_rqtpool_create(rdev_p); - if (err) { - pr_err("%s error %d initializing rqt mem pool\n", - __func__, err); - goto err4; - } - return 0; -err4: - cxio_hal_pblpool_destroy(rdev_p); -err3: - cxio_hal_destroy_resource(rdev_p->rscp); -err2: - cxio_hal_destroy_ctrl_qp(rdev_p); -err1: - rdev_p->t3cdev_p->ulp = NULL; - list_del(&rdev_p->entry); - return err; -} - -void cxio_rdev_close(struct cxio_rdev *rdev_p) -{ - if (rdev_p) { - cxio_hal_pblpool_destroy(rdev_p); - cxio_hal_rqtpool_destroy(rdev_p); - list_del(&rdev_p->entry); - cxio_hal_destroy_ctrl_qp(rdev_p); - cxio_hal_destroy_resource(rdev_p->rscp); 
- rdev_p->t3cdev_p->ulp = NULL; - } -} - -int __init cxio_hal_init(void) -{ - if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI)) - return -ENOMEM; - t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler); - return 0; -} - -void __exit cxio_hal_exit(void) -{ - struct cxio_rdev *rdev, *tmp; - - t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL); - list_for_each_entry_safe(rdev, tmp, &rdev_list, entry) - cxio_rdev_close(rdev); - cxio_hal_destroy_rhdl_resource(); -} - -static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) -{ - struct t3_swsq *sqp; - __u32 ptr = wq->sq_rptr; - int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr); - - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - while (count--) - if (!sqp->signaled) { - ptr++; - sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); - } else if (sqp->complete) { - - /* - * Insert this completed cqe into the swcq. - */ - pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n", - __func__, Q_PTR2IDX(ptr, wq->sq_size_log2), - Q_PTR2IDX(cq->sw_wptr, cq->size_log2)); - sqp->cqe.header |= htonl(V_CQE_SWCQE(1)); - *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) - = sqp->cqe; - cq->sw_wptr++; - sqp->signaled = 0; - break; - } else - break; -} - -static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe, - struct t3_cqe *read_cqe) -{ - read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr; - read_cqe->len = wq->oldest_read->read_len; - read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) | - V_CQE_SWCQE(SW_CQE(*hw_cqe)) | - V_CQE_OPCODE(T3_READ_REQ) | - V_CQE_TYPE(1)); -} - -/* - * Return a ptr to the next read wr in the SWSQ or NULL. 
- */ -static void advance_oldest_read(struct t3_wq *wq) -{ - - u32 rptr = wq->oldest_read - wq->sq + 1; - u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2); - - while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) { - wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2); - - if (wq->oldest_read->opcode == T3_READ_REQ) - return; - rptr++; - } - wq->oldest_read = NULL; -} - -/* - * cxio_poll_cq - * - * Caller must: - * check the validity of the first CQE, - * supply the wq assicated with the qpid. - * - * credit: cq credit to return to sge. - * cqe_flushed: 1 iff the CQE is flushed. - * cqe: copy of the polled CQE. - * - * return value: - * 0 CQE returned, - * -1 CQE skipped, try again. - */ -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) -{ - int ret = 0; - struct t3_cqe *hw_cqe, read_cqe; - - *cqe_flushed = 0; - *credit = 0; - hw_cqe = cxio_next_cqe(cq); - - pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), - CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe), - CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe), - CQE_WRID_LOW(*hw_cqe)); - - /* - * skip cqe's not affiliated with a QP. - */ - if (wq == NULL) { - ret = -1; - goto skip_cqe; - } - - /* - * Gotta tweak READ completions: - * 1) the cqe doesn't contain the sq_wptr from the wr. - * 2) opcode not reflected from the wr. - * 3) read_len not reflected from the wr. - * 4) cq_type is RQ_TYPE not SQ_TYPE. - */ - if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { - - /* - * If this is an unsolicited read response, then the read - * was generated by the kernel driver as part of peer-2-peer - * connection setup. So ignore the completion. 
- */ - if (!wq->oldest_read) { - if (CQE_STATUS(*hw_cqe)) - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - /* - * Don't write to the HWCQ, so create a new read req CQE - * in local memory. - */ - create_read_req_cqe(wq, hw_cqe, &read_cqe); - hw_cqe = &read_cqe; - advance_oldest_read(wq); - } - - /* - * T3A: Discard TERMINATE CQEs. - */ - if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) { - ret = -1; - wq->error = 1; - goto skip_cqe; - } - - if (CQE_STATUS(*hw_cqe) || wq->error) { - *cqe_flushed = wq->error; - wq->error = 1; - - /* - * T3A inserts errors into the CQE. We cannot return - * these as work completions. - */ - /* incoming write failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE) - && RQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - /* incoming read request failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) { - ret = -1; - goto skip_cqe; - } - - /* incoming SEND with no receive posted failures */ - if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) && - Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - ret = -1; - goto skip_cqe; - } - BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe)); - goto proc_cqe; - } - - /* - * RECV completion. - */ - if (RQ_TYPE(*hw_cqe)) { - - /* - * HW only validates 4 bits of MSN. So we must validate that - * the MSN in the SEND is the next expected MSN. If its not, - * then we complete this with TPT_ERR_MSN and mark the wq in - * error. - */ - - if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { - wq->error = 1; - ret = -1; - goto skip_cqe; - } - - if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { - wq->error = 1; - hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); - goto proc_cqe; - } - goto proc_cqe; - } - - /* - * If we get here its a send completion. - * - * Handle out of order completion. These get stuffed - * in the SW SQ. Then the SW SQ is walked to move any - * now in-order completions into the SW CQ. 
This handles - * 2 cases: - * 1) reaping unsignaled WRs when the first subsequent - * signaled WR is completed. - * 2) out of order read completions. - */ - if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) { - struct t3_swsq *sqp; - - pr_debug("%s out of order completion going in swsq at idx %ld\n", - __func__, - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), - wq->sq_size_log2)); - sqp = wq->sq + - Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2); - sqp->cqe = *hw_cqe; - sqp->complete = 1; - ret = -1; - goto flush_wq; - } - -proc_cqe: - *cqe = *hw_cqe; - - /* - * Reap the associated WR(s) that are freed up with this - * completion. - */ - if (SQ_TYPE(*hw_cqe)) { - wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); - pr_debug("%s completing sq idx %ld\n", __func__, - Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); - *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; - wq->sq_rptr++; - } else { - pr_debug("%s completing rq idx %ld\n", __func__, - Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); - *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; - if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) - cxio_hal_pblpool_free(wq->rdev, - wq->rq[Q_PTR2IDX(wq->rq_rptr, - wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); - BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr)); - wq->rq_rptr++; - } - -flush_wq: - /* - * Flush any completed cqes that are now in-order. - */ - flush_completed_wrs(wq, cq); - -skip_cqe: - if (SW_CQE(*hw_cqe)) { - pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n", - __func__, cq, cq->cqid, cq->sw_rptr); - ++cq->sw_rptr; - } else { - pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n", - __func__, cq, cq->cqid, cq->rptr); - ++cq->rptr; - - /* - * T3A: compute credits. 
- */ - if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1))) - || ((cq->rptr - cq->wptr) >= 128)) { - *credit = cq->rptr - cq->wptr; - cq->wptr = cq->rptr; - } - } - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h deleted file mode 100644 index 40c029ffa425..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __CXIO_HAL_H__ -#define __CXIO_HAL_H__ - -#include <linux/list.h> -#include <linux/mutex.h> -#include <linux/kfifo.h> - -#include "t3_cpl.h" -#include "t3cdev.h" -#include "cxgb3_ctl_defs.h" -#include "cxio_wr.h" - -#define T3_CTRL_QP_ID FW_RI_SGEEC_START -#define T3_CTL_QP_TID FW_RI_TID_START -#define T3_CTRL_QP_SIZE_LOG2 8 -#define T3_CTRL_CQ_ID 0 - -#define T3_MAX_NUM_RI (1<<15) -#define T3_MAX_NUM_QP (1<<15) -#define T3_MAX_NUM_CQ (1<<15) -#define T3_MAX_NUM_PD (1<<15) -#define T3_MAX_PBL_SIZE 256 -#define T3_MAX_RQ_SIZE 1024 -#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) -#define T3_MAX_CQ_DEPTH 65536 -#define T3_MAX_NUM_STAG (1<<15) -#define T3_MAX_MR_SIZE 0x100000000ULL -#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ - -#define T3_STAG_UNSET 0xffffffff - -#define T3_MAX_DEV_NAME_LEN 32 - -#define CXIO_FW_MAJ 7 - -struct cxio_hal_ctrl_qp { - u32 wptr; - u32 rptr; - struct mutex lock; /* for the wtpr, can sleep */ - wait_queue_head_t waitq;/* wait for RspQ/CQE msg */ - union t3_wr *workq; /* the work request queue */ - dma_addr_t dma_addr; /* pci bus address of the workq */ - DEFINE_DMA_UNMAP_ADDR(mapping); - void __iomem *doorbell; -}; - -struct cxio_hal_resource { - struct kfifo tpt_fifo; - spinlock_t tpt_fifo_lock; - struct kfifo qpid_fifo; - spinlock_t qpid_fifo_lock; - struct kfifo cqid_fifo; - spinlock_t cqid_fifo_lock; - struct kfifo pdid_fifo; - spinlock_t pdid_fifo_lock; -}; - -struct cxio_qpid_list { - struct list_head entry; - u32 qpid; -}; - -struct cxio_ucontext { - struct list_head qpids; - struct mutex lock; -}; - -struct cxio_rdev { - char dev_name[T3_MAX_DEV_NAME_LEN]; - struct t3cdev *t3cdev_p; - struct rdma_info rnic_info; - struct adap_ports port_info; - struct cxio_hal_resource *rscp; - struct cxio_hal_ctrl_qp ctrl_qp; - void *ulp; - unsigned long qpshift; - u32 qpnr; - u32 qpmask; - struct cxio_ucontext uctx; - struct gen_pool *pbl_pool; - struct gen_pool *rqt_pool; - struct list_head entry; - struct ch_embedded_info 
fw_info; - u32 flags; -#define CXIO_ERROR_FATAL 1 -}; - -static inline int cxio_fatal_error(struct cxio_rdev *rdev_p) -{ - return rdev_p->flags & CXIO_ERROR_FATAL; -} - -static inline int cxio_num_stags(struct cxio_rdev *rdev_p) -{ - return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5)); -} - -typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p, - struct sk_buff * skb); - -#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff) -#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff) -#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1) -#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1) -#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1) -#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1) -#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1) -#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1) -#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1) - -struct respQ_msg_t { - __be32 flags; /* flit 0 */ - __be32 cq_ptrid; - __be64 rsvd; /* flit 1 */ - struct t3_cqe cqe; /* flits 2-3 */ -}; - -enum t3_cq_opcode { - CQ_ARM_AN = 0x2, - CQ_ARM_SE = 0x6, - CQ_FORCE_AN = 0x3, - CQ_CREDIT_UPDATE = 0x7 -}; - -int cxio_rdev_open(struct cxio_rdev *rdev); -void cxio_rdev_close(struct cxio_rdev *rdev); -int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, - enum t3_cq_opcode op, u32 credit); -int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel); -void cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); -void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); -int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, - struct cxio_ucontext *uctx); -int 
cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); -int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, - u32 pbl_addr, u32 pbl_size); -int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, - enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, u32 pbl_size, u32 pbl_addr); -int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, - u32 pbl_addr); -int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); -int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); -int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); -int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); -void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb); -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); -int __init cxio_hal_init(void); -void __exit cxio_hal_exit(void); -int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); -int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); -void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); -void cxio_flush_hw_cq(struct t3_cq *cq); -int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit); -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb); - -#ifdef pr_fmt -#undef pr_fmt -#endif - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c deleted file mode 100644 index c6e7bc4420b6..000000000000 --- 
a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -/* Crude resource management */ -#include <linux/kernel.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/kfifo.h> -#include <linux/spinlock.h> -#include <linux/errno.h> -#include "cxio_resource.h" -#include "cxio_hal.h" - -static struct kfifo rhdl_fifo; -static spinlock_t rhdl_fifo_lock; - -#define RANDOM_SIZE 16 - -static int __cxio_init_resource_fifo(struct kfifo *fifo, - spinlock_t *fifo_lock, - u32 nr, u32 skip_low, - u32 skip_high, - int random) -{ - u32 i, j, entry = 0, idx; - u32 random_bytes; - u32 rarray[16]; - spin_lock_init(fifo_lock); - - if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) - return -ENOMEM; - - for (i = 0; i < skip_low + skip_high; i++) - kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); - if (random) { - j = 0; - random_bytes = prandom_u32(); - for (i = 0; i < RANDOM_SIZE; i++) - rarray[i] = i + skip_low; - for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { - if (j >= RANDOM_SIZE) { - j = 0; - random_bytes = prandom_u32(); - } - idx = (random_bytes >> (j * 2)) & 0xF; - kfifo_in(fifo, - (unsigned char *) &rarray[idx], - sizeof(u32)); - rarray[idx] = i; - j++; - } - for (i = 0; i < RANDOM_SIZE; i++) - kfifo_in(fifo, - (unsigned char *) &rarray[i], - sizeof(u32)); - } else - for (i = skip_low; i < nr - skip_high; i++) - kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); - - for (i = 0; i < skip_low + skip_high; i++) - if (kfifo_out_locked(fifo, (unsigned char *) &entry, - sizeof(u32), fifo_lock) != sizeof(u32)) - break; - return 0; -} - -static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 0)); -} - -static int cxio_init_resource_fifo_random(struct kfifo *fifo, - spinlock_t * fifo_lock, - u32 nr, u32 skip_low, u32 skip_high) -{ - - return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, - skip_high, 1)); -} - -static int 
cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) -{ - u32 i; - - spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); - - if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), - GFP_KERNEL)) - return -ENOMEM; - - for (i = 16; i < T3_MAX_NUM_QP; i++) - if (!(i & rdev_p->qpmask)) - kfifo_in(&rdev_p->rscp->qpid_fifo, - (unsigned char *) &i, sizeof(u32)); - return 0; -} - -int cxio_hal_init_rhdl_resource(u32 nr_rhdl) -{ - return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1, - 0); -} - -void cxio_hal_destroy_rhdl_resource(void) -{ - kfifo_free(&rhdl_fifo); -} - -/* nr_* must be power of 2 */ -int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid) -{ - int err = 0; - struct cxio_hal_resource *rscp; - - rscp = kmalloc(sizeof(*rscp), GFP_KERNEL); - if (!rscp) - return -ENOMEM; - rdev_p->rscp = rscp; - err = cxio_init_resource_fifo_random(&rscp->tpt_fifo, - &rscp->tpt_fifo_lock, - nr_tpt, 1, 0); - if (err) - goto tpt_err; - err = cxio_init_qpid_fifo(rdev_p); - if (err) - goto qpid_err; - err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, - nr_cqid, 1, 0); - if (err) - goto cqid_err; - err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, - nr_pdid, 1, 0); - if (err) - goto pdid_err; - return 0; -pdid_err: - kfifo_free(&rscp->cqid_fifo); -cqid_err: - kfifo_free(&rscp->qpid_fifo); -qpid_err: - kfifo_free(&rscp->tpt_fifo); -tpt_err: - return -ENOMEM; -} - -/* - * returns 0 if no resource available - */ -static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) -{ - u32 entry; - if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) - return entry; - else - return 0; /* fifo emptry */ -} - -static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, - u32 entry) -{ - BUG_ON( - kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) - == 0); -} - -u32 cxio_hal_get_stag(struct 
cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock); -} - -void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) -{ - cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag); -} - -u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) -{ - u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo, - &rscp->qpid_fifo_lock); - pr_debug("%s qpid 0x%x\n", __func__, qpid); - return qpid; -} - -void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) -{ - pr_debug("%s qpid 0x%x\n", __func__, qpid); - cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid); -} - -u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock); -} - -void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) -{ - cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid); -} - -u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) -{ - return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock); -} - -void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) -{ - cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid); -} - -void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) -{ - kfifo_free(&rscp->tpt_fifo); - kfifo_free(&rscp->cqid_fifo); - kfifo_free(&rscp->qpid_fifo); - kfifo_free(&rscp->pdid_fifo); - kfree(rscp); -} - -/* - * PBL Memory Manager. Uses Linux generic allocator. 
- */ - -#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ - -u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size); - return (u32)addr; -} - -void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size); - gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); -} - -int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) -{ - unsigned pbl_start, pbl_chunk; - - rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); - if (!rdev_p->pbl_pool) - return -ENOMEM; - - pbl_start = rdev_p->rnic_info.pbl_base; - pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1; - - while (pbl_start < rdev_p->rnic_info.pbl_top) { - pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1, - pbl_chunk); - if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) { - pr_debug("%s failed to add PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { - pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n", - __func__, pbl_start, - rdev_p->rnic_info.pbl_top - pbl_start); - return 0; - } - pbl_chunk >>= 1; - } else { - pr_debug("%s added PBL chunk (%x/%x)\n", - __func__, pbl_start, pbl_chunk); - pbl_start += pbl_chunk; - } - } - - return 0; -} - -void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->pbl_pool); -} - -/* - * RQT Memory Manager. Uses Linux generic allocator. 
- */ - -#define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */ -#define RQT_CHUNK 2*1024*1024 - -u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) -{ - unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); - pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); - return (u32)addr; -} - -void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) -{ - pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6); - gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); -} - -int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) -{ - unsigned long i; - rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); - if (rdev_p->rqt_pool) - for (i = rdev_p->rnic_info.rqt_base; - i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; - i += RQT_CHUNK) - gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); - return rdev_p->rqt_pool ? 0 : -ENOMEM; -} - -void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) -{ - gen_pool_destroy(rdev_p->rqt_pool); -} diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h deleted file mode 100644 index a2703a3d882d..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_RESOURCE_H__ -#define __CXIO_RESOURCE_H__ - -#include <linux/kernel.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/kfifo.h> -#include <linux/spinlock.h> -#include <linux/errno.h> -#include <linux/genalloc.h> -#include "cxio_hal.h" - -extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); -extern void cxio_hal_destroy_rhdl_resource(void); -extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p, - u32 nr_tpt, u32 nr_pbl, - u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, - u32 nr_pdid); -extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag); -extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid); -extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp); -extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid); -extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp); - -#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base ) -extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int 
size); -extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); - -#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base ) -extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p); -extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p); -extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size); -extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); -#endif diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h deleted file mode 100644 index 53aa5c36247a..000000000000 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ /dev/null @@ -1,802 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __CXIO_WR_H__ -#define __CXIO_WR_H__ - -#include <asm/io.h> -#include <linux/pci.h> -#include <linux/timer.h> -#include "firmware_exports.h" - -#define T3_MAX_SGE 4 -#define T3_MAX_INLINE 64 -#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) -#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) -#define T3_STAG0_PAGE_SHIFT 15 - -#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) -#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ - ((rptr)!=(wptr)) ) -#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1)) -#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr))) -#define Q_COUNT(rptr,wptr) ((wptr)-(rptr)) -#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1)) - -static inline void ring_doorbell(void __iomem *doorbell, u32 qpid) -{ - writel(((1<<31) | qpid), doorbell); -} - -#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 )) - -enum t3_wr_flags { - T3_COMPLETION_FLAG = 0x01, - T3_NOTIFY_FLAG = 0x02, - T3_SOLICITED_EVENT_FLAG = 0x04, - T3_READ_FENCE_FLAG = 0x08, - T3_LOCAL_FENCE_FLAG = 0x10 -} __packed; - -enum t3_wr_opcode { - T3_WR_BP = FW_WROPCODE_RI_BYPASS, - T3_WR_SEND = FW_WROPCODE_RI_SEND, - T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE, - T3_WR_READ = FW_WROPCODE_RI_RDMA_READ, - T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV, - T3_WR_BIND = FW_WROPCODE_RI_BIND_MW, - T3_WR_RCV = FW_WROPCODE_RI_RECEIVE, - T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, - T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP, - T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR -} __packed; - -enum t3_rdma_opcode { - T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... 
*/ - T3_READ_REQ, - T3_READ_RESP, - T3_SEND, - T3_SEND_WITH_INV, - T3_SEND_WITH_SE, - T3_SEND_WITH_SE_INV, - T3_TERMINATE, - T3_RDMA_INIT, /* CHELSIO RI specific ... */ - T3_BIND_MW, - T3_FAST_REGISTER, - T3_LOCAL_INV, - T3_QP_MOD, - T3_BYPASS, - T3_RDMA_READ_REQ_WITH_INV, -} __packed; - -static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) -{ - switch (wrop) { - case T3_WR_BP: return T3_BYPASS; - case T3_WR_SEND: return T3_SEND; - case T3_WR_WRITE: return T3_RDMA_WRITE; - case T3_WR_READ: return T3_READ_REQ; - case T3_WR_INV_STAG: return T3_LOCAL_INV; - case T3_WR_BIND: return T3_BIND_MW; - case T3_WR_INIT: return T3_RDMA_INIT; - case T3_WR_QP_MOD: return T3_QP_MOD; - case T3_WR_FASTREG: return T3_FAST_REGISTER; - default: break; - } - return -1; -} - - -/* Work request id */ -union t3_wrid { - struct { - u32 hi; - u32 low; - } id0; - u64 id1; -}; - -#define WRID(wrid) (wrid.id1) -#define WRID_GEN(wrid) (wrid.id0.wr_gen) -#define WRID_IDX(wrid) (wrid.id0.wr_idx) -#define WRID_LO(wrid) (wrid.id0.wr_lo) - -struct fw_riwrh { - __be32 op_seop_flags; - __be32 gen_tid_len; -}; - -#define S_FW_RIWR_OP 24 -#define M_FW_RIWR_OP 0xff -#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP) -#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP) - -#define S_FW_RIWR_SOPEOP 22 -#define M_FW_RIWR_SOPEOP 0x3 -#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP) - -#define S_FW_RIWR_FLAGS 8 -#define M_FW_RIWR_FLAGS 0x3fffff -#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS) -#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS) - -#define S_FW_RIWR_TID 8 -#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID) - -#define S_FW_RIWR_LEN 0 -#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN) - -#define S_FW_RIWR_GEN 31 -#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN) - -struct t3_sge { - __be32 stag; - __be32 len; - __be64 to; -}; - -/* If num_sgle is zero, flit 5+ contains immediate data.*/ -struct t3_send_wr { - struct fw_riwrh wrh; /* 0 */ - union 
t3_wrid wrid; /* 1 */ - - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 rem_stag; - __be32 plen; /* 3 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ -}; - -#define T3_MAX_FASTREG_DEPTH 10 -#define T3_MAX_FASTREG_FRAG 10 - -struct t3_fastreg_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 len; - __be32 va_base_hi; /* 3 */ - __be32 va_base_lo_fbo; - __be32 page_type_perms; /* 4 */ - __be32 reserved1; - __be64 pbl_addrs[0]; /* 5+ */ -}; - -/* - * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. - */ -struct t3_pbl_frag { - struct fw_riwrh wrh; /* 0 */ - __be64 pbl_addrs[14]; /* 1..14 */ -}; - -#define S_FR_PAGE_COUNT 24 -#define M_FR_PAGE_COUNT 0xff -#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) -#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) - -#define S_FR_PAGE_SIZE 16 -#define M_FR_PAGE_SIZE 0x1f -#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) -#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) - -#define S_FR_TYPE 8 -#define M_FR_TYPE 0x1 -#define V_FR_TYPE(x) ((x) << S_FR_TYPE) -#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) - -#define S_FR_PERMS 0 -#define M_FR_PERMS 0xff -#define V_FR_PERMS(x) ((x) << S_FR_PERMS) -#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) - -struct t3_local_inv_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 stag; /* 2 */ - __be32 reserved; -}; - -struct t3_rdma_write_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 reserved[3]; - __be32 stag_sink; - __be64 to_sink; /* 3 */ - __be32 plen; /* 4 */ - __be32 num_sgle; - struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */ -}; - -struct t3_rdma_read_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 rdmaop; /* 2 */ - u8 local_inv; - u8 reserved[2]; - __be32 rem_stag; - __be64 rem_to; /* 3 */ - __be32 local_stag; /* 4 */ - __be32 local_len; - __be64 
local_to; /* 5 */ -}; - -struct t3_bind_mw_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u16 reserved; /* 2 */ - u8 type; - u8 perms; - __be32 mr_stag; - __be32 mw_stag; /* 3 */ - __be32 mw_len; - __be64 mw_va; /* 4 */ - __be32 mr_pbl_addr; /* 5 */ - u8 reserved2[3]; - u8 mr_pagesz; -}; - -struct t3_receive_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - u8 pagesz[T3_MAX_SGE]; - __be32 num_sgle; /* 2 */ - struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */ - __be32 pbl_addr[T3_MAX_SGE]; -}; - -struct t3_bypass_wr { - struct fw_riwrh wrh; - union t3_wrid wrid; /* 1 */ -}; - -struct t3_modify_qp_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 flags; /* 2 */ - __be32 quiesce; /* 2 */ - __be32 max_ird; /* 3 */ - __be32 max_ord; /* 3 */ - __be64 sge_cmd; /* 4 */ - __be64 ctx1; /* 5 */ - __be64 ctx0; /* 6 */ -}; - -enum t3_modify_qp_flags { - MODQP_QUIESCE = 0x01, - MODQP_MAX_IRD = 0x02, - MODQP_MAX_ORD = 0x04, - MODQP_WRITE_EC = 0x08, - MODQP_READ_EC = 0x10, -}; - - -enum t3_mpa_attrs { - uP_RI_MPA_RX_MARKER_ENABLE = 0x1, - uP_RI_MPA_TX_MARKER_ENABLE = 0x2, - uP_RI_MPA_CRC_ENABLE = 0x4, - uP_RI_MPA_IETF_ENABLE = 0x8 -} __packed; - -enum t3_qp_caps { - uP_RI_QP_RDMA_READ_ENABLE = 0x01, - uP_RI_QP_RDMA_WRITE_ENABLE = 0x02, - uP_RI_QP_BIND_ENABLE = 0x04, - uP_RI_QP_FAST_REGISTER_ENABLE = 0x08, - uP_RI_QP_STAG0_ENABLE = 0x10 -} __packed; - -enum rdma_init_rtr_types { - RTR_READ = 1, - RTR_WRITE = 2, - RTR_SEND = 3, -}; - -#define S_RTR_TYPE 2 -#define M_RTR_TYPE 0x3 -#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) -#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) - -#define S_CHAN 4 -#define M_CHAN 0x3 -#define V_CHAN(x) ((x) << S_CHAN) -#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN) - -struct t3_rdma_init_attr { - u32 tid; - u32 qpid; - u32 pdid; - u32 scqid; - u32 rcqid; - u32 rq_addr; - u32 rq_size; - enum t3_mpa_attrs mpaattrs; - enum t3_qp_caps qpcaps; - u16 tcp_emss; - u32 ord; - u32 ird; - u64 qp_dma_addr; 
- u32 qp_dma_size; - enum rdma_init_rtr_types rtr_type; - u16 flags; - u16 rqe_count; - u32 irs; - u32 chan; -}; - -struct t3_rdma_init_wr { - struct fw_riwrh wrh; /* 0 */ - union t3_wrid wrid; /* 1 */ - __be32 qpid; /* 2 */ - __be32 pdid; - __be32 scqid; /* 3 */ - __be32 rcqid; - __be32 rq_addr; /* 4 */ - __be32 rq_size; - u8 mpaattrs; /* 5 */ - u8 qpcaps; - __be16 ulpdu_size; - __be16 flags_rtr_type; - __be16 rqe_count; - __be32 ord; /* 6 */ - __be32 ird; - __be64 qp_dma_addr; /* 7 */ - __be32 qp_dma_size; /* 8 */ - __be32 irs; -}; - -struct t3_genbit { - u64 flit[15]; - __be64 genbit; -}; - -struct t3_wq_in_err { - u64 flit[13]; - u64 err; -}; - -enum rdma_init_wr_flags { - MPA_INITIATOR = (1<<0), - PRIV_QP = (1<<1), -}; - -union t3_wr { - struct t3_send_wr send; - struct t3_rdma_write_wr write; - struct t3_rdma_read_wr read; - struct t3_receive_wr recv; - struct t3_fastreg_wr fastreg; - struct t3_pbl_frag pbl_frag; - struct t3_local_inv_wr local_inv; - struct t3_bind_mw_wr bind; - struct t3_bypass_wr bypass; - struct t3_rdma_init_wr init; - struct t3_modify_qp_wr qp_mod; - struct t3_genbit genbit; - struct t3_wq_in_err wq_in_err; - __be64 flit[16]; -}; - -#define T3_SQ_CQE_FLIT 13 -#define T3_SQ_COOKIE_FLIT 14 - -#define T3_RQ_COOKIE_FLIT 13 -#define T3_RQ_CQE_FLIT 14 - -static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) -{ - return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); -} - -enum t3_wr_hdr_bits { - T3_EOP = 1, - T3_SOP = 2, - T3_SOPEOP = T3_EOP|T3_SOP, -}; - -static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, - enum t3_wr_flags flags, u8 genbit, u32 tid, - u8 len, u8 sopeop) -{ - wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | - V_FW_RIWR_SOPEOP(sopeop) | - V_FW_RIWR_FLAGS(flags)); - wmb(); - wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | - V_FW_RIWR_TID(tid) | - V_FW_RIWR_LEN(len)); - /* 2nd gen bit... 
*/ - ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit); -} - -/* - * T3 ULP2_TX commands - */ -enum t3_utx_mem_op { - T3_UTX_MEM_READ = 2, - T3_UTX_MEM_WRITE = 3 -}; - -/* T3 MC7 RDMA TPT entry format */ - -enum tpt_mem_type { - TPT_NON_SHARED_MR = 0x0, - TPT_SHARED_MR = 0x1, - TPT_MW = 0x2, - TPT_MW_RELAXED_PROTECTION = 0x3 -}; - -enum tpt_addr_type { - TPT_ZBTO = 0, - TPT_VATO = 1 -}; - -enum tpt_mem_perm { - TPT_MW_BIND = 0x10, - TPT_LOCAL_READ = 0x8, - TPT_LOCAL_WRITE = 0x4, - TPT_REMOTE_READ = 0x2, - TPT_REMOTE_WRITE = 0x1 -}; - -struct tpt_entry { - __be32 valid_stag_pdid; - __be32 flags_pagesize_qpid; - - __be32 rsvd_pbl_addr; - __be32 len; - __be32 va_hi; - __be32 va_low_or_fbo; - - __be32 rsvd_bind_cnt_or_pstag; - __be32 rsvd_pbl_size; -}; - -#define S_TPT_VALID 31 -#define V_TPT_VALID(x) ((x) << S_TPT_VALID) -#define F_TPT_VALID V_TPT_VALID(1U) - -#define S_TPT_STAG_KEY 23 -#define M_TPT_STAG_KEY 0xFF -#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY) -#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY) - -#define S_TPT_STAG_STATE 22 -#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE) -#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U) - -#define S_TPT_STAG_TYPE 20 -#define M_TPT_STAG_TYPE 0x3 -#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE) -#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE) - -#define S_TPT_PDID 0 -#define M_TPT_PDID 0xFFFFF -#define V_TPT_PDID(x) ((x) << S_TPT_PDID) -#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID) - -#define S_TPT_PERM 28 -#define M_TPT_PERM 0xF -#define V_TPT_PERM(x) ((x) << S_TPT_PERM) -#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM) - -#define S_TPT_REM_INV_DIS 27 -#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS) -#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U) - -#define S_TPT_ADDR_TYPE 26 -#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE) -#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U) - -#define S_TPT_MW_BIND_ENABLE 25 -#define 
V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE) -#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U) - -#define S_TPT_PAGE_SIZE 20 -#define M_TPT_PAGE_SIZE 0x1F -#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE) -#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE) - -#define S_TPT_PBL_ADDR 0 -#define M_TPT_PBL_ADDR 0x1FFFFFFF -#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR) -#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR) - -#define S_TPT_QPID 0 -#define M_TPT_QPID 0xFFFFF -#define V_TPT_QPID(x) ((x) << S_TPT_QPID) -#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID) - -#define S_TPT_PSTAG 0 -#define M_TPT_PSTAG 0xFFFFFF -#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG) -#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG) - -#define S_TPT_PBL_SIZE 0 -#define M_TPT_PBL_SIZE 0xFFFFF -#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE) -#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE) - -/* - * CQE defs - */ -struct t3_cqe { - __be32 header; - __be32 len; - union { - struct { - __be32 stag; - __be32 msn; - } rcqe; - struct { - u32 wrid_hi; - u32 wrid_low; - } scqe; - } u; -}; - -#define S_CQE_OOO 31 -#define M_CQE_OOO 0x1 -#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO) -#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO) - -#define S_CQE_QPID 12 -#define M_CQE_QPID 0x7FFFF -#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID) -#define V_CQE_QPID(x) ((x)<<S_CQE_QPID) - -#define S_CQE_SWCQE 11 -#define M_CQE_SWCQE 0x1 -#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE) -#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE) - -#define S_CQE_GENBIT 10 -#define M_CQE_GENBIT 0x1 -#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT) -#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT) - -#define S_CQE_STATUS 5 -#define M_CQE_STATUS 0x1F -#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS) -#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS) - -#define S_CQE_TYPE 4 -#define M_CQE_TYPE 0x1 
-#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE) -#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE) - -#define S_CQE_OPCODE 0 -#define M_CQE_OPCODE 0xF -#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE) -#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE) - -#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x).header))) -#define CQE_OOO(x) (G_CQE_OOO(be32_to_cpu((x).header))) -#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x).header))) -#define CQE_GENBIT(x) (G_CQE_GENBIT(be32_to_cpu((x).header))) -#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x).header))) -#define SQ_TYPE(x) (CQE_TYPE((x))) -#define RQ_TYPE(x) (!CQE_TYPE((x))) -#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header))) -#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header))) - -#define CQE_SEND_OPCODE(x)( \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \ - (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV)) - -#define CQE_LEN(x) (be32_to_cpu((x).len)) - -/* used for RQ completion processing */ -#define CQE_WRID_STAG(x) (be32_to_cpu((x).u.rcqe.stag)) -#define CQE_WRID_MSN(x) (be32_to_cpu((x).u.rcqe.msn)) - -/* used for SQ completion processing */ -#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi) -#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low) - -/* generic accessor macros */ -#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi) -#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low) - -#define TPT_ERR_SUCCESS 0x0 -#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */ - /* STAG is offlimt, being 0, */ - /* or STAG_key mismatch */ -#define TPT_ERR_PDID 0x2 /* PDID mismatch */ -#define TPT_ERR_QPID 0x3 /* QPID mismatch */ -#define TPT_ERR_ACCESS 0x4 /* Invalid access right */ -#define TPT_ERR_WRAP 0x5 /* Wrap error */ -#define TPT_ERR_BOUND 0x6 /* base and bounds voilation */ -#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */ - /* 
shared memory region */ -#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */ - /* shared memory region */ -#define TPT_ERR_ECC 0x9 /* ECC error detected */ -#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */ - /* reading PSTAG for a MW */ - /* Invalidate */ -#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */ - /* software error */ -#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */ -#define TPT_ERR_CRC 0x10 /* CRC error */ -#define TPT_ERR_MARKER 0x11 /* Marker error */ -#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */ -#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */ -#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */ -#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */ -#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */ -#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */ -#define TPT_ERR_MSN 0x18 /* MSN error */ -#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */ -#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */ - /* or READ_REQ */ -#define TPT_ERR_MSN_GAP 0x1B -#define TPT_ERR_MSN_RANGE 0x1C -#define TPT_ERR_IRD_OVERFLOW 0x1D -#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */ - /* software error */ -#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */ - /* mismatch) */ - -struct t3_swsq { - __u64 wr_id; - struct t3_cqe cqe; - __u32 sq_wptr; - __be32 read_len; - int opcode; - int complete; - int signaled; -}; - -struct t3_swrq { - __u64 wr_id; - __u32 pbl_addr; -}; - -/* - * A T3 WQ implements both the SQ and RQ. 
- */ -struct t3_wq { - union t3_wr *queue; /* DMA accessible memory */ - dma_addr_t dma_addr; /* DMA address for HW */ - DEFINE_DMA_UNMAP_ADDR(mapping); /* unmap kruft */ - u32 error; /* 1 once we go to ERROR */ - u32 qpid; - u32 wptr; /* idx to next available WR slot */ - u32 size_log2; /* total wq size */ - struct t3_swsq *sq; /* SW SQ */ - struct t3_swsq *oldest_read; /* tracks oldest pending read */ - u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ - u32 sq_rptr; /* pending wrs */ - u32 sq_size_log2; /* sq size */ - struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */ - u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ - u32 rq_rptr; /* pending wrs */ - struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */ - u32 rq_size_log2; /* rq size */ - u32 rq_addr; /* rq adapter address */ - void __iomem *doorbell; /* kernel db */ - u64 udb; /* user db if any */ - struct cxio_rdev *rdev; -}; - -struct t3_cq { - u32 cqid; - u32 rptr; - u32 wptr; - u32 size_log2; - dma_addr_t dma_addr; - DEFINE_DMA_UNMAP_ADDR(mapping); - struct t3_cqe *queue; - struct t3_cqe *sw_queue; - u32 sw_rptr; - u32 sw_wptr; -}; - -#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \ - CQE_GENBIT(*cqe)) - -struct t3_cq_status_page { - u32 cq_err; -}; - -static inline int cxio_cq_in_error(struct t3_cq *cq) -{ - return ((struct t3_cq_status_page *) - &cq->queue[1 << cq->size_log2])->cq_err; -} - -static inline void cxio_set_cq_in_error(struct t3_cq *cq) -{ - ((struct t3_cq_status_page *) - &cq->queue[1 << cq->size_log2])->cq_err = 1; -} - -static inline void cxio_set_wq_in_error(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 1; -} - -static inline void cxio_disable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err |= 2; -} - -static inline void cxio_enable_wq_db(struct t3_wq *wq) -{ - wq->queue->wq_in_err.err &= ~2; -} - -static inline int cxio_wq_db_enabled(struct t3_wq *wq) -{ - return !(wq->queue->wq_in_err.err & 2); -} - -static inline struct t3_cqe 
*cxio_next_hw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - return NULL; -} - -static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq) -{ - struct t3_cqe *cqe; - - if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) { - cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2)); - return cqe; - } - cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2)); - if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe)) - return cqe; - return NULL; -} - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c deleted file mode 100644 index 56a8ab6210cf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include <linux/module.h> -#include <linux/moduleparam.h> - -#include <rdma/ib_verbs.h> - -#include "cxgb3_offload.h" -#include "iwch_provider.h" -#include <rdma/cxgb3-abi.h> -#include "iwch.h" -#include "iwch_cm.h" - -#define DRV_VERSION "1.1" - -MODULE_AUTHOR("Boyd Faulkner, Steve Wise"); -MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); -MODULE_LICENSE("Dual BSD/GPL"); - -static void open_rnic_dev(struct t3cdev *); -static void close_rnic_dev(struct t3cdev *); -static void iwch_event_handler(struct t3cdev *, u32, u32); - -struct cxgb3_client t3c_client = { - .name = "iw_cxgb3", - .add = open_rnic_dev, - .remove = close_rnic_dev, - .handlers = t3c_handlers, - .redirect = iwch_ep_redirect, - .event_handler = iwch_event_handler -}; - -static LIST_HEAD(dev_list); -static DEFINE_MUTEX(dev_mutex); - -static void disable_dbs(struct iwch_dev *rnicp) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) - cxio_disable_wq_db(&qhp->wq); - xa_unlock_irq(&rnicp->qps); -} - -static void enable_dbs(struct iwch_dev *rnicp, int ring_db) -{ - unsigned long index; - struct iwch_qp *qhp; - - xa_lock_irq(&rnicp->qps); - xa_for_each(&rnicp->qps, index, qhp) { - if (ring_db) - ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, - qhp->wq.qpid); - cxio_enable_wq_db(&qhp->wq); - } - xa_unlock_irq(&rnicp->qps); -} - -static void iwch_db_drop_task(struct work_struct *work) -{ - struct iwch_dev *rnicp = container_of(work, struct iwch_dev, - 
db_drop_task.work); - enable_dbs(rnicp, 1); -} - -static void rnic_init(struct iwch_dev *rnicp) -{ - pr_debug("%s iwch_dev %p\n", __func__, rnicp); - xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ); - xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ); - INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); - - rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; - rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; - rnicp->attr.max_sge_per_wr = T3_MAX_SGE; - rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; - rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; - rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; - rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); - rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; - rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; - rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; - rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; - rnicp->attr.can_resize_wq = 0; - rnicp->attr.max_rdma_reads_per_qp = 8; - rnicp->attr.max_rdma_read_resources = - rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps; - rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */ - rnicp->attr.max_rdma_read_depth = - rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps; - rnicp->attr.rq_overflow_handled = 0; - rnicp->attr.can_modify_ird = 0; - rnicp->attr.can_modify_ord = 0; - rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1; - rnicp->attr.stag0_value = 1; - rnicp->attr.zbva_support = 1; - rnicp->attr.local_invalidate_fence = 1; - rnicp->attr.cq_overflow_detection = 1; - return; -} - -static void open_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *rnicp; - - pr_debug("%s t3cdev %p\n", __func__, tdev); - pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); - rnicp = ib_alloc_device(iwch_dev, ibdev); - if (!rnicp) { - pr_err("Cannot allocate ib device\n"); - return; - } - rnicp->rdev.ulp = rnicp; - rnicp->rdev.t3cdev_p = tdev; - - mutex_lock(&dev_mutex); - - if (cxio_rdev_open(&rnicp->rdev)) { - 
mutex_unlock(&dev_mutex); - pr_err("Unable to open CXIO rdev\n"); - ib_dealloc_device(&rnicp->ibdev); - return; - } - - rnic_init(rnicp); - - list_add_tail(&rnicp->entry, &dev_list); - mutex_unlock(&dev_mutex); - - if (iwch_register_device(rnicp)) { - pr_err("Unable to register device\n"); - close_rnic_dev(tdev); - } - pr_info("Initialized device %s\n", - pci_name(rnicp->rdev.rnic_info.pdev)); - return; -} - -static void close_rnic_dev(struct t3cdev *tdev) -{ - struct iwch_dev *dev, *tmp; - pr_debug("%s t3cdev %p\n", __func__, tdev); - mutex_lock(&dev_mutex); - list_for_each_entry_safe(dev, tmp, &dev_list, entry) { - if (dev->rdev.t3cdev_p == tdev) { - dev->rdev.flags = CXIO_ERROR_FATAL; - synchronize_net(); - cancel_delayed_work_sync(&dev->db_drop_task); - list_del(&dev->entry); - iwch_unregister_device(dev); - cxio_rdev_close(&dev->rdev); - WARN_ON(!xa_empty(&dev->cqs)); - WARN_ON(!xa_empty(&dev->qps)); - WARN_ON(!xa_empty(&dev->mrs)); - ib_dealloc_device(&dev->ibdev); - break; - } - } - mutex_unlock(&dev_mutex); -} - -static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) -{ - struct cxio_rdev *rdev = tdev->ulp; - struct iwch_dev *rnicp; - struct ib_event event; - u32 portnum = port_id + 1; - int dispatch = 0; - - if (!rdev) - return; - rnicp = rdev_to_iwch_dev(rdev); - switch (evt) { - case OFFLOAD_STATUS_DOWN: { - rdev->flags = CXIO_ERROR_FATAL; - synchronize_net(); - event.event = IB_EVENT_DEVICE_FATAL; - dispatch = 1; - break; - } - case OFFLOAD_PORT_DOWN: { - event.event = IB_EVENT_PORT_ERR; - dispatch = 1; - break; - } - case OFFLOAD_PORT_UP: { - event.event = IB_EVENT_PORT_ACTIVE; - dispatch = 1; - break; - } - case OFFLOAD_DB_FULL: { - disable_dbs(rnicp); - break; - } - case OFFLOAD_DB_EMPTY: { - enable_dbs(rnicp, 1); - break; - } - case OFFLOAD_DB_DROP: { - unsigned long delay = 1000; - unsigned short r; - - disable_dbs(rnicp); - get_random_bytes(&r, 2); - delay += r & 1023; - - /* - * delay is between 1000-2023 usecs. 
- */ - schedule_delayed_work(&rnicp->db_drop_task, - usecs_to_jiffies(delay)); - break; - } - } - - if (dispatch) { - event.device = &rnicp->ibdev; - event.element.port_num = portnum; - ib_dispatch_event(&event); - } - - return; -} - -static int __init iwch_init_module(void) -{ - int err; - - err = cxio_hal_init(); - if (err) - return err; - err = iwch_cm_init(); - if (err) - return err; - cxio_register_ev_cb(iwch_ev_dispatch); - cxgb3_register_client(&t3c_client); - return 0; -} - -static void __exit iwch_exit_module(void) -{ - cxgb3_unregister_client(&t3c_client); - cxio_unregister_ev_cb(iwch_ev_dispatch); - iwch_cm_term(); - cxio_hal_exit(); -} - -module_init(iwch_init_module); -module_exit(iwch_exit_module); diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h deleted file mode 100644 index 310a937bffcf..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __IWCH_H__ -#define __IWCH_H__ - -#include <linux/mutex.h> -#include <linux/list.h> -#include <linux/spinlock.h> -#include <linux/xarray.h> -#include <linux/workqueue.h> - -#include <rdma/ib_verbs.h> - -#include "cxio_hal.h" -#include "cxgb3_offload.h" - -struct iwch_pd; -struct iwch_cq; -struct iwch_qp; -struct iwch_mr; - -struct iwch_rnic_attributes { - u32 max_qps; - u32 max_wrs; /* Max for any SQ/RQ */ - u32 max_sge_per_wr; - u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */ - u32 max_cqs; - u32 max_cqes_per_cq; - u32 max_mem_regs; - u32 max_phys_buf_entries; /* for phys buf list */ - u32 max_pds; - - /* - * The memory page sizes supported by this RNIC. - * Bit position i in bitmap indicates page of - * size (4k)^i. Phys block list mode unsupported. - */ - u32 mem_pgsizes_bitmask; - u64 max_mr_size; - u8 can_resize_wq; - - /* - * The maximum number of RDMA Reads that can be outstanding - * per QP with this RNIC as the target. - */ - u32 max_rdma_reads_per_qp; - - /* - * The maximum number of resources used for RDMA Reads - * by this RNIC with this RNIC as the target. - */ - u32 max_rdma_read_resources; - - /* - * The max depth per QP for initiation of RDMA Read - * by this RNIC. 
- */ - u32 max_rdma_read_qp_depth; - - /* - * The maximum depth for initiation of RDMA Read - * operations by this RNIC on all QPs - */ - u32 max_rdma_read_depth; - u8 rq_overflow_handled; - u32 can_modify_ird; - u32 can_modify_ord; - u32 max_mem_windows; - u32 stag0_value; - u8 zbva_support; - u8 local_invalidate_fence; - u32 cq_overflow_detection; -}; - -struct iwch_dev { - struct ib_device ibdev; - struct cxio_rdev rdev; - u32 device_cap_flags; - struct iwch_rnic_attributes attr; - struct xarray cqs; - struct xarray qps; - struct xarray mrs; - struct list_head entry; - struct delayed_work db_drop_task; -}; - -static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) -{ - return container_of(ibdev, struct iwch_dev, ibdev); -} - -static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev) -{ - return container_of(rdev, struct iwch_dev, rdev); -} - -static inline int t3b_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3B; -} - -static inline int t3a_device(const struct iwch_dev *rhp) -{ - return rhp->rdev.t3cdev_p->type == T3A; -} - -static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid) -{ - return xa_load(&rhp->cqs, cqid); -} - -static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) -{ - return xa_load(&rhp->qps, qpid); -} - -static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) -{ - return xa_load(&rhp->mrs, mmid); -} - -extern struct cxgb3_client t3c_client; -extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; -extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb); - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c deleted file mode 100644 index 0bca72cb4d9a..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ /dev/null @@ -1,2258 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. 
- * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/module.h> -#include <linux/list.h> -#include <linux/slab.h> -#include <linux/workqueue.h> -#include <linux/skbuff.h> -#include <linux/timer.h> -#include <linux/notifier.h> -#include <linux/inetdevice.h> - -#include <net/neighbour.h> -#include <net/netevent.h> -#include <net/route.h> - -#include "tcb.h" -#include "cxgb3_offload.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" - -static char *states[] = { - "idle", - "listen", - "connecting", - "mpa_wait_req", - "mpa_req_sent", - "mpa_req_rcvd", - "mpa_rep_sent", - "fpdu_mode", - "aborting", - "closing", - "moribund", - "dead", - NULL, -}; - -int peer2peer = 0; -module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); - -static int ep_timeout_secs = 60; -module_param(ep_timeout_secs, int, 0644); -MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " - "in seconds (default=60)"); - -static int mpa_rev = 1; -module_param(mpa_rev, int, 0644); -MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " - "1 is spec compliant. 
(default=1)"); - -static int markers_enabled = 0; -module_param(markers_enabled, int, 0644); -MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); - -static int crc_enabled = 1; -module_param(crc_enabled, int, 0644); -MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); - -static int rcv_win = 256 * 1024; -module_param(rcv_win, int, 0644); -MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)"); - -static int snd_win = 32 * 1024; -module_param(snd_win, int, 0644); -MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); - -static unsigned int nocong = 0; -module_param(nocong, uint, 0644); -MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)"); - -static unsigned int cong_flavor = 1; -module_param(cong_flavor, uint, 0644); -MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)"); - -static struct workqueue_struct *workq; - -static struct sk_buff_head rxq; - -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); -static void ep_timeout(struct timer_list *t); -static void connect_reply_upcall(struct iwch_ep *ep, int status); - -static void start_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (timer_pending(&ep->timer)) { - pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep); - del_timer_sync(&ep->timer); - } else - get_ep(&ep->com); - ep->timer.expires = jiffies + ep_timeout_secs * HZ; - add_timer(&ep->timer); -} - -static void stop_ep_timer(struct iwch_ep *ep) -{ - pr_debug("%s ep %p\n", __func__, ep); - if (!timer_pending(&ep->timer)) { - WARN(1, "%s timer stopped when its not running! 
ep %p state %u\n", - __func__, ep, ep->com.state); - return; - } - del_timer_sync(&ep->timer); - put_ep(&ep->com); -} - -static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = l2t_send(tdev, skb, l2e); - if (error < 0) - kfree_skb(skb); - return error < 0 ? error : 0; -} - -int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) -{ - int error = 0; - struct cxio_rdev *rdev; - - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - kfree_skb(skb); - return -EIO; - } - error = cxgb3_ofld_send(tdev, skb); - if (error < 0) - kfree_skb(skb); - return error < 0 ? error : 0; -} - -static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) -{ - struct cpl_tid_release *req; - - skb = get_skb(skb, sizeof(*req), GFP_KERNEL); - if (!skb) - return; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - skb->priority = CPL_PRIORITY_SETUP; - iwch_cxgb3_ofld_send(tdev, skb); - return; -} - -int iwch_quiesce_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -int iwch_resume_tid(struct iwch_ep *ep) -{ - struct cpl_set_tcb_field *req; - struct sk_buff *skb = 
get_skb(NULL, sizeof(*req), GFP_KERNEL); - - if (!skb) - return -ENOMEM; - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_RX_QUIESCE); - req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); - req->val = 0; - - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static void set_emss(struct iwch_ep *ep, u16 opt) -{ - pr_debug("%s ep %p opt %u\n", __func__, ep, opt); - ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40; - if (G_TCPOPT_TSTAMP(opt)) - ep->emss -= 12; - if (ep->emss < 128) - ep->emss = 128; - pr_debug("emss=%d\n", ep->emss); -} - -static enum iwch_ep_state state_read(struct iwch_ep_common *epc) -{ - unsigned long flags; - enum iwch_ep_state state; - - spin_lock_irqsave(&epc->lock, flags); - state = epc->state; - spin_unlock_irqrestore(&epc->lock, flags); - return state; -} - -static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - epc->state = new; -} - -static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) -{ - unsigned long flags; - - spin_lock_irqsave(&epc->lock, flags); - pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]); - __state_set(epc, new); - spin_unlock_irqrestore(&epc->lock, flags); - return; -} - -static void *alloc_ep(int size, gfp_t gfp) -{ - struct iwch_ep_common *epc; - - epc = kzalloc(size, gfp); - if (epc) { - kref_init(&epc->kref); - spin_lock_init(&epc->lock); - init_waitqueue_head(&epc->waitq); - } - pr_debug("%s alloc ep %p\n", __func__, epc); - return epc; -} - -void __free_ep(struct kref *kref) -{ - struct iwch_ep *ep; - ep = container_of(container_of(kref, struct iwch_ep_common, kref), - struct iwch_ep, com); - pr_debug("%s ep %p state %s\n", - __func__, ep, states[state_read(&ep->com)]); - if 
(test_bit(RELEASE_RESOURCES, &ep->com.flags)) { - cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - } - kfree(ep); -} - -static void release_ep_resources(struct iwch_ep *ep) -{ - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - set_bit(RELEASE_RESOURCES, &ep->com.flags); - put_ep(&ep->com); -} - -static int status2errno(int status) -{ - switch (status) { - case CPL_ERR_NONE: - return 0; - case CPL_ERR_CONN_RESET: - return -ECONNRESET; - case CPL_ERR_ARP_MISS: - return -EHOSTUNREACH; - case CPL_ERR_CONN_TIMEDOUT: - return -ETIMEDOUT; - case CPL_ERR_TCAM_FULL: - return -ENOMEM; - case CPL_ERR_CONN_EXIST: - return -EADDRINUSE; - default: - return -EIO; - } -} - -/* - * Try and reuse skbs already allocated... - */ -static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) -{ - if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { - skb_trim(skb, 0); - skb_get(skb); - } else { - skb = alloc_skb(len, gfp); - } - return skb; -} - -static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - return rt; -} - -static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) -{ - int i = 0; - - while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) - ++i; - return i; -} - -static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p\n", __func__, dev); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for an active open. - */ -static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - pr_err("ARP failure during connect\n"); - kfree_skb(skb); -} - -/* - * Handle an ARP failure for a CPL_ABORT_REQ. 
Change it into a no RST variant - * and send it along. - */ -static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb) -{ - struct cpl_abort_req *req = cplhdr(skb); - - pr_debug("%s t3cdev %p\n", __func__, dev); - req->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(dev, skb); -} - -static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) -{ - struct cpl_close_con_req *req; - struct sk_buff *skb; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid)); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - struct cpl_abort_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(skb, sizeof(*req), gfp); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, abort_arp_failure); - req = skb_put_zero(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); - req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_connect(struct iwch_ep *ep) -{ - struct cpl_act_open_req *req; - struct sk_buff *skb; - u32 opt0h, opt0l, opt2; - unsigned int mtu_idx; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - mtu_idx = 
find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - skb->priority = CPL_PRIORITY_SETUP; - set_arp_failure_handler(skb, act_open_req_arp_failure); - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid)); - req->local_port = ep->com.local_addr.sin_port; - req->peer_port = ep->com.remote_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; - req->opt0h = htonl(opt0h); - req->opt0l = htonl(opt0l); - req->params = 0; - req->opt2 = htonl(opt2); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - - pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen); - - BUG_ON(skb_cloned(skb)); - - mpalen = sizeof(*mpa) + ep->plen; - if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) { - kfree_skb(skb); - skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - connect_reply_upcall(ep, -ENOMEM); - return; - } - } - skb_trim(skb, 0); - skb_reserve(skb, sizeof(*req)); - skb_put(skb, mpalen); - skb->priority = CPL_PRIORITY_DATA; - mpa = (struct mpa_message *) skb->data; - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); - mpa->flags = (crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? 
MPA_MARKERS : 0); - mpa->private_data_size = htons(ep->plen); - mpa->revision = mpa_rev; - - if (ep->plen) - memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. - */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); - return; -} - -static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = MPA_REJECT; - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb again. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. 
- */ - skb_get(skb); - skb->priority = CPL_PRIORITY_DATA; - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(mpalen); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - BUG_ON(ep->mpa_skb); - ep->mpa_skb = skb; - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) -{ - int mpalen; - struct tx_data_wr *req; - struct mpa_message *mpa; - int len; - struct sk_buff *skb; - - pr_debug("%s ep %p plen %d\n", __func__, ep, plen); - - mpalen = sizeof(*mpa) + plen; - - skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - cannot alloc skb!\n", __func__); - return -ENOMEM; - } - skb->priority = CPL_PRIORITY_DATA; - skb_reserve(skb, sizeof(*req)); - mpa = skb_put(skb, mpalen); - memset(mpa, 0, sizeof(*mpa)); - memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); - mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | - (markers_enabled ? MPA_MARKERS : 0); - mpa->revision = mpa_rev; - mpa->private_data_size = htons(plen); - if (plen) - memcpy(mpa->private_data, pdata, plen); - - /* - * Reference the mpa skb. This ensures the data area - * will remain in memory until the hw acks the tx. - * Function tx_ack() will deref it. 
- */ - skb_get(skb); - set_arp_failure_handler(skb, arp_failure_discard); - skb_reset_transport_header(skb); - len = skb->len; - req = skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); - req->wr_lo = htonl(V_WR_TID(ep->hwtid)); - req->len = htonl(len); - req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | - V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_INIT); - req->sndseq = htonl(ep->snd_seq); - ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); - return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); -} - -static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_establish *req = cplhdr(skb); - unsigned int tid = GET_TID(req); - - pr_debug("%s ep %p tid %d\n", __func__, ep, tid); - - dst_confirm(ep->dst); - - /* setup the hwtid for this connection */ - ep->hwtid = tid; - cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); - - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - /* dealloc the atid */ - cxgb3_free_atid(ep->com.tdev, ep->atid); - - /* start MPA negotiation */ - send_mpa_req(ep, skb); - - return 0; -} - -static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) -{ - pr_debug("%s ep %p\n", __FILE__, ep); - state_set(&ep->com, ABORTING); - send_abort(ep, skb, gfp); -} - -static void close_complete_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - if (ep->com.cm_id) { - pr_debug("close complete delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void peer_close_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, 
ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_DISCONNECT; - if (ep->com.cm_id) { - pr_debug("peer close delivered ep %p cm_id %p tid %d\n", - ep, ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static void peer_abort_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CLOSE; - event.status = -ECONNRESET; - if (ep->com.cm_id) { - pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep, - ep->com.cm_id, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_reply_upcall(struct iwch_ep *ep, int status) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p status %d\n", __func__, ep, status); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REPLY; - event.status = status; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.remote_addr)); - - if ((status == 0) || (status == -ECONNREFUSED)) { - event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - } - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d status %d\n", __func__, ep, - ep->hwtid, status); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } - if (status < 0) { - ep->com.cm_id->rem_ref(ep->com.cm_id); - ep->com.cm_id = NULL; - ep->com.qp = NULL; - } -} - -static void connect_request_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_CONNECT_REQUEST; - memcpy(&event.local_addr, &ep->com.local_addr, - sizeof(ep->com.local_addr)); - memcpy(&event.remote_addr, &ep->com.remote_addr, - sizeof(ep->com.local_addr)); - 
event.private_data_len = ep->plen; - event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); - event.provider_data = ep; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (state_read(&ep->parent_ep->com) != DEAD) { - get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } - put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; -} - -static void established_upcall(struct iwch_ep *ep) -{ - struct iw_cm_event event; - - pr_debug("%s ep %p\n", __func__, ep); - memset(&event, 0, sizeof(event)); - event.event = IW_CM_EVENT_ESTABLISHED; - /* - * Until ird/ord negotiation via MPAv2 support is added, send max - * supported values - */ - event.ird = event.ord = 8; - if (ep->com.cm_id) { - pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - ep->com.cm_id->event_handler(ep->com.cm_id, &event); - } -} - -static int update_rx_credits(struct iwch_ep *ep, u32 credits) -{ - struct cpl_rx_data_ack *req; - struct sk_buff *skb; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("update_rx_credits - cannot alloc skb!\n"); - return 0; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); - req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1)); - skb->priority = CPL_PRIORITY_ACK; - iwch_cxgb3_ofld_send(ep->com.tdev, skb); - return credits; -} - -static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - int err; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. 
- */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - err = -EINVAL; - goto err; - } - - /* - * copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * if we don't even have the mpa message, then bail. - */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* Validate MPA header. */ - if (mpa->revision != mpa_rev) { - err = -EPROTO; - goto err; - } - if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { - err = -EPROTO; - goto err; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. - */ - if (plen > MPA_MAX_PRIVATE_DATA) { - err = -EPROTO; - goto err; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - err = -EPROTO; - goto err; - } - - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - * We'll continue process when more data arrives. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - if (mpa->flags & MPA_REJECT) { - err = -ECONNREFUSED; - goto err; - } - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. And - * the MPA header is valid. - */ - state_set(&ep->com, FPDU_MODE); - ep->mpa_attr.initiator = 1; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; - - /* bind QP and TID with INIT_WR */ - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err; - - if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { - iwch_post_zb_read(ep); - } - - goto out; -err: - abort_connection(ep, skb, GFP_KERNEL); -out: - connect_reply_upcall(ep, err); - return; -} - -static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) -{ - struct mpa_message *mpa; - u16 plen; - - pr_debug("%s ep %p\n", __func__, ep); - - /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. - */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_WAIT) - return; - - /* - * If we get more than the supported amount of private data - * then we must fail this connection. - */ - if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - - /* - * Copy the new data into our accumulation buffer. - */ - skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), - skb->len); - ep->mpa_pkt_len += skb->len; - - /* - * If we don't even have the mpa message, then bail. - * We'll continue process when more data arrives. 
- */ - if (ep->mpa_pkt_len < sizeof(*mpa)) - return; - pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - mpa = (struct mpa_message *) ep->mpa_pkt; - - /* - * Validate MPA Header. - */ - if (mpa->revision != mpa_rev) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - plen = ntohs(mpa->private_data_size); - - /* - * Fail if there's too much private data. - */ - if (plen > MPA_MAX_PRIVATE_DATA) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - - /* - * If plen does not account for pkt size - */ - if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { - abort_connection(ep, skb, GFP_KERNEL); - return; - } - ep->plen = (u8) plen; - - /* - * If we don't have all the pdata yet, then bail. - */ - if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; - - /* - * If we get here we have accumulated the entire mpa - * start reply message including private data. - */ - ep->mpa_attr.initiator = 0; - ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; - ep->mpa_attr.recv_marker_enabled = markers_enabled; - ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
1 : 0; - ep->mpa_attr.version = mpa_rev; - pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n", - __func__, - ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, - ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); - - state_set(&ep->com, MPA_REQ_RCVD); - - /* drive upcall */ - connect_request_upcall(ep); - return; -} - -static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_rx_data *hdr = cplhdr(skb); - unsigned int dlen = ntohs(hdr->len); - - pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen); - - skb_pull(skb, sizeof(*hdr)); - skb_trim(skb, dlen); - - ep->rcv_seq += dlen; - BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); - - switch (state_read(&ep->com)) { - case MPA_REQ_SENT: - process_mpa_reply(ep, skb); - break; - case MPA_REQ_WAIT: - process_mpa_request(ep, skb); - break; - case MPA_REP_SENT: - break; - default: - pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n", - __func__, ep, state_read(&ep->com), ep->hwtid); - - /* - * The ep will timeout and inform the ULP of the failure. - * See ep_timeout(). - */ - break; - } - - /* update RX credits */ - update_rx_credits(ep, dlen); - - return CPL_RET_BUF_DONE; -} - -/* - * Upcall from the adapter indicating data has been transmitted. - * For us its just the single MPA request or reply. We can now free - * the skb holding the mpa message. 
- */ -static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_wr_ack *hdr = cplhdr(skb); - unsigned int credits = ntohs(hdr->credits); - unsigned long flags; - int post_zb = 0; - - pr_debug("%s ep %p credits %u\n", __func__, ep, credits); - - if (credits == 0) { - pr_debug("%s 0 credit ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - BUG_ON(credits != 1); - dst_confirm(ep->dst); - if (!ep->mpa_skb) { - pr_debug("%s rdma_init wr_ack ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->mpa_attr.initiator) { - pr_debug("%s initiator ep %p state %u\n", - __func__, ep, ep->com.state); - if (peer2peer && ep->com.state == FPDU_MODE) - post_zb = 1; - } else { - pr_debug("%s responder ep %p state %u\n", - __func__, ep, ep->com.state); - if (ep->com.state == MPA_REQ_RCVD) { - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - } - } - } else { - pr_debug("%s lsm ack ep %p state %u freeing skb\n", - __func__, ep, ep->com.state); - kfree_skb(ep->mpa_skb); - ep->mpa_skb = NULL; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (post_zb) - iwch_post_zb_read(ep); - return CPL_RET_BUF_DONE; -} - -static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* - * We get 2 abort replies from the HW. The first one must - * be ignored except for scribbling that we need one more. 
- */ - if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case ABORTING: - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - default: - pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Return whether a failed active open has allocated a TID - */ -static inline int act_open_has_tid(int status) -{ - return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && - status != CPL_ERR_ARP_MISS; -} - -static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_act_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status, - status2errno(rpl->status)); - connect_reply_upcall(ep, status2errno(rpl->status)); - state_set(&ep->com, DEAD); - if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status)) - release_tid(ep->com.tdev, GET_TID(rpl), NULL); - cxgb3_free_atid(ep->com.tdev, ep->atid); - dst_release(ep->dst); - l2t_release(ep->com.tdev, ep->l2t); - put_ep(&ep->com); - return CPL_RET_BUF_DONE; -} - -static int listen_start(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_pass_open_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("t3c_listen_start failed to alloc skb!\n"); - return -ENOMEM; - } - - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid)); - req->local_port = ep->com.local_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_port = 0; - req->peer_ip = 0; - req->peer_netmask = 0; - req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS); 
- req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10)); - req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK)); - - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_pass_open_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p status %d error %d\n", __func__, ep, - rpl->status, status2errno(rpl->status)); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - - return CPL_RET_BUF_DONE; -} - -static int listen_stop(struct iwch_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_close_listserv_req *req; - - pr_debug("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - pr_err("%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - req = skb_put(skb, sizeof(*req)); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - req->cpu_idx = 0; - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); - skb->priority = 1; - return iwch_cxgb3_ofld_send(ep->com.tdev, skb); -} - -static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, - void *ctx) -{ - struct iwch_listen_ep *ep = ctx; - struct cpl_close_listserv_rpl *rpl = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->com.rpl_err = status2errno(rpl->status); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); - return CPL_RET_BUF_DONE; -} - -static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) -{ - struct cpl_pass_accept_rpl *rpl; - unsigned int mtu_idx; - u32 opt0h, opt0l, opt2; - int wscale; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(*rpl)); - skb_get(skb); - mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); - wscale = compute_wscale(rcv_win); - opt0h = V_NAGLE(0) | - V_NO_CONG(nocong) | - V_KEEP_ALIVE(1) | - F_TCAM_BYPASS | - V_WND_SCALE(wscale) | - 
V_MSS_IDX(mtu_idx) | - V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); - opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | - V_CONG_CONTROL_FLAVOR(cong_flavor); - - rpl = cplhdr(skb); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(opt0h); - rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT); - rpl->opt2 = htonl(opt2); - rpl->rsvd = rpl->opt2; /* workaround for HW bug */ - skb->priority = CPL_PRIORITY_SETUP; - iwch_l2t_send(ep->com.tdev, skb, ep->l2t); - - return; -} - -static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, - struct sk_buff *skb) -{ - pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid, - peer_ip); - BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(struct cpl_tid_release)); - skb_get(skb); - - if (tdev->type != T3A) - release_tid(tdev, hwtid, skb); - else { - struct cpl_pass_accept_rpl *rpl; - - rpl = cplhdr(skb); - skb->priority = CPL_PRIORITY_SETUP; - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - hwtid)); - rpl->peer_ip = peer_ip; - rpl->opt0h = htonl(F_TCAM_BYPASS); - rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); - rpl->opt2 = 0; - rpl->rsvd = rpl->opt2; - iwch_cxgb3_ofld_send(tdev, skb); - } -} - -static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *child_ep, *parent_ep = ctx; - struct cpl_pass_accept_req *req = cplhdr(skb); - unsigned int hwtid = GET_TID(req); - struct dst_entry *dst; - struct l2t_entry *l2t; - struct rtable *rt; - struct iff_mac tim; - - pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); - - if (state_read(&parent_ep->com) != LISTEN) { - pr_err("%s - listening ep not in LISTEN\n", __func__); - goto reject; - } - - /* - * Find the netdev 
for this connection request. - */ - tim.mac_addr = req->dst_mac; - tim.vlan_tag = ntohs(req->vlan_tag); - if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { - pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac); - goto reject; - } - - /* Find output route */ - rt = find_route(tdev, - req->local_ip, - req->peer_ip, - req->local_port, - req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid))); - if (!rt) { - pr_err("%s - failed to find dst entry!\n", __func__); - goto reject; - } - dst = &rt->dst; - l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); - if (!l2t) { - pr_err("%s - failed to allocate l2t entry!\n", __func__); - dst_release(dst); - goto reject; - } - child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); - if (!child_ep) { - pr_err("%s - failed to allocate ep entry!\n", __func__); - l2t_release(tdev, l2t); - dst_release(dst); - goto reject; - } - state_set(&child_ep->com, CONNECTING); - child_ep->com.tdev = tdev; - child_ep->com.cm_id = NULL; - child_ep->com.local_addr.sin_family = AF_INET; - child_ep->com.local_addr.sin_port = req->local_port; - child_ep->com.local_addr.sin_addr.s_addr = req->local_ip; - child_ep->com.remote_addr.sin_family = AF_INET; - child_ep->com.remote_addr.sin_port = req->peer_port; - child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip; - get_ep(&parent_ep->com); - child_ep->parent_ep = parent_ep; - child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid)); - child_ep->l2t = l2t; - child_ep->dst = dst; - child_ep->hwtid = hwtid; - timer_setup(&child_ep->timer, ep_timeout, 0); - cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid); - accept_cr(child_ep, req->peer_ip, skb); - goto out; -reject: - reject_cr(tdev, hwtid, req->peer_ip, skb); -out: - return CPL_RET_BUF_DONE; -} - -static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct cpl_pass_establish *req = cplhdr(skb); - - pr_debug("%s ep %p\n", __func__, ep); - ep->snd_seq = ntohl(req->snd_isn); - ep->rcv_seq = 
ntohl(req->rcv_isn); - - set_emss(ep, ntohs(req->tcp_opt)); - - dst_confirm(ep->dst); - state_set(&ep->com, MPA_REQ_WAIT); - start_ep_timer(ep); - - return CPL_RET_BUF_DONE; -} - -static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int disconnect = 1; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - dst_confirm(ep->dst); - - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case MPA_REQ_WAIT: - __state_set(&ep->com, CLOSING); - break; - case MPA_REQ_SENT: - __state_set(&ep->com, CLOSING); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). - */ - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REP_SENT: - __state_set(&ep->com, CLOSING); - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case FPDU_MODE: - start_ep_timer(ep); - __state_set(&ep->com, CLOSING); - attrs.next_state = IWCH_QP_STATE_CLOSING; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - peer_close_upcall(ep); - break; - case ABORTING: - disconnect = 0; - break; - case CLOSING: - __state_set(&ep->com, MORIBUND); - disconnect = 0; - break; - case MORIBUND: - stop_ep_timer(ep); - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_IDLE; - iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - disconnect = 0; - break; - case DEAD: - disconnect = 0; - break; - default: - 
BUG_ON(1); - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (disconnect) - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * Returns whether an ABORT_REQ_RSS message is a negative advice. - */ -static int is_neg_adv_abort(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE; -} - -static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_abort_req_rss *req = cplhdr(skb); - struct iwch_ep *ep = ctx; - struct cpl_abort_rpl *rpl; - struct sk_buff *rpl_skb; - struct iwch_qp_attributes attrs; - int ret; - int release = 0; - unsigned long flags; - - if (is_neg_adv_abort(req->status)) { - pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep, - ep->hwtid); - t3_l2t_send_event(ep->com.tdev, ep->l2t); - return CPL_RET_BUF_DONE; - } - - /* - * We get 2 peer aborts from the HW. The first one must - * be ignored except for scribbling that we need one more. - */ - if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) { - return CPL_RET_BUF_DONE; - } - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state); - switch (ep->com.state) { - case CONNECTING: - break; - case MPA_REQ_WAIT: - stop_ep_timer(ep); - break; - case MPA_REQ_SENT: - stop_ep_timer(ep); - connect_reply_upcall(ep, -ECONNRESET); - break; - case MPA_REP_SENT: - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. Also wake up anyone waiting - * in rdma connection migration (see iwch_accept_cr()). 
- */ - ep->com.rpl_done = 1; - ep->com.rpl_err = -ECONNRESET; - pr_debug("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); - break; - case MORIBUND: - case CLOSING: - stop_ep_timer(ep); - /*FALLTHROUGH*/ - case FPDU_MODE: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - ret = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (ret) - pr_err("%s - qp <- error failed!\n", __func__); - } - peer_abort_upcall(ep); - break; - case ABORTING: - break; - case DEAD: - pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); - spin_unlock_irqrestore(&ep->com.lock, flags); - return CPL_RET_BUF_DONE; - default: - BUG_ON(1); - break; - } - dst_confirm(ep->dst); - if (ep->com.state != ABORTING) { - __state_set(&ep->com, DEAD); - release = 1; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - - rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); - if (!rpl_skb) { - pr_err("%s - cannot allocate skb!\n", __func__); - release = 1; - goto out; - } - rpl_skb->priority = CPL_PRIORITY_DATA; - rpl = skb_put(rpl_skb, sizeof(*rpl)); - rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); - rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); - OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; - iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb); -out: - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - struct iwch_qp_attributes attrs; - unsigned long flags; - int release = 0; - - pr_debug("%s ep %p\n", __func__, ep); - BUG_ON(!ep); - - /* The cm_id may be null if we failed to connect */ - spin_lock_irqsave(&ep->com.lock, flags); - switch (ep->com.state) { - case CLOSING: - __state_set(&ep->com, MORIBUND); - break; - case MORIBUND: - stop_ep_timer(ep); - if ((ep->com.cm_id) && (ep->com.qp)) { - attrs.next_state = IWCH_QP_STATE_IDLE; - 
iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, - IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - close_complete_upcall(ep); - __state_set(&ep->com, DEAD); - release = 1; - break; - case ABORTING: - case DEAD: - break; - default: - BUG_ON(1); - break; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (release) - release_ep_resources(ep); - return CPL_RET_BUF_DONE; -} - -/* - * T3A does 3 things when a TERM is received: - * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet - * 2) generate an async event on the QP with the TERMINATE opcode - * 3) post a TERMINATE opcode cqe into the associated CQ. - * - * For (1), we save the message in the qp for later consumer consumption. - * For (2), we move the QP into TERMINATE, post a QP event and disconnect. - * For (3), we toss the CQE in cxio_poll_cq(). - * - * terminate() handles case (1)... - */ -static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep *ep = ctx; - - if (state_read(&ep->com) != FPDU_MODE) - return CPL_RET_BUF_DONE; - - pr_debug("%s ep %p\n", __func__, ep); - skb_pull(skb, sizeof(struct cpl_rdma_terminate)); - pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len); - skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, - skb->len); - ep->com.qp->attr.terminate_msg_len = skb->len; - ep->com.qp->attr.is_terminate_local = 0; - return CPL_RET_BUF_DONE; -} - -static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_rdma_ec_status *rep = cplhdr(skb); - struct iwch_ep *ep = ctx; - - pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, - rep->status); - if (rep->status) { - struct iwch_qp_attributes attrs; - - pr_err("%s BAD CLOSE - Aborting tid %u\n", - __func__, ep->hwtid); - stop_ep_timer(ep); - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - abort_connection(ep, NULL, GFP_KERNEL); - } - return CPL_RET_BUF_DONE; -} 
- -static void ep_timeout(struct timer_list *t) -{ - struct iwch_ep *ep = from_timer(ep, t, timer); - struct iwch_qp_attributes attrs; - unsigned long flags; - int abort = 1; - - spin_lock_irqsave(&ep->com.lock, flags); - pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, - ep->com.state); - switch (ep->com.state) { - case MPA_REQ_SENT: - __state_set(&ep->com, ABORTING); - connect_reply_upcall(ep, -ETIMEDOUT); - break; - case MPA_REQ_WAIT: - __state_set(&ep->com, ABORTING); - break; - case CLOSING: - case MORIBUND: - if (ep->com.cm_id && ep->com.qp) { - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - } - __state_set(&ep->com, ABORTING); - break; - default: - WARN(1, "%s unexpected state ep %p state %u\n", - __func__, ep, ep->com.state); - abort = 0; - } - spin_unlock_irqrestore(&ep->com.lock, flags); - if (abort) - abort_connection(ep, NULL, GFP_ATOMIC); - put_ep(&ep->com); -} - -int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) -{ - struct iwch_ep *ep = to_ep(cm_id); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - - if (state_read(&ep->com) == DEAD) { - put_ep(&ep->com); - return -ECONNRESET; - } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - if (mpa_rev == 0) - abort_connection(ep, NULL, GFP_KERNEL); - else { - send_mpa_reject(ep, pdata, pdata_len); - iwch_ep_disconnect(ep, 0, GFP_KERNEL); - } - put_ep(&ep->com); - return 0; -} - -int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - int err; - struct iwch_qp_attributes attrs; - enum iwch_qp_attr_mask mask; - struct iwch_ep *ep = to_ep(cm_id); - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_qp *qp = get_qhp(h, conn_param->qpn); - - pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { - err = -ECONNRESET; - goto err; - } - - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); - BUG_ON(!qp); - - if 
((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || - (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { - abort_connection(ep, NULL, GFP_KERNEL); - err = -EINVAL; - goto err; - } - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = qp; - - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ird == 0) - ep->ird = 1; - - pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); - - /* bind QP to EP and move to RTS */ - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ird; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - /* bind QP and TID with INIT_WR */ - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_MAX_ORD; - - err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - if (err) - goto err1; - - /* if needed, wait for wr_ack */ - if (iwch_rqes_posted(qp)) { - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (err) - goto err1; - } - - err = send_mpa_reply(ep, conn_param->private_data, - conn_param->private_data_len); - if (err) - goto err1; - - - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); - put_ep(&ep->com); - return 0; -err1: - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); -err: - put_ep(&ep->com); - return err; -} - -static int is_loopback_dst(struct iw_cm_id *cm_id) -{ - struct net_device *dev; - struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; - - dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); - if (!dev) - return 0; - dev_put(dev); - return 1; -} - -int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) -{ - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_ep *ep; - struct rtable *rt; - int err = 0; - struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr; - struct sockaddr_in *raddr = 
(struct sockaddr_in *)&cm_id->m_remote_addr; - - if (cm_id->m_remote_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto out; - } - - if (is_loopback_dst(cm_id)) { - err = -ENOSYS; - goto out; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto out; - } - timer_setup(&ep->timer, ep_timeout, 0); - ep->plen = conn_param->private_data_len; - if (ep->plen) - memcpy(ep->mpa_pkt + sizeof(struct mpa_message), - conn_param->private_data, ep->plen); - ep->ird = conn_param->ird; - ep->ord = conn_param->ord; - - if (peer2peer && ep->ord == 0) - ep->ord = 1; - - ep->com.tdev = h->rdev.t3cdev_p; - - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->com.qp = get_qhp(h, conn_param->qpn); - BUG_ON(!ep->com.qp); - pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, - ep->com.qp, cm_id); - - /* - * Allocate an active TID to initiate a TCP connection. - */ - ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->atid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - /* find a route */ - rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, IPTOS_LOWDELAY); - if (!rt) { - pr_err("%s - cannot find route\n", __func__); - err = -EHOSTUNREACH; - goto fail3; - } - ep->dst = &rt->dst; - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, - &raddr->sin_addr.s_addr); - if (!ep->l2t) { - pr_err("%s - cannot alloc l2e\n", __func__); - err = -ENOMEM; - goto fail4; - } - - state_set(&ep->com, CONNECTING); - ep->tos = IPTOS_LOWDELAY; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr, - sizeof(ep->com.remote_addr)); - - /* send connect request to rnic */ - err = send_connect(ep); - if (!err) - goto out; - - l2t_release(h->rdev.t3cdev_p, ep->l2t); -fail4: - dst_release(ep->dst); -fail3: - 
cxgb3_free_atid(ep->com.tdev, ep->atid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -out: - return err; -} - -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) -{ - int err = 0; - struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct iwch_listen_ep *ep; - - - might_sleep(); - - if (cm_id->m_local_addr.ss_family != PF_INET) { - err = -ENOSYS; - goto fail1; - } - - ep = alloc_ep(sizeof(*ep), GFP_KERNEL); - if (!ep) { - pr_err("%s - cannot alloc ep\n", __func__); - err = -ENOMEM; - goto fail1; - } - pr_debug("%s ep %p\n", __func__, ep); - ep->com.tdev = h->rdev.t3cdev_p; - cm_id->add_ref(cm_id); - ep->com.cm_id = cm_id; - ep->backlog = backlog; - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - - /* - * Allocate a server TID. - */ - ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep); - if (ep->stid == -1) { - pr_err("%s - cannot alloc atid\n", __func__); - err = -ENOMEM; - goto fail2; - } - - state_set(&ep->com, LISTEN); - err = listen_start(ep); - if (err) - goto fail3; - - /* wait for pass_open_rpl */ - wait_event(ep->com.waitq, ep->com.rpl_done); - err = ep->com.rpl_err; - if (!err) { - cm_id->provider_data = ep; - goto out; - } -fail3: - cxgb3_free_stid(ep->com.tdev, ep->stid); -fail2: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); -fail1: -out: - return err; -} - -int iwch_destroy_listen(struct iw_cm_id *cm_id) -{ - int err; - struct iwch_listen_ep *ep = to_listen_ep(cm_id); - - pr_debug("%s ep %p\n", __func__, ep); - - might_sleep(); - state_set(&ep->com, DEAD); - ep->com.rpl_done = 0; - ep->com.rpl_err = 0; - err = listen_stop(ep); - if (err) - goto done; - wait_event(ep->com.waitq, ep->com.rpl_done); - cxgb3_free_stid(ep->com.tdev, ep->stid); -done: - err = ep->com.rpl_err; - cm_id->rem_ref(cm_id); - put_ep(&ep->com); - return err; -} - -int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) -{ - int ret=0; - unsigned long flags; - int close = 0; - int fatal = 0; - struct t3cdev 
*tdev; - struct cxio_rdev *rdev; - - spin_lock_irqsave(&ep->com.lock, flags); - - pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep, - states[ep->com.state], abrupt); - - tdev = (struct t3cdev *)ep->com.tdev; - rdev = (struct cxio_rdev *)tdev->ulp; - if (cxio_fatal_error(rdev)) { - fatal = 1; - close_complete_upcall(ep); - ep->com.state = DEAD; - } - switch (ep->com.state) { - case MPA_REQ_WAIT: - case MPA_REQ_SENT: - case MPA_REQ_RCVD: - case MPA_REP_SENT: - case FPDU_MODE: - close = 1; - if (abrupt) - ep->com.state = ABORTING; - else { - ep->com.state = CLOSING; - start_ep_timer(ep); - } - set_bit(CLOSE_SENT, &ep->com.flags); - break; - case CLOSING: - if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { - close = 1; - if (abrupt) { - stop_ep_timer(ep); - ep->com.state = ABORTING; - } else - ep->com.state = MORIBUND; - } - break; - case MORIBUND: - case ABORTING: - case DEAD: - pr_debug("%s ignoring disconnect ep %p state %u\n", - __func__, ep, ep->com.state); - break; - default: - BUG(); - break; - } - - spin_unlock_irqrestore(&ep->com.lock, flags); - if (close) { - if (abrupt) - ret = send_abort(ep, NULL, gfp); - else - ret = send_halfclose(ep, gfp); - if (ret) - fatal = 1; - } - if (fatal) - release_ep_resources(ep); - return ret; -} - -int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, - struct l2t_entry *l2t) -{ - struct iwch_ep *ep = ctx; - - if (ep->dst != old) - return 0; - - pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, - l2t); - dst_hold(new); - l2t_release(ep->com.tdev, ep->l2t); - ep->l2t = l2t; - dst_release(old); - ep->dst = new; - return 1; -} - -/* - * All the CM events are handled on a work queue to have a safe context. - * These are the real handlers that are called from the work queue. 
- */ -static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = act_establish, - [CPL_ACT_OPEN_RPL] = act_open_rpl, - [CPL_RX_DATA] = rx_data, - [CPL_TX_DMA_ACK] = tx_ack, - [CPL_ABORT_RPL_RSS] = abort_rpl, - [CPL_ABORT_RPL] = abort_rpl, - [CPL_PASS_OPEN_RPL] = pass_open_rpl, - [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, - [CPL_PASS_ACCEPT_REQ] = pass_accept_req, - [CPL_PASS_ESTABLISH] = pass_establish, - [CPL_PEER_CLOSE] = peer_close, - [CPL_ABORT_REQ_RSS] = peer_abort, - [CPL_CLOSE_CON_RPL] = close_con_rpl, - [CPL_RDMA_TERMINATE] = terminate, - [CPL_RDMA_EC_STATUS] = ec_status, -}; - -static void process_work(struct work_struct *work) -{ - struct sk_buff *skb = NULL; - void *ep; - struct t3cdev *tdev; - int ret; - - while ((skb = skb_dequeue(&rxq))) { - ep = *((void **) (skb->cb)); - tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); - ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); - if (ret & CPL_RET_BUF_DONE) - kfree_skb(skb); - - /* - * ep was referenced in sched(), and is freed here. - */ - put_ep((struct iwch_ep_common *)ep); - } -} - -static DECLARE_WORK(skb_work, process_work); - -static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct iwch_ep_common *epc = ctx; - - get_ep(epc); - - /* - * Save ctx and tdev in the skb->cb area. - */ - *((void **) skb->cb) = ctx; - *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev; - - /* - * Queue the skb and schedule the worker thread. - */ - skb_queue_tail(&rxq, skb); - queue_work(workq, &skb_work); - return 0; -} - -static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) -{ - struct cpl_set_tcb_rpl *rpl = cplhdr(skb); - - if (rpl->status != CPL_ERR_NONE) { - pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n", - rpl->status, GET_TID(rpl)); - } - return CPL_RET_BUF_DONE; -} - -/* - * All upcalls from the T3 Core go to sched() to schedule the - * processing on a work queue. 
- */ -cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = { - [CPL_ACT_ESTABLISH] = sched, - [CPL_ACT_OPEN_RPL] = sched, - [CPL_RX_DATA] = sched, - [CPL_TX_DMA_ACK] = sched, - [CPL_ABORT_RPL_RSS] = sched, - [CPL_ABORT_RPL] = sched, - [CPL_PASS_OPEN_RPL] = sched, - [CPL_CLOSE_LISTSRV_RPL] = sched, - [CPL_PASS_ACCEPT_REQ] = sched, - [CPL_PASS_ESTABLISH] = sched, - [CPL_PEER_CLOSE] = sched, - [CPL_CLOSE_CON_RPL] = sched, - [CPL_ABORT_REQ_RSS] = sched, - [CPL_RDMA_TERMINATE] = sched, - [CPL_RDMA_EC_STATUS] = sched, - [CPL_SET_TCB_RPL] = set_tcb_rpl, -}; - -int __init iwch_cm_init(void) -{ - skb_queue_head_init(&rxq); - - workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM); - if (!workq) - return -ENOMEM; - - return 0; -} - -void __exit iwch_cm_term(void) -{ - flush_workqueue(workq); - destroy_workqueue(workq); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h deleted file mode 100644 index cc7fe644d260..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _IWCH_CM_H_ -#define _IWCH_CM_H_ - -#include <linux/inet.h> -#include <linux/wait.h> -#include <linux/spinlock.h> -#include <linux/kref.h> - -#include <rdma/ib_verbs.h> -#include <rdma/iw_cm.h> - -#include "cxgb3_offload.h" -#include "iwch_provider.h" - -#define MPA_KEY_REQ "MPA ID Req Frame" -#define MPA_KEY_REP "MPA ID Rep Frame" - -#define MPA_MAX_PRIVATE_DATA 256 -#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */ -#define MPA_REJECT 0x20 -#define MPA_CRC 0x40 -#define MPA_MARKERS 0x80 -#define MPA_FLAGS_MASK 0xE0 - -#define put_ep(ep) { \ - pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - WARN_ON(kref_read(&((ep)->kref)) < 1); \ - kref_put(&((ep)->kref), __free_ep); \ -} - -#define get_ep(ep) { \ - pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \ - __func__, __LINE__, ep, kref_read(&((ep)->kref))); \ - kref_get(&((ep)->kref)); \ -} - -struct mpa_message { - u8 key[16]; - u8 flags; - u8 revision; - __be16 private_data_size; - u8 private_data[0]; -}; - -struct terminate_message { - u8 layer_etype; - u8 ecode; - __be16 hdrct_rsvd; - u8 len_hdrs[0]; -}; - -#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) - -enum iwch_layers_types { - LAYER_RDMAP = 0x00, - LAYER_DDP = 0x10, - LAYER_MPA = 0x20, - RDMAP_LOCAL_CATA = 0x00, - RDMAP_REMOTE_PROT = 0x01, - RDMAP_REMOTE_OP = 0x02, - DDP_LOCAL_CATA = 0x00, - DDP_TAGGED_ERR = 0x01, - DDP_UNTAGGED_ERR = 0x02, - DDP_LLP = 0x03 -}; - -enum 
iwch_rdma_ecodes { - RDMAP_INV_STAG = 0x00, - RDMAP_BASE_BOUNDS = 0x01, - RDMAP_ACC_VIOL = 0x02, - RDMAP_STAG_NOT_ASSOC = 0x03, - RDMAP_TO_WRAP = 0x04, - RDMAP_INV_VERS = 0x05, - RDMAP_INV_OPCODE = 0x06, - RDMAP_STREAM_CATA = 0x07, - RDMAP_GLOBAL_CATA = 0x08, - RDMAP_CANT_INV_STAG = 0x09, - RDMAP_UNSPECIFIED = 0xff -}; - -enum iwch_ddp_ecodes { - DDPT_INV_STAG = 0x00, - DDPT_BASE_BOUNDS = 0x01, - DDPT_STAG_NOT_ASSOC = 0x02, - DDPT_TO_WRAP = 0x03, - DDPT_INV_VERS = 0x04, - DDPU_INV_QN = 0x01, - DDPU_INV_MSN_NOBUF = 0x02, - DDPU_INV_MSN_RANGE = 0x03, - DDPU_INV_MO = 0x04, - DDPU_MSG_TOOBIG = 0x05, - DDPU_INV_VERS = 0x06 -}; - -enum iwch_mpa_ecodes { - MPA_CRC_ERR = 0x02, - MPA_MARKER_ERR = 0x03 -}; - -enum iwch_ep_state { - IDLE = 0, - LISTEN, - CONNECTING, - MPA_REQ_WAIT, - MPA_REQ_SENT, - MPA_REQ_RCVD, - MPA_REP_SENT, - FPDU_MODE, - ABORTING, - CLOSING, - MORIBUND, - DEAD, -}; - -enum iwch_ep_flags { - PEER_ABORT_IN_PROGRESS = 0, - ABORT_REQ_IN_PROGRESS = 1, - RELEASE_RESOURCES = 2, - CLOSE_SENT = 3, -}; - -struct iwch_ep_common { - struct iw_cm_id *cm_id; - struct iwch_qp *qp; - struct t3cdev *tdev; - enum iwch_ep_state state; - struct kref kref; - spinlock_t lock; - struct sockaddr_in local_addr; - struct sockaddr_in remote_addr; - wait_queue_head_t waitq; - int rpl_done; - int rpl_err; - unsigned long flags; -}; - -struct iwch_listen_ep { - struct iwch_ep_common com; - unsigned int stid; - int backlog; -}; - -struct iwch_ep { - struct iwch_ep_common com; - struct iwch_ep *parent_ep; - struct timer_list timer; - unsigned int atid; - u32 hwtid; - u32 snd_seq; - u32 rcv_seq; - struct l2t_entry *l2t; - struct dst_entry *dst; - struct sk_buff *mpa_skb; - struct iwch_mpa_attributes mpa_attr; - unsigned int mpa_pkt_len; - u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; - u8 tos; - u16 emss; - u16 plen; - u32 ird; - u32 ord; -}; - -static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline struct 
iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) -{ - return cm_id->provider_data; -} - -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<<wscale) < win) - wscale++; - return wscale; -} - -/* CM prototypes */ - -int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog); -int iwch_destroy_listen(struct iw_cm_id *cm_id); -int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); -int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp); -int iwch_quiesce_tid(struct iwch_ep *ep); -int iwch_resume_tid(struct iwch_ep *ep); -void __free_ep(struct kref *kref); -void iwch_rearp(struct iwch_ep *ep); -int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t); - -int __init iwch_cm_init(void); -void __exit iwch_cm_term(void); -extern int peer2peer; - -#endif /* _IWCH_CM_H_ */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c deleted file mode 100644 index a098c0140580..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. 
- * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "iwch_provider.h" -#include "iwch.h" - -static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct iwch_qp *qhp, struct ib_wc *wc) -{ - struct t3_wq *wq = qhp ? &qhp->wq : NULL; - struct t3_cqe cqe; - u32 credit = 0; - u8 cqe_flushed; - u64 cookie; - int ret = 1; - - ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, - &credit); - if (t3a_device(chp->rhp) && credit) { - pr_debug("%s updating %d cq credits on id %d\n", __func__, - credit, chp->cq.cqid); - cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit); - } - - if (ret) { - ret = -EAGAIN; - goto out; - } - ret = 1; - - wc->wr_id = cookie; - wc->qp = qhp ? 
&qhp->ibqp : NULL; - wc->vendor_err = CQE_STATUS(cqe); - wc->wc_flags = 0; - - pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n", - __func__, - CQE_QPID(cqe), CQE_TYPE(cqe), - CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe), - CQE_WRID_LOW(cqe), (unsigned long long)cookie); - - if (CQE_TYPE(cqe) == 0) { - if (!CQE_STATUS(cqe)) - wc->byte_len = CQE_LEN(cqe); - else - wc->byte_len = 0; - wc->opcode = IB_WC_RECV; - if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || - CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { - wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); - wc->wc_flags |= IB_WC_WITH_INVALIDATE; - } - } else { - switch (CQE_OPCODE(cqe)) { - case T3_RDMA_WRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case T3_READ_REQ: - wc->opcode = IB_WC_RDMA_READ; - wc->byte_len = CQE_LEN(cqe); - break; - case T3_SEND: - case T3_SEND_WITH_SE: - case T3_SEND_WITH_INV: - case T3_SEND_WITH_SE_INV: - wc->opcode = IB_WC_SEND; - break; - case T3_LOCAL_INV: - wc->opcode = IB_WC_LOCAL_INV; - break; - case T3_FAST_REGISTER: - wc->opcode = IB_WC_REG_MR; - break; - default: - pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", - CQE_OPCODE(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - goto out; - } - } - - if (cqe_flushed) - wc->status = IB_WC_WR_FLUSH_ERR; - else { - - switch (CQE_STATUS(cqe)) { - case TPT_ERR_SUCCESS: - wc->status = IB_WC_SUCCESS; - break; - case TPT_ERR_STAG: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_PDID: - wc->status = IB_WC_LOC_PROT_ERR; - break; - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - wc->status = IB_WC_LOC_ACCESS_ERR; - break; - case TPT_ERR_WRAP: - wc->status = IB_WC_GENERAL_ERR; - break; - case TPT_ERR_BOUND: - wc->status = IB_WC_LOC_LEN_ERR; - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - wc->status = IB_WC_MW_BIND_ERR; - break; - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_OUT_OF_RQE: - case 
TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - case TPT_ERR_OPCODE: - wc->status = IB_WC_FATAL_ERR; - break; - case TPT_ERR_SWFLUSH: - wc->status = IB_WC_WR_FLUSH_ERR; - break; - default: - pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n", - CQE_STATUS(cqe), CQE_QPID(cqe)); - ret = -EINVAL; - } - } -out: - return ret; -} - -/* - * Get one cq entry from cxio and map it to openib. - * - * Returns: - * 0 EMPTY; - * 1 cqe returned - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, - struct ib_wc *wc) -{ - struct iwch_qp *qhp; - struct t3_cqe *rd_cqe; - int ret; - - rd_cqe = cxio_next_cqe(&chp->cq); - - if (!rd_cqe) - return 0; - - qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); - if (qhp) { - spin_lock(&qhp->lock); - ret = __iwch_poll_cq_one(rhp, chp, qhp, wc); - spin_unlock(&qhp->lock); - } else { - ret = __iwch_poll_cq_one(rhp, chp, NULL, wc); - } - return ret; -} - -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - unsigned long flags; - int npolled; - int err = 0; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - - spin_lock_irqsave(&chp->lock, flags); - for (npolled = 0; npolled < num_entries; ++npolled) { - - /* - * Because T3 can post CQEs that are _not_ associated - * with a WR, we might have to poll again after removing - * one of these. 
- */ - do { - err = iwch_poll_cq_one(rhp, chp, wc + npolled); - } while (err == -EAGAIN); - if (err <= 0) - break; - } - spin_unlock_irqrestore(&chp->lock, flags); - - if (err < 0) - return err; - else { - return npolled; - } -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c deleted file mode 100644 index 9d356c1301c7..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/gfp.h> -#include <linux/mman.h> -#include <net/sock.h> -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_wr.h" - -static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, - struct respQ_msg_t *rsp_msg, - enum ib_event_type ib_event, - int send_term) -{ - struct ib_event event; - struct iwch_qp_attributes attrs; - struct iwch_qp *qhp; - unsigned long flag; - - xa_lock(&rnicp->qps); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - - if (!qhp) { - pr_err("%s unaffiliated error 0x%x qpid 0x%x\n", - __func__, CQE_STATUS(rsp_msg->cqe), - CQE_QPID(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - if ((qhp->attr.state == IWCH_QP_STATE_ERROR) || - (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) { - pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n", - __func__, - qhp->attr.state, qhp->wq.qpid, - CQE_STATUS(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - return; - } - - pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - __func__, - CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), - CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - - atomic_inc(&qhp->refcnt); - xa_unlock(&rnicp->qps); - - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_TERMINATE; - iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (send_term) - iwch_post_terminate(qhp, rsp_msg); - } - - event.event = ib_event; - event.device = chp->ibcq.device; - if (ib_event == IB_EVENT_CQ_ERR) - event.element.cq = &chp->ibcq; - else - event.element.qp = &qhp->ibqp; - - if (qhp->ibqp.event_handler) - (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - - if 
(atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); -} - -void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) -{ - struct iwch_dev *rnicp; - struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; - struct iwch_cq *chp; - struct iwch_qp *qhp; - u32 cqid = RSPQ_CQID(rsp_msg); - unsigned long flag; - - rnicp = (struct iwch_dev *) rdev_p->ulp; - xa_lock(&rnicp->qps); - chp = get_chp(rnicp, cqid); - qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe)); - if (!chp || !qhp) { - pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", - cqid, CQE_QPID(rsp_msg->cqe), - CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), - CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), - CQE_WRID_LOW(rsp_msg->cqe)); - xa_unlock(&rnicp->qps); - goto out; - } - iwch_qp_add_ref(&qhp->ibqp); - atomic_inc(&chp->refcnt); - xa_unlock(&rnicp->qps); - - /* - * 1) completion of our sending a TERMINATE. - * 2) incoming TERMINATE message. - */ - if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) && - (CQE_STATUS(rsp_msg->cqe) == 0)) { - if (SQ_TYPE(rsp_msg->cqe)) { - pr_debug("%s QPID 0x%x ep %p disconnecting\n", - __func__, qhp->wq.qpid, qhp->ep); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } else { - pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__, - qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, - IB_EVENT_QP_REQ_ERR, 0); - iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); - } - goto done; - } - - /* Bad incoming Read request */ - if (SQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - /* Bad incoming write */ - if (RQ_TYPE(rsp_msg->cqe) && - (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) { - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); - goto done; - } - - switch (CQE_STATUS(rsp_msg->cqe)) { - - /* Completion Events */ - case TPT_ERR_SUCCESS: - - /* - * Confirm the destination entry if this is a RECV 
completion. - */ - if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) - dst_confirm(qhp->ep->dst); - spin_lock_irqsave(&chp->comp_handler_lock, flag); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); - spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - break; - - case TPT_ERR_STAG: - case TPT_ERR_PDID: - case TPT_ERR_QPID: - case TPT_ERR_ACCESS: - case TPT_ERR_WRAP: - case TPT_ERR_BOUND: - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); - break; - - /* Device Fatal Errors */ - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1); - break; - - /* QP Fatal Errors */ - case TPT_ERR_OUT_OF_RQE: - case TPT_ERR_PBL_ADDR_BOUND: - case TPT_ERR_CRC: - case TPT_ERR_MARKER: - case TPT_ERR_PDU_LEN_ERR: - case TPT_ERR_DDP_VERSION: - case TPT_ERR_RDMA_VERSION: - case TPT_ERR_OPCODE: - case TPT_ERR_DDP_QUEUE_NUM: - case TPT_ERR_MSN: - case TPT_ERR_TBIT: - case TPT_ERR_MO: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_RQE_ADDR_BOUND: - case TPT_ERR_IRD_OVERFLOW: - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - - default: - pr_err("Unknown T3 status 0x%x QPID 0x%x\n", - CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid); - post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); - break; - } -done: - if (atomic_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); - iwch_qp_rem_ref(&qhp->ibqp); -out: - dev_kfree_skb_irq(skb); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c deleted file mode 100644 index ce0f2741821d..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/slab.h> -#include <asm/byteorder.h> - -#include <rdma/iw_cm.h> -#include <rdma/ib_verbs.h> - -#include "cxio_hal.h" -#include "cxio_resource.h" -#include "iwch.h" -#include "iwch_provider.h" - -static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) -{ - u32 mmid; - - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); - return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); -} - -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift) -{ - u32 stag; - int ret; - - if (cxio_register_phys_mem(&rhp->rdev, - &stag, mhp->attr.pdid, - mhp->attr.perms, - mhp->attr.zbva, - mhp->attr.va_fbo, - mhp->attr.len, - shift - 12, - mhp->attr.pbl_size, mhp->attr.pbl_addr)) - return -ENOMEM; - - ret = iwch_finish_mem_reg(mhp, stag); - if (ret) - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - return ret; -} - -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) -{ - mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, - npages << 3); - - if (!mhp->attr.pbl_addr) - return -ENOMEM; - - mhp->attr.pbl_size = npages; - - return 0; -} - -void iwch_free_pbl(struct iwch_mr *mhp) -{ - cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, - mhp->attr.pbl_size << 3); -} - -int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) -{ - return cxio_write_pbl(&mhp->rhp->rdev, pages, - mhp->attr.pbl_addr + (offset << 3), npages); -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c deleted file mode 100644 index dcf02ec02810..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/device.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/delay.h> -#include <linux/errno.h> -#include <linux/list.h> -#include <linux/sched/mm.h> -#include <linux/spinlock.h> -#include <linux/ethtool.h> -#include <linux/rtnetlink.h> -#include <linux/inetdevice.h> -#include <linux/slab.h> - -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/byteorder.h> - -#include <rdma/iw_cm.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_smi.h> -#include <rdma/ib_umem.h> -#include <rdma/ib_user_verbs.h> -#include <rdma/uverbs_ioctl.h> - -#include "cxio_hal.h" -#include "iwch.h" -#include "iwch_provider.h" -#include "iwch_cm.h" -#include <rdma/cxgb3-abi.h> -#include "common.h" - -static void iwch_dealloc_ucontext(struct ib_ucontext *context) -{ - struct iwch_dev *rhp = to_iwch_dev(context->device); - struct iwch_ucontext *ucontext = to_iwch_ucontext(context); - struct iwch_mm_entry *mm, *tmp; - - pr_debug("%s context %p\n", __func__, context); - list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) - kfree(mm); - cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); -} - -static int iwch_alloc_ucontext(struct ib_ucontext *ucontext, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ucontext->device; - struct iwch_ucontext *context = to_iwch_ucontext(ucontext); - struct iwch_dev *rhp = to_iwch_dev(ibdev); - - pr_debug("%s ibdev %p\n", __func__, ibdev); - cxio_init_ucontext(&rhp->rdev, &context->uctx); - INIT_LIST_HEAD(&context->mmaps); - spin_lock_init(&context->mmap_lock); - return 0; -} - -static void iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) -{ - struct iwch_cq *chp; - - pr_debug("%s ib_cq %p\n", __func__, ib_cq); - chp = to_iwch_cq(ib_cq); - - xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); - atomic_dec(&chp->refcnt); - wait_event(chp->wait, !atomic_read(&chp->refcnt)); - - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); -} - -static int 
iwch_create_cq(struct ib_cq *ibcq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) -{ - struct ib_device *ibdev = ibcq->device; - int entries = attr->cqe; - struct iwch_dev *rhp = to_iwch_dev(ibcq->device); - struct iwch_cq *chp = to_iwch_cq(ibcq); - struct iwch_create_cq_resp uresp; - struct iwch_create_cq_req ureq; - static int warned; - size_t resplen; - - pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries); - if (attr->flags) - return -EINVAL; - - if (udata) { - if (!t3a_device(rhp)) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) - return -EFAULT; - - chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr; - } - } - - if (t3a_device(rhp)) { - - /* - * T3A: Add some fluff to handle extra CQEs inserted - * for various errors. - * Additional CQE possibilities: - * TERMINATE, - * incoming RDMA WRITE Failures - * incoming RDMA READ REQUEST FAILUREs - * NOTE: We cannot ensure the CQ won't overflow. - */ - entries += 16; - } - entries = roundup_pow_of_two(entries); - chp->cq.size_log2 = ilog2(entries); - - if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) - return -ENOMEM; - - chp->rhp = rhp; - chp->ibcq.cqe = 1 << chp->cq.size_log2; - spin_lock_init(&chp->lock); - spin_lock_init(&chp->comp_handler_lock); - atomic_set(&chp->refcnt, 1); - init_waitqueue_head(&chp->wait); - if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) { - cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); - return -ENOMEM; - } - - if (udata) { - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext = rdma_udata_to_drv_context( - udata, struct iwch_ucontext, ibucontext); - - mm = kmalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) { - iwch_destroy_cq(&chp->ibcq, udata); - return -ENOMEM; - } - uresp.cqid = chp->cq.cqid; - uresp.size_log2 = chp->cq.size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - mm->key = uresp.key; - mm->addr = 
virt_to_phys(chp->cq.queue); - if (udata->outlen < sizeof(uresp)) { - if (!warned++) - pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n"); - mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * - sizeof(struct t3_cqe)); - resplen = sizeof(struct iwch_create_cq_resp_v0); - } else { - mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * - sizeof(struct t3_cqe)); - uresp.memsize = mm->len; - uresp.reserved = 0; - resplen = sizeof(uresp); - } - if (ib_copy_to_udata(udata, &uresp, resplen)) { - kfree(mm); - iwch_destroy_cq(&chp->ibcq, udata); - return -EFAULT; - } - insert_mmap(ucontext, mm); - } - pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr %pad\n", - chp->cq.cqid, chp, (1 << chp->cq.size_log2), - &chp->cq.dma_addr); - return 0; -} - -static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) -{ - struct iwch_dev *rhp; - struct iwch_cq *chp; - enum t3_cq_opcode cq_op; - int err; - unsigned long flag; - u32 rptr; - - chp = to_iwch_cq(ibcq); - rhp = chp->rhp; - if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) - cq_op = CQ_ARM_SE; - else - cq_op = CQ_ARM_AN; - if (chp->user_rptr_addr) { - if (get_user(rptr, chp->user_rptr_addr)) - return -EFAULT; - spin_lock_irqsave(&chp->lock, flag); - chp->cq.rptr = rptr; - } else - spin_lock_irqsave(&chp->lock, flag); - pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr); - err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); - spin_unlock_irqrestore(&chp->lock, flag); - if (err < 0) - pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid); - if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - err = 0; - return err; -} - -static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - int len = vma->vm_end - vma->vm_start; - u32 key = vma->vm_pgoff << PAGE_SHIFT; - struct cxio_rdev *rdev_p; - int ret = 0; - struct iwch_mm_entry *mm; - struct iwch_ucontext *ucontext; - u64 addr; - - pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff, - key, len); - - if 
(vma->vm_start & (PAGE_SIZE-1)) { - return -EINVAL; - } - - rdev_p = &(to_iwch_dev(context->device)->rdev); - ucontext = to_iwch_ucontext(context); - - mm = remove_mmap(ucontext, key, len); - if (!mm) - return -EINVAL; - addr = mm->addr; - kfree(mm); - - if ((addr >= rdev_p->rnic_info.udbell_physbase) && - (addr < (rdev_p->rnic_info.udbell_physbase + - rdev_p->rnic_info.udbell_len))) { - - /* - * Map T3 DB register. - */ - if (vma->vm_flags & VM_READ) { - return -EPERM; - } - - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - vma->vm_flags &= ~VM_MAYREAD; - ret = io_remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } else { - - /* - * Map WQ or CQ contig dma memory... - */ - ret = remap_pfn_range(vma, vma->vm_start, - addr >> PAGE_SHIFT, - len, vma->vm_page_prot); - } - - return ret; -} - -static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - - php = to_iwch_pd(pd); - rhp = php->rhp; - pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); - cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); -} - -static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) -{ - struct iwch_pd *php = to_iwch_pd(pd); - struct ib_device *ibdev = pd->device; - u32 pdid; - struct iwch_dev *rhp; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - rhp = (struct iwch_dev *) ibdev; - pdid = cxio_hal_get_pdid(rhp->rdev.rscp); - if (!pdid) - return -EINVAL; - - php->pdid = pdid; - php->rhp = rhp; - if (udata) { - struct iwch_alloc_pd_resp resp = {.pdid = php->pdid}; - - if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - iwch_deallocate_pd(&php->ibpd, udata); - return -EFAULT; - } - } - pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); - return 0; -} - -static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_mr *mhp; - u32 mmid; - - pr_debug("%s ib_mr 
%p\n", __func__, ib_mr); - - mhp = to_iwch_mr(ib_mr); - kfree(mhp->pages); - rhp = mhp->rhp; - mmid = mhp->attr.stag >> 8; - cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); - iwch_free_pbl(mhp); - xa_erase_irq(&rhp->mrs, mmid); - if (mhp->kva) - kfree((void *) (unsigned long) mhp->kva); - ib_umem_release(mhp->umem); - pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) -{ - const u64 total_size = 0xffffffff; - const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK; - struct iwch_pd *php = to_iwch_pd(pd); - struct iwch_dev *rhp = php->rhp; - struct iwch_mr *mhp; - __be64 *page_list; - int shift = 26, npages, ret, i; - - pr_debug("%s ib_pd %p\n", __func__, pd); - - /* - * T3 only supports 32 bits of size. - */ - if (sizeof(phys_addr_t) > 4) { - pr_warn_once("Cannot support dma_mrs on this platform\n"); - return ERR_PTR(-ENOTSUPP); - } - - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - npages = (total_size + (1ULL << shift) - 1) >> shift; - if (!npages) { - ret = -EINVAL; - goto err; - } - - page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL); - if (!page_list) { - ret = -ENOMEM; - goto err; - } - - for (i = 0; i < npages; i++) - page_list[i] = cpu_to_be64((u64)i << shift); - - pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n", - __func__, mask, shift, total_size, npages); - - ret = iwch_alloc_pbl(mhp, npages); - if (ret) { - kfree(page_list); - goto err_pbl; - } - - ret = iwch_write_pbl(mhp, page_list, npages, 0); - kfree(page_list); - if (ret) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = 0; - mhp->attr.page_size = shift - 12; - - mhp->attr.len = (u32) total_size; - mhp->attr.pbl_size = npages; - ret = iwch_register_mem(rhp, php, mhp, shift); - if (ret) - goto 
err_pbl; - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - kfree(mhp); - return ERR_PTR(ret); -} - -static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int acc, struct ib_udata *udata) -{ - __be64 *pages; - int shift, n, i; - int err = 0; - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - struct iwch_reg_user_mr_resp uresp; - struct sg_dma_page_iter sg_iter; - pr_debug("%s ib_pd %p\n", __func__, pd); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - - mhp->rhp = rhp; - - mhp->umem = ib_umem_get(udata, start, length, acc, 0); - if (IS_ERR(mhp->umem)) { - err = PTR_ERR(mhp->umem); - kfree(mhp); - return ERR_PTR(err); - } - - shift = PAGE_SHIFT; - - n = ib_umem_num_pages(mhp->umem); - - err = iwch_alloc_pbl(mhp, n); - if (err) - goto err; - - pages = (__be64 *) __get_free_page(GFP_KERNEL); - if (!pages) { - err = -ENOMEM; - goto err_pbl; - } - - i = n = 0; - - for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { - pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); - if (i == PAGE_SIZE / sizeof(*pages)) { - err = iwch_write_pbl(mhp, pages, i, n); - if (err) - goto pbl_done; - n += i; - i = 0; - } - } - - if (i) - err = iwch_write_pbl(mhp, pages, i, n); - -pbl_done: - free_page((unsigned long) pages); - if (err) - goto err_pbl; - - mhp->attr.pdid = php->pdid; - mhp->attr.zbva = 0; - mhp->attr.perms = iwch_ib_to_tpt_access(acc); - mhp->attr.va_fbo = virt; - mhp->attr.page_size = shift - 12; - mhp->attr.len = (u32) length; - - err = iwch_register_mem(rhp, php, mhp, shift); - if (err) - goto err_pbl; - - if (udata && !t3a_device(rhp)) { - uresp.pbl_addr = (mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3; - pr_debug("%s user resp pbl_addr 0x%x\n", __func__, - uresp.pbl_addr); - - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - iwch_dereg_mr(&mhp->ibmr, udata); - err = 
-EFAULT; - goto err; - } - } - - return &mhp->ibmr; - -err_pbl: - iwch_free_pbl(mhp); - -err: - ib_umem_release(mhp->umem); - kfree(mhp); - return ERR_PTR(err); -} - -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mw *mhp; - u32 mmid; - u32 stag = 0; - int ret; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid); - if (ret) { - kfree(mhp); - return ERR_PTR(ret); - } - mhp->rhp = rhp; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_MW; - mhp->attr.stag = stag; - mmid = (stag) >> 8; - mhp->ibmw.rkey = stag; - if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - kfree(mhp); - return ERR_PTR(-ENOMEM); - } - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmw); -} - -static int iwch_dealloc_mw(struct ib_mw *mw) -{ - struct iwch_dev *rhp; - struct iwch_mw *mhp; - u32 mmid; - - mhp = to_iwch_mw(mw); - rhp = mhp->rhp; - mmid = (mw->rkey) >> 8; - cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); - xa_erase_irq(&rhp->mrs, mmid); - pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); - kfree(mhp); - return 0; -} - -static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_pd *php; - struct iwch_mr *mhp; - u32 mmid; - u32 stag = 0; - int ret = -ENOMEM; - - if (mr_type != IB_MR_TYPE_MEM_REG || - max_num_sg > T3_MAX_FASTREG_DEPTH) - return ERR_PTR(-EINVAL); - - php = to_iwch_pd(pd); - rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - goto err; - - mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL); - if (!mhp->pages) - goto 
pl_err; - - mhp->rhp = rhp; - ret = iwch_alloc_pbl(mhp, max_num_sg); - if (ret) - goto err1; - mhp->attr.pbl_size = max_num_sg; - ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, - mhp->attr.pbl_size, mhp->attr.pbl_addr); - if (ret) - goto err2; - mhp->attr.pdid = php->pdid; - mhp->attr.type = TPT_NON_SHARED_MR; - mhp->attr.stag = stag; - mhp->attr.state = 1; - mmid = (stag) >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL); - if (ret) - goto err3; - - pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); - return &(mhp->ibmr); -err3: - cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, - mhp->attr.pbl_addr); -err2: - iwch_free_pbl(mhp); -err1: - kfree(mhp->pages); -pl_err: - kfree(mhp); -err: - return ERR_PTR(ret); -} - -static int iwch_set_page(struct ib_mr *ibmr, u64 addr) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - if (unlikely(mhp->npages == mhp->attr.pbl_size)) - return -ENOMEM; - - mhp->pages[mhp->npages++] = addr; - - return 0; -} - -static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, - int sg_nents, unsigned int *sg_offset) -{ - struct iwch_mr *mhp = to_iwch_mr(ibmr); - - mhp->npages = 0; - - return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page); -} - -static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_qp_attributes attrs; - struct iwch_ucontext *ucontext; - - qhp = to_iwch_qp(ib_qp); - rhp = qhp->rhp; - - attrs.next_state = IWCH_QP_STATE_ERROR; - iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); - wait_event(qhp->wait, !qhp->ep); - - xa_erase_irq(&rhp->qps, qhp->wq.qpid); - - atomic_dec(&qhp->refcnt); - wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); - - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); - - pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__, - ib_qp, qhp->wq.qpid, qhp); - kfree(qhp); - return 0; -} - -static struct ib_qp *iwch_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *attrs, - struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - struct iwch_pd *php; - struct iwch_cq *schp; - struct iwch_cq *rchp; - struct iwch_create_qp_resp uresp; - int wqsize, sqsize, rqsize; - struct iwch_ucontext *ucontext; - - pr_debug("%s ib_pd %p\n", __func__, pd); - if (attrs->qp_type != IB_QPT_RC) - return ERR_PTR(-EINVAL); - php = to_iwch_pd(pd); - rhp = php->rhp; - schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid); - rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid); - if (!schp || !rchp) - return ERR_PTR(-EINVAL); - - /* The RQT size must be # of entries + 1 rounded up to a power of two */ - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr); - if (rqsize == attrs->cap.max_recv_wr) - rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1); - - /* T3 doesn't support RQT depth < 16 */ - if (rqsize < 16) - rqsize = 16; - - if (rqsize > T3_MAX_RQ_SIZE) - return ERR_PTR(-EINVAL); - - if (attrs->cap.max_inline_data > T3_MAX_INLINE) - return ERR_PTR(-EINVAL); - - /* - * NOTE: The SQ and total WQ sizes don't need to be - * a power of two. However, all the code assumes - * they are. EG: Q_FREECNT() and friends. - */ - sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); - wqsize = roundup_pow_of_two(rqsize + sqsize); - - /* - * Kernel users need more wq space for fastreg WRs which can take - * 2 WR fragments. 
- */ - ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext, - ibucontext); - if (!ucontext && wqsize < (rqsize + (2 * sqsize))) - wqsize = roundup_pow_of_two(rqsize + - roundup_pow_of_two(attrs->cap.max_send_wr * 2)); - pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__, - wqsize, sqsize, rqsize); - qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); - if (!qhp) - return ERR_PTR(-ENOMEM); - qhp->wq.size_log2 = ilog2(wqsize); - qhp->wq.rq_size_log2 = ilog2(rqsize); - qhp->wq.sq_size_log2 = ilog2(sqsize); - if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - attrs->cap.max_recv_wr = rqsize - 1; - attrs->cap.max_send_wr = sqsize; - attrs->cap.max_inline_data = T3_MAX_INLINE; - - qhp->rhp = rhp; - qhp->attr.pd = php->pdid; - qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; - qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid; - qhp->attr.sq_num_entries = attrs->cap.max_send_wr; - qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; - qhp->attr.sq_max_sges = attrs->cap.max_send_sge; - qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; - qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.next_state = IWCH_QP_STATE_IDLE; - - /* - * XXX - These don't get passed in from the openib user - * at create time. The CM sets them via a QP modify. - * Need to fix... I think the CM should - */ - qhp->attr.enable_rdma_read = 1; - qhp->attr.enable_rdma_write = 1; - qhp->attr.enable_bind = 1; - qhp->attr.max_ord = 1; - qhp->attr.max_ird = 1; - - spin_lock_init(&qhp->lock); - init_waitqueue_head(&qhp->wait); - atomic_set(&qhp->refcnt, 1); - - if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) { - cxio_destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); - kfree(qhp); - return ERR_PTR(-ENOMEM); - } - - if (udata) { - - struct iwch_mm_entry *mm1, *mm2; - - mm1 = kmalloc(sizeof(*mm1), GFP_KERNEL); - if (!mm1) { - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL); - if (!mm2) { - kfree(mm1); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-ENOMEM); - } - - uresp.qpid = qhp->wq.qpid; - uresp.size_log2 = qhp->wq.size_log2; - uresp.sq_size_log2 = qhp->wq.sq_size_log2; - uresp.rq_size_log2 = qhp->wq.rq_size_log2; - spin_lock(&ucontext->mmap_lock); - uresp.key = ucontext->key; - ucontext->key += PAGE_SIZE; - uresp.db_key = ucontext->key; - ucontext->key += PAGE_SIZE; - spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) { - kfree(mm1); - kfree(mm2); - iwch_destroy_qp(&qhp->ibqp, udata); - return ERR_PTR(-EFAULT); - } - mm1->key = uresp.key; - mm1->addr = virt_to_phys(qhp->wq.queue); - mm1->len = PAGE_ALIGN(wqsize * sizeof(union t3_wr)); - insert_mmap(ucontext, mm1); - mm2->key = uresp.db_key; - mm2->addr = qhp->wq.udb & PAGE_MASK; - mm2->len = PAGE_SIZE; - insert_mmap(ucontext, mm2); - } - qhp->ibqp.qp_num = qhp->wq.qpid; - pr_debug( - "%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr %pad size %d rq_addr 0x%x\n", - __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, - qhp->wq.qpid, qhp, &qhp->wq.dma_addr, 1 << qhp->wq.size_log2, - qhp->wq.rq_addr); - return &qhp->ibqp; -} - -static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) -{ - struct iwch_dev *rhp; - struct iwch_qp *qhp; - enum iwch_qp_attr_mask mask = 0; - struct iwch_qp_attributes attrs = {}; - - pr_debug("%s ib_qp %p\n", __func__, ibqp); - - /* iwarp does not support the RTR state */ - if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) - attr_mask &= ~IB_QP_STATE; - - /* Make sure we still have something left to do */ - if 
(!attr_mask) - return 0; - - qhp = to_iwch_qp(ibqp); - rhp = qhp->rhp; - - attrs.next_state = iwch_convert_state(attr->qp_state); - attrs.enable_rdma_read = (attr->qp_access_flags & - IB_ACCESS_REMOTE_READ) ? 1 : 0; - attrs.enable_rdma_write = (attr->qp_access_flags & - IB_ACCESS_REMOTE_WRITE) ? 1 : 0; - attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; - - - mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0; - mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? - (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0; - - return iwch_modify_qp(rhp, qhp, mask, &attrs, 0); -} - -void iwch_qp_add_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - atomic_inc(&(to_iwch_qp(qp)->refcnt)); -} - -void iwch_qp_rem_ref(struct ib_qp *qp) -{ - pr_debug("%s ib_qp %p\n", __func__, qp); - if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt))) - wake_up(&(to_iwch_qp(qp)->wait)); -} - -static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) -{ - pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); - return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); -} - - -static int iwch_query_pkey(struct ib_device *ibdev, - u8 port, u16 index, u16 * pkey) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - *pkey = 0; - return 0; -} - -static int iwch_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - struct iwch_dev *dev; - - pr_debug("%s ibdev %p, port %d, index %d, gid %p\n", - __func__, ibdev, port, index, gid); - dev = to_iwch_dev(ibdev); - BUG_ON(port == 0 || port > 2); - memset(&(gid->raw[0]), 0, sizeof(gid->raw)); - memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6); - return 0; -} - -static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) -{ - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - char *cp, *next; - unsigned fw_maj, fw_min, fw_mic; - - 
lldev->ethtool_ops->get_drvinfo(lldev, &info); - - next = info.fw_version + 1; - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_maj); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_min); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_mic); - - return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | - (fw_mic & 0xffff); -} - -static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - - struct iwch_dev *dev; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - dev = to_iwch_dev(ibdev); - memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - props->hw_ver = dev->rdev.t3cdev_p->type; - props->fw_ver = fw_vers_string_to_u64(dev); - props->device_cap_flags = dev->device_cap_flags; - props->page_size_cap = dev->attr.mem_pgsizes_bitmask; - props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; - props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; - props->max_mr_size = dev->attr.max_mr_size; - props->max_qp = dev->attr.max_qps; - props->max_qp_wr = dev->attr.max_wrs; - props->max_send_sge = dev->attr.max_sge_per_wr; - props->max_recv_sge = dev->attr.max_sge_per_wr; - props->max_sge_rd = 1; - props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; - props->max_cq = dev->attr.max_cqs; - props->max_cqe = dev->attr.max_cqes_per_cq; - props->max_mr = dev->attr.max_mem_regs; - props->max_pd = dev->attr.max_pds; - props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; - - return 0; -} - -static int iwch_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - pr_debug("%s ibdev %p\n", __func__, ibdev); - - props->port_cap_flags = - IB_PORT_CM_SUP | - IB_PORT_SNMP_TUNNEL_SUP | - IB_PORT_REINIT_SUP | - IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; - props->gid_tbl_len = 1; 
- props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = IB_SPEED_DDR; - props->max_msg_sz = -1; - - return 0; -} - -static ssize_t hw_rev_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); -} -static DEVICE_ATTR_RO(hw_rev); - -static ssize_t hca_type_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - return sprintf(buf, "%s\n", info.driver); -} -static DEVICE_ATTR_RO(hca_type); - -static ssize_t board_id_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct iwch_dev *iwch_dev = - rdma_device_to_drv_device(dev, struct iwch_dev, ibdev); - - pr_debug("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, - iwch_dev->rdev.rnic_info.pdev->device); -} -static DEVICE_ATTR_RO(board_id); - -enum counters { - IPINRECEIVES, - IPINHDRERRORS, - IPINADDRERRORS, - IPINUNKNOWNPROTOS, - IPINDISCARDS, - IPINDELIVERS, - IPOUTREQUESTS, - IPOUTDISCARDS, - IPOUTNOROUTES, - IPREASMTIMEOUT, - IPREASMREQDS, - IPREASMOKS, - IPREASMFAILS, - TCPACTIVEOPENS, - TCPPASSIVEOPENS, - TCPATTEMPTFAILS, - TCPESTABRESETS, - TCPCURRESTAB, - TCPINSEGS, - TCPOUTSEGS, - TCPRETRANSSEGS, - TCPINERRS, - TCPOUTRSTS, - TCPRTOMIN, - TCPRTOMAX, - NR_COUNTERS -}; - -static const char * const names[] = { - [IPINRECEIVES] = "ipInReceives", - [IPINHDRERRORS] = "ipInHdrErrors", - [IPINADDRERRORS] = "ipInAddrErrors", - [IPINUNKNOWNPROTOS] = "ipInUnknownProtos", - [IPINDISCARDS] = "ipInDiscards", - [IPINDELIVERS] = 
"ipInDelivers", - [IPOUTREQUESTS] = "ipOutRequests", - [IPOUTDISCARDS] = "ipOutDiscards", - [IPOUTNOROUTES] = "ipOutNoRoutes", - [IPREASMTIMEOUT] = "ipReasmTimeout", - [IPREASMREQDS] = "ipReasmReqds", - [IPREASMOKS] = "ipReasmOKs", - [IPREASMFAILS] = "ipReasmFails", - [TCPACTIVEOPENS] = "tcpActiveOpens", - [TCPPASSIVEOPENS] = "tcpPassiveOpens", - [TCPATTEMPTFAILS] = "tcpAttemptFails", - [TCPESTABRESETS] = "tcpEstabResets", - [TCPCURRESTAB] = "tcpCurrEstab", - [TCPINSEGS] = "tcpInSegs", - [TCPOUTSEGS] = "tcpOutSegs", - [TCPRETRANSSEGS] = "tcpRetransSegs", - [TCPINERRS] = "tcpInErrs", - [TCPOUTRSTS] = "tcpOutRsts", - [TCPRTOMIN] = "tcpRtoMin", - [TCPRTOMAX] = "tcpRtoMax", -}; - -static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev, - u8 port_num) -{ - BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS); - - /* Our driver only supports device level stats */ - if (port_num != 0) - return NULL; - - return rdma_alloc_hw_stats_struct(names, NR_COUNTERS, - RDMA_HW_STATS_DEFAULT_LIFESPAN); -} - -static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats, - u8 port, int index) -{ - struct iwch_dev *dev; - struct tp_mib_stats m; - int ret; - - if (port != 0 || !stats) - return -ENOSYS; - - pr_debug("%s ibdev %p\n", __func__, ibdev); - dev = to_iwch_dev(ibdev); - ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); - if (ret) - return -ENOSYS; - - stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo; - stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo; - stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo; - stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo; - stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo; - stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo; - stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + 
m.ipOutRequests_lo; - stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo; - stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo; - stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout; - stats->value[IPREASMREQDS] = m.ipReasmReqds; - stats->value[IPREASMOKS] = m.ipReasmOKs; - stats->value[IPREASMFAILS] = m.ipReasmFails; - stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens; - stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens; - stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails; - stats->value[TCPESTABRESETS] = m.tcpEstabResets; - stats->value[TCPCURRESTAB] = m.tcpOutRsts; - stats->value[TCPINSEGS] = m.tcpCurrEstab; - stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo; - stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo; - stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo, - stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo; - stats->value[TCPRTOMIN] = m.tcpRtoMin; - stats->value[TCPRTOMAX] = m.tcpRtoMax; - - return stats->num_counters; -} - -static struct attribute *iwch_class_attributes[] = { - &dev_attr_hw_rev.attr, - &dev_attr_hca_type.attr, - &dev_attr_board_id.attr, - NULL -}; - -static const struct attribute_group iwch_attr_group = { - .attrs = iwch_class_attributes, -}; - -static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; - - err = ib_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - - return 0; -} - -static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) -{ - struct iwch_dev *iwch_dev = to_iwch_dev(ibdev); - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - - pr_debug("%s dev 0x%p\n", __func__, 
iwch_dev); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); -} - -static const struct ib_device_ops iwch_dev_ops = { - .owner = THIS_MODULE, - .driver_id = RDMA_DRIVER_CXGB3, - .uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION, - .uverbs_no_driver_id_binding = 1, - - .alloc_hw_stats = iwch_alloc_stats, - .alloc_mr = iwch_alloc_mr, - .alloc_mw = iwch_alloc_mw, - .alloc_pd = iwch_allocate_pd, - .alloc_ucontext = iwch_alloc_ucontext, - .create_cq = iwch_create_cq, - .create_qp = iwch_create_qp, - .dealloc_mw = iwch_dealloc_mw, - .dealloc_pd = iwch_deallocate_pd, - .dealloc_ucontext = iwch_dealloc_ucontext, - .dereg_mr = iwch_dereg_mr, - .destroy_cq = iwch_destroy_cq, - .destroy_qp = iwch_destroy_qp, - .get_dev_fw_str = get_dev_fw_ver_str, - .get_dma_mr = iwch_get_dma_mr, - .get_hw_stats = iwch_get_mib, - .get_port_immutable = iwch_port_immutable, - .iw_accept = iwch_accept_cr, - .iw_add_ref = iwch_qp_add_ref, - .iw_connect = iwch_connect, - .iw_create_listen = iwch_create_listen, - .iw_destroy_listen = iwch_destroy_listen, - .iw_get_qp = iwch_get_qp, - .iw_reject = iwch_reject_cr, - .iw_rem_ref = iwch_qp_rem_ref, - .map_mr_sg = iwch_map_mr_sg, - .mmap = iwch_mmap, - .modify_qp = iwch_ib_modify_qp, - .poll_cq = iwch_poll_cq, - .post_recv = iwch_post_receive, - .post_send = iwch_post_send, - .query_device = iwch_query_device, - .query_gid = iwch_query_gid, - .query_pkey = iwch_query_pkey, - .query_port = iwch_query_port, - .reg_user_mr = iwch_reg_user_mr, - .req_notify_cq = iwch_arm_cq, - INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd), - INIT_RDMA_OBJ_SIZE(ib_cq, iwch_cq, ibcq), - INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext), -}; - -static int set_netdevs(struct ib_device *ib_dev, struct cxio_rdev *rdev) -{ - int ret; - int i; - - for (i = 0; i < rdev->port_info.nports; i++) { - ret = ib_device_set_netdev(ib_dev, rdev->port_info.lldevs[i], - i + 1); - if (ret) - return ret; - } - return 0; -} - -int 
iwch_register_device(struct iwch_dev *dev) -{ - int err; - - pr_debug("%s iwch_dev %p\n", __func__, dev); - memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); - memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | - IB_DEVICE_MEM_WINDOW | - IB_DEVICE_MEM_MGT_EXTENSIONS; - - /* cxgb3 supports STag 0. */ - dev->ibdev.local_dma_lkey = 0; - - dev->ibdev.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_POLL_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); - dev->ibdev.node_type = RDMA_NODE_RNIC; - BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); - memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); - dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; - dev->ibdev.num_comp_vectors = 1; - dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - - memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name, - sizeof(dev->ibdev.iw_ifname)); - - rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); - ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); - err = set_netdevs(&dev->ibdev, &dev->rdev); - if (err) - return err; - - return ib_register_device(&dev->ibdev, "cxgb3_%d"); -} - -void iwch_unregister_device(struct iwch_dev *dev) -{ - pr_debug("%s iwch_dev %p\n", __func__, dev); - ib_unregister_device(&dev->ibdev); - 
return; -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h deleted file mode 100644 index 8adbe9658935..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __IWCH_PROVIDER_H__ -#define __IWCH_PROVIDER_H__ - -#include <linux/list.h> -#include <linux/spinlock.h> -#include <rdma/ib_verbs.h> -#include <asm/types.h> -#include "t3cdev.h" -#include "iwch.h" -#include "cxio_wr.h" -#include "cxio_hal.h" - -struct iwch_pd { - struct ib_pd ibpd; - u32 pdid; - struct iwch_dev *rhp; -}; - -static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd) -{ - return container_of(ibpd, struct iwch_pd, ibpd); -} - -struct tpt_attributes { - u32 stag; - u32 state:1; - u32 type:2; - u32 rsvd:1; - enum tpt_mem_perm perms; - u32 remote_invaliate_disable:1; - u32 zbva:1; - u32 mw_bind_enable:1; - u32 page_size:5; - - u32 pdid; - u32 qpid; - u32 pbl_addr; - u32 len; - u64 va_fbo; - u32 pbl_size; -}; - -struct iwch_mr { - struct ib_mr ibmr; - struct ib_umem *umem; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; - u64 *pages; - u32 npages; -}; - -typedef struct iwch_mw iwch_mw_handle; - -static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr) -{ - return container_of(ibmr, struct iwch_mr, ibmr); -} - -struct iwch_mw { - struct ib_mw ibmw; - struct iwch_dev *rhp; - u64 kva; - struct tpt_attributes attr; -}; - -static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw) -{ - return container_of(ibmw, struct iwch_mw, ibmw); -} - -struct iwch_cq { - struct ib_cq ibcq; - struct iwch_dev *rhp; - struct t3_cq cq; - spinlock_t lock; - spinlock_t comp_handler_lock; - atomic_t refcnt; - wait_queue_head_t wait; - u32 __user *user_rptr_addr; -}; - -static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq) -{ - return container_of(ibcq, struct iwch_cq, ibcq); -} - -enum IWCH_QP_FLAGS { - QP_QUIESCED = 0x01 -}; - -struct iwch_mpa_attributes { - u8 initiator; - u8 recv_marker_enabled; - u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. 
*/ - u8 crc_enabled; - u8 version; /* 0 or 1 */ -}; - -struct iwch_qp_attributes { - u32 scq; - u32 rcq; - u32 sq_num_entries; - u32 rq_num_entries; - u32 sq_max_sges; - u32 sq_max_sges_rdma_write; - u32 rq_max_sges; - u32 state; - u8 enable_rdma_read; - u8 enable_rdma_write; /* enable inbound Read Resp. */ - u8 enable_bind; - u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */ - /* - * Next QP state. If specify the current state, only the - * QP attributes will be modified. - */ - u32 max_ord; - u32 max_ird; - u32 pd; /* IN */ - u32 next_state; - char terminate_buffer[52]; - u32 terminate_msg_len; - u8 is_terminate_local; - struct iwch_mpa_attributes mpa_attr; /* IN-OUT */ - struct iwch_ep *llp_stream_handle; - char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */ - u32 stream_msg_buf_len; /* Only on Idle -> RTS */ -}; - -struct iwch_qp { - struct ib_qp ibqp; - struct iwch_dev *rhp; - struct iwch_ep *ep; - struct iwch_qp_attributes attr; - struct t3_wq wq; - spinlock_t lock; - atomic_t refcnt; - wait_queue_head_t wait; - enum IWCH_QP_FLAGS flags; -}; - -static inline int qp_quiesced(struct iwch_qp *qhp) -{ - return qhp->flags & QP_QUIESCED; -} - -static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp) -{ - return container_of(ibqp, struct iwch_qp, ibqp); -} - -void iwch_qp_add_ref(struct ib_qp *qp); -void iwch_qp_rem_ref(struct ib_qp *qp); - -struct iwch_ucontext { - struct ib_ucontext ibucontext; - struct cxio_ucontext uctx; - u32 key; - spinlock_t mmap_lock; - struct list_head mmaps; -}; - -static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c) -{ - return container_of(c, struct iwch_ucontext, ibucontext); -} - -struct iwch_mm_entry { - struct list_head entry; - u64 addr; - u32 key; - unsigned len; -}; - -static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext, - u32 key, unsigned len) -{ - struct list_head *pos, *nxt; - struct iwch_mm_entry *mm; - - spin_lock(&ucontext->mmap_lock); - 
list_for_each_safe(pos, nxt, &ucontext->mmaps) { - - mm = list_entry(pos, struct iwch_mm_entry, entry); - if (mm->key == key && mm->len == len) { - list_del_init(&mm->entry); - spin_unlock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, key, - (unsigned long long)mm->addr, mm->len); - return mm; - } - } - spin_unlock(&ucontext->mmap_lock); - return NULL; -} - -static inline void insert_mmap(struct iwch_ucontext *ucontext, - struct iwch_mm_entry *mm) -{ - spin_lock(&ucontext->mmap_lock); - pr_debug("%s key 0x%x addr 0x%llx len %d\n", - __func__, mm->key, (unsigned long long)mm->addr, mm->len); - list_add_tail(&mm->entry, &ucontext->mmaps); - spin_unlock(&ucontext->mmap_lock); -} - -enum iwch_qp_attr_mask { - IWCH_QP_ATTR_NEXT_STATE = 1 << 0, - IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, - IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, - IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, - IWCH_QP_ATTR_MAX_ORD = 1 << 11, - IWCH_QP_ATTR_MAX_IRD = 1 << 12, - IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, - IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, - IWCH_QP_ATTR_MPA_ATTR = 1 << 24, - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, - IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ | - IWCH_QP_ATTR_ENABLE_RDMA_WRITE | - IWCH_QP_ATTR_MAX_ORD | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_STREAM_MSG_BUFFER | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE) -}; - -int iwch_modify_qp(struct iwch_dev *rhp, - struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal); - -enum iwch_qp_state { - IWCH_QP_STATE_IDLE, - IWCH_QP_STATE_RTS, - IWCH_QP_STATE_ERROR, - IWCH_QP_STATE_TERMINATE, - IWCH_QP_STATE_CLOSING, - IWCH_QP_STATE_TOT -}; - -static inline int iwch_convert_state(enum ib_qp_state ib_state) -{ - switch (ib_state) { - case IB_QPS_RESET: - case IB_QPS_INIT: - return IWCH_QP_STATE_IDLE; - case IB_QPS_RTS: - return IWCH_QP_STATE_RTS; - case IB_QPS_SQD: - return 
IWCH_QP_STATE_CLOSING; - case IB_QPS_SQE: - return IWCH_QP_STATE_TERMINATE; - case IB_QPS_ERR: - return IWCH_QP_STATE_ERROR; - default: - return -1; - } -} - -static inline u32 iwch_ib_to_tpt_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | - (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) | - TPT_LOCAL_READ; -} - -static inline u32 iwch_ib_to_tpt_bind_access(int acc) -{ - return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0); -} - -enum iwch_mmid_state { - IWCH_STAG_STATE_VALID, - IWCH_STAG_STATE_INVALID -}; - -enum iwch_qp_query_flags { - IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */ - IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */ - IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */ - - /* - * Quiesce QP context; Consumer - * will NOT replay outstanding WR - */ - IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4, - IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8, - IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ -}; - -u16 iwch_rqes_posted(struct iwch_qp *qhp); -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr); -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr); -int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); -int iwch_post_zb_read(struct iwch_ep *ep); -int iwch_register_device(struct iwch_dev *dev); -void iwch_unregister_device(struct iwch_dev *dev); -void stop_read_rep_timer(struct iwch_qp *qhp); -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, int shift); -int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); -void iwch_free_pbl(struct iwch_mr *mhp); -int iwch_write_pbl(struct iwch_mr *mhp, __be64 
*pages, int npages, int offset); - -#define IWCH_NODE_DESC "cxgb3 Chelsio Communications" - -#endif diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c deleted file mode 100644 index c649faad63f9..000000000000 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ /dev/null @@ -1,1082 +0,0 @@ -/* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include <linux/sched.h> -#include <linux/gfp.h> -#include "iwch_provider.h" -#include "iwch.h" -#include "iwch_cm.h" -#include "cxio_hal.h" -#include "cxio_resource.h" - -#define NO_SUPPORT -1 - -static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - - switch (wr->opcode) { - case IB_WR_SEND: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE; - else - wqe->send.rdmaop = T3_SEND; - wqe->send.rem_stag = 0; - break; - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_SOLICITED) - wqe->send.rdmaop = T3_SEND_WITH_SE_INV; - else - wqe->send.rdmaop = T3_SEND_WITH_INV; - wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); - break; - default: - return -EINVAL; - } - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->send.reserved[0] = 0; - wqe->send.reserved[1] = 0; - wqe->send.reserved[2] = 0; - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) - return -EMSGSIZE; - - plen += wr->sg_list[i].length; - wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 4 + ((wr->num_sge) << 1); - wqe->send.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - int i; - u32 plen; - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - wqe->write.rdmaop = T3_RDMA_WRITE; - wqe->write.reserved[0] = 0; - wqe->write.reserved[1] = 0; - wqe->write.reserved[2] = 0; - wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); - - if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - plen = 4; - wqe->write.sgl[0].stag = wr->ex.imm_data; - wqe->write.sgl[0].len = cpu_to_be32(0); - wqe->write.num_sgle = cpu_to_be32(0); - 
*flit_cnt = 6; - } else { - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - wqe->write.sgl[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->write.sgl[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->write.sgl[i].to = - cpu_to_be64(wr->sg_list[i].addr); - } - wqe->write.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 5 + ((wr->num_sge) << 1); - } - wqe->write.plen = cpu_to_be32(plen); - return 0; -} - -static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - if (wr->num_sge > 1) - return -EINVAL; - wqe->read.rdmaop = T3_READ_REQ; - if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) - wqe->read.local_inv = 1; - else - wqe->read.local_inv = 0; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey); - wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr); - wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); - wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length); - wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr); - *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - return 0; -} - -static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr, - u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) -{ - struct iwch_mr *mhp = to_iwch_mr(wr->mr); - int i; - __be64 *p; - - if (mhp->npages > T3_MAX_FASTREG_DEPTH) - return -EINVAL; - *wr_cnt = 1; - wqe->fastreg.stag = cpu_to_be32(wr->key); - wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length); - wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32); - wqe->fastreg.va_base_lo_fbo = - cpu_to_be32(mhp->ibmr.iova & 0xffffffff); - wqe->fastreg.page_type_perms = cpu_to_be32( - V_FR_PAGE_COUNT(mhp->npages) | - V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) | - V_FR_TYPE(TPT_VATO) | - V_FR_PERMS(iwch_ib_to_tpt_access(wr->access))); - p = &wqe->fastreg.pbl_addrs[0]; - for (i = 0; i < mhp->npages; i++, p++) { - - 
/* If we need a 2nd WR, then set it up */ - if (i == T3_MAX_FASTREG_FRAG) { - *wr_cnt = 2; - wqe = (union t3_wr *)(wq->queue + - Q_PTR2IDX((wq->wptr+1), wq->size_log2)); - build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, - Q_GENBIT(wq->wptr + 1, wq->size_log2), - 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG, - T3_EOP); - - p = &wqe->pbl_frag.pbl_addrs[0]; - } - *p = cpu_to_be64((u64)mhp->pages[i]); - } - *flit_cnt = 5 + mhp->npages; - if (*flit_cnt > 15) - *flit_cnt = 15; - return 0; -} - -static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr, - u8 *flit_cnt) -{ - wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); - wqe->local_inv.reserved = 0; - *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; - return 0; -} - -static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, - u32 num_sgle, u32 * pbl_addr, u8 * page_size) -{ - int i; - struct iwch_mr *mhp; - u64 offset; - for (i = 0; i < num_sgle; i++) { - - mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); - if (!mhp) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (!mhp->attr.state) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - if (mhp->attr.zbva) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EIO; - } - - if (sg_list[i].addr < mhp->attr.va_fbo) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) < - sg_list[i].addr) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - if (sg_list[i].addr + ((u64) sg_list[i].length) > - mhp->attr.va_fbo + ((u64) mhp->attr.len)) { - pr_debug("%s %d\n", __func__, __LINE__); - return -EINVAL; - } - offset = sg_list[i].addr - mhp->attr.va_fbo; - offset += mhp->attr.va_fbo & - ((1UL << (12 + mhp->attr.page_size)) - 1); - pbl_addr[i] = ((mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3) + - (offset >> (12 + mhp->attr.page_size)); - page_size[i] = mhp->attr.page_size; - } - return 0; -} - -static int build_rdma_recv(struct 
iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i, err = 0; - u32 pbl_addr[T3_MAX_SGE]; - u8 page_size[T3_MAX_SGE]; - - err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, - page_size); - if (err) - return err; - wqe->recv.pagesz[0] = page_size[0]; - wqe->recv.pagesz[1] = page_size[1]; - wqe->recv.pagesz[2] = page_size[2]; - wqe->recv.pagesz[3] = page_size[3]; - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - for (i = 0; i < wr->num_sge; i++) { - wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - - /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) & - ((1UL << (12 + page_size[i])) - 1)); - - /* pbl_addr is the adapters address in the PBL */ - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); - } - for (; i < T3_MAX_SGE; i++) { - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = 0; - return 0; -} - -static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, - const struct ib_recv_wr *wr) -{ - int i; - u32 pbl_addr; - u32 pbl_offset; - - - /* - * The T3 HW requires the PBL in the HW recv descriptor to reference - * a PBL entry. So we allocate the max needed PBL memory here and pass - * it to the uP in the recv WR. The uP will build the PBL and setup - * the HW recv descriptor. - */ - pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); - if (!pbl_addr) - return -ENOMEM; - - /* - * Compute the 8B aligned offset. - */ - pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; - - wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); - - for (i = 0; i < wr->num_sge; i++) { - - /* - * Use a 128MB page size. 
This and an imposed 128MB - * sge length limit allows us to require only a 2-entry HW - * PBL for each SGE. This restriction is acceptable since - * since it is not possible to allocate 128MB of contiguous - * DMA coherent memory! - */ - if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) - return -EINVAL; - wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; - - /* - * T3 restricts a recv to all zero-stag or all non-zero-stag. - */ - if (wr->sg_list[i].lkey != 0) - return -EINVAL; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); - wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); - pbl_offset += 2; - } - for (; i < T3_MAX_SGE; i++) { - wqe->recv.pagesz[i] = 0; - wqe->recv.sgl[i].stag = 0; - wqe->recv.sgl[i].len = 0; - wqe->recv.sgl[i].to = 0; - wqe->recv.pbl_addr[i] = 0; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].wr_id = wr->wr_id; - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, - qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; - return 0; -} - -int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, - const struct ib_send_wr **bad_wr) -{ - int err = 0; - u8 uninitialized_var(t3_wr_flit_cnt); - enum t3_wr_opcode t3_wr_opcode = 0; - enum t3_wr_flags t3_wr_flags; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - struct t3_swsq *sqp; - int wr_cnt = 1; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, - qhp->wq.sq_size_log2); - if (num_wrs == 0) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (num_wrs == 0) { - err = -ENOMEM; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - t3_wr_flags = 0; - if 
(wr->send_flags & IB_SEND_SOLICITED) - t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) - t3_wr_flags |= T3_COMPLETION_FLAG; - sqp = qhp->wq.sq + - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - switch (wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_READ_FENCE_FLAG; - t3_wr_opcode = T3_WR_SEND; - err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); - break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - t3_wr_opcode = T3_WR_WRITE; - err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); - break; - case IB_WR_RDMA_READ: - case IB_WR_RDMA_READ_WITH_INV: - t3_wr_opcode = T3_WR_READ; - t3_wr_flags = 0; /* T3 reads are always signaled */ - err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); - if (err) - break; - sqp->read_len = wqe->read.local_len; - if (!qhp->wq.oldest_read) - qhp->wq.oldest_read = sqp; - break; - case IB_WR_REG_MR: - t3_wr_opcode = T3_WR_FASTREG; - err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt, - &wr_cnt, &qhp->wq); - break; - case IB_WR_LOCAL_INV: - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_LOCAL_FENCE_FLAG; - t3_wr_opcode = T3_WR_INV_STAG; - err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); - break; - default: - pr_debug("%s post of type=%d TBD!\n", __func__, - wr->opcode); - err = -EINVAL; - } - if (err) - break; - wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; - sqp->wr_id = wr->wr_id; - sqp->opcode = wr2opcode(t3_wr_opcode); - sqp->sq_wptr = qhp->wq.sq_wptr; - sqp->complete = 0; - sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED); - - build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, t3_wr_flit_cnt, - (wr_cnt == 1) ? 
T3_SOPEOP : T3_SOP); - pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", - __func__, (unsigned long long)wr->wr_id, idx, - Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), - sqp->opcode); - wr = wr->next; - num_wrs--; - qhp->wq.wptr += wr_cnt; - ++(qhp->wq.sq_wptr); - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - -int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, - const struct ib_recv_wr **bad_wr) -{ - int err = 0; - struct iwch_qp *qhp; - u32 idx; - union t3_wr *wqe; - u32 num_wrs; - unsigned long flag; - - qhp = to_iwch_qp(ibqp); - spin_lock_irqsave(&qhp->lock, flag); - if (qhp->attr.state > IWCH_QP_STATE_RTS) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -EINVAL; - goto out; - } - num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, - qhp->wq.rq_size_log2) - 1; - if (!wr) { - spin_unlock_irqrestore(&qhp->lock, flag); - err = -ENOMEM; - goto out; - } - while (wr) { - if (wr->num_sge > T3_MAX_SGE) { - err = -EINVAL; - break; - } - idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - wqe = (union t3_wr *) (qhp->wq.queue + idx); - if (num_wrs) - if (wr->sg_list[0].lkey) - err = build_rdma_recv(qhp, wqe, wr); - else - err = build_zero_stag_recv(qhp, wqe, wr); - else - err = -ENOMEM; - - if (err) - break; - - build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, - Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); - pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n", - __func__, (unsigned long long)wr->wr_id, - idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); - ++(qhp->wq.rq_wptr); - ++(qhp->wq.wptr); - wr = wr->next; - num_wrs--; - } - spin_unlock_irqrestore(&qhp->lock, flag); - if (cxio_wq_db_enabled(&qhp->wq)) - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); - -out: - if (err) - *bad_wr = wr; - return err; -} - 
-static inline void build_term_codes(struct respQ_msg_t *rsp_msg, - u8 *layer_type, u8 *ecode) -{ - int status = TPT_ERR_INTERNAL_ERR; - int tagged = 0; - int opcode = -1; - int rqtype = 0; - int send_inv = 0; - - if (rsp_msg) { - status = CQE_STATUS(rsp_msg->cqe); - opcode = CQE_OPCODE(rsp_msg->cqe); - rqtype = RQ_TYPE(rsp_msg->cqe); - send_inv = (opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV); - tagged = (opcode == T3_RDMA_WRITE) || - (rqtype && (opcode == T3_READ_RESP)); - } - - switch (status) { - case TPT_ERR_STAG: - if (send_inv) { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_INV_STAG; - } - break; - case TPT_ERR_PDID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - if ((opcode == T3_SEND_WITH_INV) || - (opcode == T3_SEND_WITH_SE_INV)) - *ecode = RDMAP_CANT_INV_STAG; - else - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_QPID: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_STAG_NOT_ASSOC; - break; - case TPT_ERR_ACCESS: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_ACC_VIOL; - break; - case TPT_ERR_WRAP: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_TO_WRAP; - break; - case TPT_ERR_BOUND: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_BASE_BOUNDS; - } else { - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; - *ecode = RDMAP_BASE_BOUNDS; - } - break; - case TPT_ERR_INVALIDATE_SHARED_MR: - case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_CANT_INV_STAG; - break; - case TPT_ERR_ECC: - case TPT_ERR_ECC_PSTAG: - case TPT_ERR_INTERNAL_ERR: - *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_OUT_OF_RQE: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_NOBUF; - break; - case TPT_ERR_PBL_ADDR_BOUND: - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = 
DDPT_BASE_BOUNDS; - break; - case TPT_ERR_CRC: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_CRC_ERR; - break; - case TPT_ERR_MARKER: - *layer_type = LAYER_MPA|DDP_LLP; - *ecode = MPA_MARKER_ERR; - break; - case TPT_ERR_PDU_LEN_ERR: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_MSG_TOOBIG; - break; - case TPT_ERR_DDP_VERSION: - if (tagged) { - *layer_type = LAYER_DDP|DDP_TAGGED_ERR; - *ecode = DDPT_INV_VERS; - } else { - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_VERS; - } - break; - case TPT_ERR_RDMA_VERSION: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_VERS; - break; - case TPT_ERR_OPCODE: - *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; - *ecode = RDMAP_INV_OPCODE; - break; - case TPT_ERR_DDP_QUEUE_NUM: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_QN; - break; - case TPT_ERR_MSN: - case TPT_ERR_MSN_GAP: - case TPT_ERR_MSN_RANGE: - case TPT_ERR_IRD_OVERFLOW: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MSN_RANGE; - break; - case TPT_ERR_TBIT: - *layer_type = LAYER_DDP|DDP_LOCAL_CATA; - *ecode = 0; - break; - case TPT_ERR_MO: - *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; - *ecode = DDPU_INV_MO; - break; - default: - *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; - *ecode = 0; - break; - } -} - -int iwch_post_zb_read(struct iwch_ep *ep) -{ - union t3_wr *wqe; - struct sk_buff *skb; - u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; - - pr_debug("%s enter\n", __func__); - skb = alloc_skb(40, GFP_KERNEL); - if (!skb) { - pr_err("%s cannot send zb_read!!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, sizeof(struct t3_rdma_read_wr)); - wqe->read.rdmaop = T3_READ_REQ; - wqe->read.reserved[0] = 0; - wqe->read.reserved[1] = 0; - wqe->read.rem_stag = cpu_to_be32(1); - wqe->read.rem_to = cpu_to_be64(1); - wqe->read.local_stag = cpu_to_be32(1); - wqe->read.local_len = cpu_to_be32(0); - wqe->read.local_to = cpu_to_be64(1); - wqe->send.wrh.op_seop_flags = 
cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| - V_FW_RIWR_LEN(flit_cnt)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); -} - -/* - * This posts a TERMINATE with layer=RDMA, type=catastrophic. - */ -int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) -{ - union t3_wr *wqe; - struct terminate_message *term; - struct sk_buff *skb; - - pr_debug("%s %d\n", __func__, __LINE__); - skb = alloc_skb(40, GFP_ATOMIC); - if (!skb) { - pr_err("%s cannot send TERMINATE!\n", __func__); - return -ENOMEM; - } - wqe = skb_put_zero(skb, 40); - wqe->send.rdmaop = T3_TERMINATE; - - /* immediate data length */ - wqe->send.plen = htonl(4); - - /* immediate data starts here. */ - term = (struct terminate_message *)wqe->send.sgl; - build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); - wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) | - V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); - skb->priority = CPL_PRIORITY_DATA; - return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); -} - -/* - * Assumes qhp lock is held. - */ -static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, - struct iwch_cq *schp) - __releases(&qhp->lock) - __acquires(&qhp->lock) -{ - int count; - int flushed; - - lockdep_assert_held(&qhp->lock); - - pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); - /* take a ref on the qhp since we must release the lock */ - atomic_inc(&qhp->refcnt); - spin_unlock(&qhp->lock); - - /* locking hierarchy: cq lock first, then qp lock. 
*/ - spin_lock(&rchp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&rchp->cq); - cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&rchp->lock); - if (flushed) { - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - } - - /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock(&schp->lock); - spin_lock(&qhp->lock); - cxio_flush_hw_cq(&schp->cq); - cxio_count_scqes(&schp->cq, &qhp->wq, &count); - flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); - spin_unlock(&qhp->lock); - spin_unlock(&schp->lock); - if (flushed) { - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - - /* deref */ - if (atomic_dec_and_test(&qhp->refcnt)) - wake_up(&qhp->wait); - - spin_lock(&qhp->lock); -} - -static void flush_qp(struct iwch_qp *qhp) -{ - struct iwch_cq *rchp, *schp; - - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); - - if (qhp->ibqp.uobject) { - cxio_set_wq_in_error(&qhp->wq); - cxio_set_cq_in_error(&rchp->cq); - spin_lock(&rchp->comp_handler_lock); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock(&rchp->comp_handler_lock); - if (schp != rchp) { - cxio_set_cq_in_error(&schp->cq); - spin_lock(&schp->comp_handler_lock); - (*schp->ibcq.comp_handler)(&schp->ibcq, - schp->ibcq.cq_context); - spin_unlock(&schp->comp_handler_lock); - } - return; - } - __flush_qp(qhp, rchp, schp); -} - - -/* - * Return count of RECV WRs posted - */ -u16 iwch_rqes_posted(struct iwch_qp *qhp) -{ - union t3_wr *wqe = qhp->wq.queue; - u16 count = 0; - - while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { - count++; - wqe++; - } - pr_debug("%s qhp %p count %u\n", __func__, qhp, count); - return count; -} - 
-static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs) -{ - struct t3_rdma_init_attr init_attr; - int ret; - - init_attr.tid = qhp->ep->hwtid; - init_attr.qpid = qhp->wq.qpid; - init_attr.pdid = qhp->attr.pd; - init_attr.scqid = qhp->attr.scq; - init_attr.rcqid = qhp->attr.rcq; - init_attr.rq_addr = qhp->wq.rq_addr; - init_attr.rq_size = 1 << qhp->wq.rq_size_log2; - init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE | - qhp->attr.mpa_attr.recv_marker_enabled | - (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | - (qhp->attr.mpa_attr.crc_enabled << 2); - - init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | - uP_RI_QP_RDMA_WRITE_ENABLE | - uP_RI_QP_BIND_ENABLE; - if (!qhp->ibqp.uobject) - init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE | - uP_RI_QP_FAST_REGISTER_ENABLE; - - init_attr.tcp_emss = qhp->ep->emss; - init_attr.ord = qhp->attr.max_ord; - init_attr.ird = qhp->attr.max_ird; - init_attr.qp_dma_addr = qhp->wq.dma_addr; - init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); - init_attr.rqe_count = iwch_rqes_posted(qhp); - init_attr.flags = qhp->attr.mpa_attr.initiator ? 
MPA_INITIATOR : 0; - init_attr.chan = qhp->ep->l2t->smt_idx; - if (peer2peer) { - init_attr.rtr_type = RTR_READ; - if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) - init_attr.ord = 1; - if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator) - init_attr.ird = 1; - } else - init_attr.rtr_type = 0; - init_attr.irs = qhp->ep->rcv_seq; - pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n", - __func__, - init_attr.rq_addr, init_attr.rq_size, - init_attr.flags, init_attr.qpcaps); - ret = cxio_rdma_init(&rhp->rdev, &init_attr); - pr_debug("%s ret %d\n", __func__, ret); - return ret; -} - -int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, - enum iwch_qp_attr_mask mask, - struct iwch_qp_attributes *attrs, - int internal) -{ - int ret = 0; - struct iwch_qp_attributes newattr = qhp->attr; - unsigned long flag; - int disconnect = 0; - int terminate = 0; - int abort = 0; - int free = 0; - struct iwch_ep *ep = NULL; - - pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__, - qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, - (mask & IWCH_QP_ATTR_NEXT_STATE) ? 
attrs->next_state : -1); - - spin_lock_irqsave(&qhp->lock, flag); - - /* Process attr changes if in IDLE */ - if (mask & IWCH_QP_ATTR_VALID_MODIFY) { - if (qhp->attr.state != IWCH_QP_STATE_IDLE) { - ret = -EIO; - goto out; - } - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ) - newattr.enable_rdma_read = attrs->enable_rdma_read; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE) - newattr.enable_rdma_write = attrs->enable_rdma_write; - if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND) - newattr.enable_bind = attrs->enable_bind; - if (mask & IWCH_QP_ATTR_MAX_ORD) { - if (attrs->max_ord > - rhp->attr.max_rdma_read_qp_depth) { - ret = -EINVAL; - goto out; - } - newattr.max_ord = attrs->max_ord; - } - if (mask & IWCH_QP_ATTR_MAX_IRD) { - if (attrs->max_ird > - rhp->attr.max_rdma_reads_per_qp) { - ret = -EINVAL; - goto out; - } - newattr.max_ird = attrs->max_ird; - } - qhp->attr = newattr; - } - - if (!(mask & IWCH_QP_ATTR_NEXT_STATE)) - goto out; - if (qhp->attr.state == attrs->next_state) - goto out; - - switch (qhp->attr.state) { - case IWCH_QP_STATE_IDLE: - switch (attrs->next_state) { - case IWCH_QP_STATE_RTS: - if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) { - ret = -EINVAL; - goto out; - } - if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) { - ret = -EINVAL; - goto out; - } - qhp->attr.mpa_attr = attrs->mpa_attr; - qhp->attr.llp_stream_handle = attrs->llp_stream_handle; - qhp->ep = qhp->attr.llp_stream_handle; - qhp->attr.state = IWCH_QP_STATE_RTS; - - /* - * Ref the endpoint here and deref when we - * disassociate the endpoint from the QP. This - * happens in CLOSING->IDLE transition or *->ERROR - * transition. 
- */ - get_ep(&qhp->ep->com); - spin_unlock_irqrestore(&qhp->lock, flag); - ret = rdma_init(rhp, qhp, mask, attrs); - spin_lock_irqsave(&qhp->lock, flag); - if (ret) - goto err; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - flush_qp(qhp); - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_RTS: - switch (attrs->next_state) { - case IWCH_QP_STATE_CLOSING: - BUG_ON(kref_read(&qhp->ep->com.kref) < 2); - qhp->attr.state = IWCH_QP_STATE_CLOSING; - if (!internal) { - abort=0; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - break; - case IWCH_QP_STATE_TERMINATE: - qhp->attr.state = IWCH_QP_STATE_TERMINATE; - if (qhp->ibqp.uobject) - cxio_set_wq_in_error(&qhp->wq); - if (!internal) - terminate = 1; - break; - case IWCH_QP_STATE_ERROR: - qhp->attr.state = IWCH_QP_STATE_ERROR; - if (!internal) { - abort=1; - disconnect = 1; - ep = qhp->ep; - get_ep(&ep->com); - } - goto err; - break; - default: - ret = -EINVAL; - goto out; - } - break; - case IWCH_QP_STATE_CLOSING: - if (!internal) { - ret = -EINVAL; - goto out; - } - switch (attrs->next_state) { - case IWCH_QP_STATE_IDLE: - flush_qp(qhp); - qhp->attr.state = IWCH_QP_STATE_IDLE; - qhp->attr.llp_stream_handle = NULL; - put_ep(&qhp->ep->com); - qhp->ep = NULL; - wake_up(&qhp->wait); - break; - case IWCH_QP_STATE_ERROR: - goto err; - default: - ret = -EINVAL; - goto err; - } - break; - case IWCH_QP_STATE_ERROR: - if (attrs->next_state != IWCH_QP_STATE_IDLE) { - ret = -EINVAL; - goto out; - } - - if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) || - !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) { - ret = -EINVAL; - goto out; - } - qhp->attr.state = IWCH_QP_STATE_IDLE; - break; - case IWCH_QP_STATE_TERMINATE: - if (!internal) { - ret = -EINVAL; - goto out; - } - goto err; - break; - default: - pr_err("%s in a bad state %d\n", __func__, qhp->attr.state); - ret = -EINVAL; - goto err; - break; - } - goto out; -err: - pr_debug("%s disassociating ep %p qpid 
0x%x\n", __func__, qhp->ep, - qhp->wq.qpid); - - /* disassociate the LLP connection */ - qhp->attr.llp_stream_handle = NULL; - ep = qhp->ep; - qhp->ep = NULL; - qhp->attr.state = IWCH_QP_STATE_ERROR; - free=1; - wake_up(&qhp->wait); - BUG_ON(!ep); - flush_qp(qhp); -out: - spin_unlock_irqrestore(&qhp->lock, flag); - - if (terminate) - iwch_post_terminate(qhp, NULL); - - /* - * If disconnect is 1, then we need to initiate a disconnect - * on the EP. This can be a normal close (RTS->CLOSING) or - * an abnormal close (RTS/CLOSING->ERROR). - */ - if (disconnect) { - iwch_ep_disconnect(ep, abort, GFP_KERNEL); - put_ep(&ep->com); - } - - /* - * If free is 1, then we've disassociated the EP from the QP - * and we need to dereference the EP. - */ - if (free) - put_ep(&ep->com); - - pr_debug("%s exit state %d\n", __func__, qhp->attr.state); - return ret; -} diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h deleted file mode 100644 index c702dc199e18..000000000000 --- a/drivers/infiniband/hw/cxgb3/tcb.h +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Copyright (c) 2007 Chelsio, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef _TCB_DEFS_H -#define _TCB_DEFS_H - -#define W_TCB_T_STATE 0 -#define S_TCB_T_STATE 0 -#define M_TCB_T_STATE 0xfULL -#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE) - -#define W_TCB_TIMER 0 -#define S_TCB_TIMER 4 -#define M_TCB_TIMER 0x1ULL -#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER) - -#define W_TCB_DACK_TIMER 0 -#define S_TCB_DACK_TIMER 5 -#define M_TCB_DACK_TIMER 0x1ULL -#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER) - -#define W_TCB_DEL_FLAG 0 -#define S_TCB_DEL_FLAG 6 -#define M_TCB_DEL_FLAG 0x1ULL -#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG) - -#define W_TCB_L2T_IX 0 -#define S_TCB_L2T_IX 7 -#define M_TCB_L2T_IX 0x7ffULL -#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX) - -#define W_TCB_SMAC_SEL 0 -#define S_TCB_SMAC_SEL 18 -#define M_TCB_SMAC_SEL 0x3ULL -#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL) - -#define W_TCB_TOS 0 -#define S_TCB_TOS 20 -#define M_TCB_TOS 0x3fULL -#define V_TCB_TOS(x) ((x) << S_TCB_TOS) - -#define W_TCB_MAX_RT 0 -#define S_TCB_MAX_RT 26 -#define M_TCB_MAX_RT 0xfULL -#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT) - -#define W_TCB_T_RXTSHIFT 0 -#define S_TCB_T_RXTSHIFT 30 -#define M_TCB_T_RXTSHIFT 0xfULL -#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT) - -#define W_TCB_T_DUPACKS 1 -#define S_TCB_T_DUPACKS 2 -#define M_TCB_T_DUPACKS 0xfULL -#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS) - -#define W_TCB_T_MAXSEG 1 -#define S_TCB_T_MAXSEG 6 -#define M_TCB_T_MAXSEG 0xfULL -#define V_TCB_T_MAXSEG(x) ((x) << 
S_TCB_T_MAXSEG) - -#define W_TCB_T_FLAGS1 1 -#define S_TCB_T_FLAGS1 10 -#define M_TCB_T_FLAGS1 0xffffffffULL -#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1) - -#define W_TCB_T_MIGRATION 1 -#define S_TCB_T_MIGRATION 20 -#define M_TCB_T_MIGRATION 0x1ULL -#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION) - -#define W_TCB_T_FLAGS2 2 -#define S_TCB_T_FLAGS2 10 -#define M_TCB_T_FLAGS2 0x7fULL -#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2) - -#define W_TCB_SND_SCALE 2 -#define S_TCB_SND_SCALE 17 -#define M_TCB_SND_SCALE 0xfULL -#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE) - -#define W_TCB_RCV_SCALE 2 -#define S_TCB_RCV_SCALE 21 -#define M_TCB_RCV_SCALE 0xfULL -#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE) - -#define W_TCB_SND_UNA_RAW 2 -#define S_TCB_SND_UNA_RAW 25 -#define M_TCB_SND_UNA_RAW 0x7ffffffULL -#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW) - -#define W_TCB_SND_NXT_RAW 3 -#define S_TCB_SND_NXT_RAW 20 -#define M_TCB_SND_NXT_RAW 0x7ffffffULL -#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW) - -#define W_TCB_RCV_NXT 4 -#define S_TCB_RCV_NXT 15 -#define M_TCB_RCV_NXT 0xffffffffULL -#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT) - -#define W_TCB_RCV_ADV 5 -#define S_TCB_RCV_ADV 15 -#define M_TCB_RCV_ADV 0xffffULL -#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV) - -#define W_TCB_SND_MAX_RAW 5 -#define S_TCB_SND_MAX_RAW 31 -#define M_TCB_SND_MAX_RAW 0x7ffffffULL -#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW) - -#define W_TCB_SND_CWND 6 -#define S_TCB_SND_CWND 26 -#define M_TCB_SND_CWND 0x7ffffffULL -#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND) - -#define W_TCB_SND_SSTHRESH 7 -#define S_TCB_SND_SSTHRESH 21 -#define M_TCB_SND_SSTHRESH 0x7ffffffULL -#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH) - -#define W_TCB_T_RTT_TS_RECENT_AGE 8 -#define S_TCB_T_RTT_TS_RECENT_AGE 16 -#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL -#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE) - -#define 
W_TCB_T_RTSEQ_RECENT 9 -#define S_TCB_T_RTSEQ_RECENT 16 -#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL -#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT) - -#define W_TCB_T_SRTT 10 -#define S_TCB_T_SRTT 16 -#define M_TCB_T_SRTT 0xffffULL -#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT) - -#define W_TCB_T_RTTVAR 11 -#define S_TCB_T_RTTVAR 0 -#define M_TCB_T_RTTVAR 0xffffULL -#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR) - -#define W_TCB_TS_LAST_ACK_SENT_RAW 11 -#define S_TCB_TS_LAST_ACK_SENT_RAW 16 -#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL -#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW) - -#define W_TCB_DIP 12 -#define S_TCB_DIP 11 -#define M_TCB_DIP 0xffffffffULL -#define V_TCB_DIP(x) ((x) << S_TCB_DIP) - -#define W_TCB_SIP 13 -#define S_TCB_SIP 11 -#define M_TCB_SIP 0xffffffffULL -#define V_TCB_SIP(x) ((x) << S_TCB_SIP) - -#define W_TCB_DP 14 -#define S_TCB_DP 11 -#define M_TCB_DP 0xffffULL -#define V_TCB_DP(x) ((x) << S_TCB_DP) - -#define W_TCB_SP 14 -#define S_TCB_SP 27 -#define M_TCB_SP 0xffffULL -#define V_TCB_SP(x) ((x) << S_TCB_SP) - -#define W_TCB_TIMESTAMP 15 -#define S_TCB_TIMESTAMP 11 -#define M_TCB_TIMESTAMP 0xffffffffULL -#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP) - -#define W_TCB_TIMESTAMP_OFFSET 16 -#define S_TCB_TIMESTAMP_OFFSET 11 -#define M_TCB_TIMESTAMP_OFFSET 0xfULL -#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET) - -#define W_TCB_TX_MAX 16 -#define S_TCB_TX_MAX 15 -#define M_TCB_TX_MAX 0xffffffffULL -#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX) - -#define W_TCB_TX_HDR_PTR_RAW 17 -#define S_TCB_TX_HDR_PTR_RAW 15 -#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL -#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW) - -#define W_TCB_TX_LAST_PTR_RAW 18 -#define S_TCB_TX_LAST_PTR_RAW 0 -#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL -#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW) - -#define W_TCB_TX_COMPACT 18 -#define S_TCB_TX_COMPACT 17 -#define M_TCB_TX_COMPACT 
0x1ULL -#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT) - -#define W_TCB_RX_COMPACT 18 -#define S_TCB_RX_COMPACT 18 -#define M_TCB_RX_COMPACT 0x1ULL -#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT) - -#define W_TCB_RCV_WND 18 -#define S_TCB_RCV_WND 19 -#define M_TCB_RCV_WND 0x7ffffffULL -#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND) - -#define W_TCB_RX_HDR_OFFSET 19 -#define S_TCB_RX_HDR_OFFSET 14 -#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL -#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET) - -#define W_TCB_RX_FRAG0_START_IDX_RAW 20 -#define S_TCB_RX_FRAG0_START_IDX_RAW 9 -#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW) - -#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21 -#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4 -#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL -#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET) - -#define W_TCB_RX_FRAG0_LEN 21 -#define S_TCB_RX_FRAG0_LEN 31 -#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN) - -#define W_TCB_RX_FRAG1_LEN 22 -#define S_TCB_RX_FRAG1_LEN 26 -#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL -#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN) - -#define W_TCB_NEWRENO_RECOVER 23 -#define S_TCB_NEWRENO_RECOVER 21 -#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL -#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER) - -#define W_TCB_PDU_HAVE_LEN 24 -#define S_TCB_PDU_HAVE_LEN 16 -#define M_TCB_PDU_HAVE_LEN 0x1ULL -#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN) - -#define W_TCB_PDU_LEN 24 -#define S_TCB_PDU_LEN 17 -#define M_TCB_PDU_LEN 0xffffULL -#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN) - -#define W_TCB_RX_QUIESCE 25 -#define S_TCB_RX_QUIESCE 1 -#define M_TCB_RX_QUIESCE 0x1ULL -#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE) - -#define W_TCB_RX_PTR_RAW 25 -#define S_TCB_RX_PTR_RAW 2 -#define M_TCB_RX_PTR_RAW 
0x1ffffULL -#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW) - -#define W_TCB_CPU_NO 25 -#define S_TCB_CPU_NO 19 -#define M_TCB_CPU_NO 0x7fULL -#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO) - -#define W_TCB_ULP_TYPE 25 -#define S_TCB_ULP_TYPE 26 -#define M_TCB_ULP_TYPE 0xfULL -#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE) - -#define W_TCB_RX_FRAG1_PTR_RAW 25 -#define S_TCB_RX_FRAG1_PTR_RAW 30 -#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW) - -#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26 -#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15 -#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW) - -#define W_TCB_RX_FRAG2_PTR_RAW 27 -#define S_TCB_RX_FRAG2_PTR_RAW 10 -#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW) - -#define W_TCB_RX_FRAG2_LEN_RAW 27 -#define S_TCB_RX_FRAG2_LEN_RAW 27 -#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW) - -#define W_TCB_RX_FRAG3_PTR_RAW 28 -#define S_TCB_RX_FRAG3_PTR_RAW 22 -#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL -#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW) - -#define W_TCB_RX_FRAG3_LEN_RAW 29 -#define S_TCB_RX_FRAG3_LEN_RAW 7 -#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW) - -#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30 -#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2 -#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL -#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW) - -#define W_TCB_PDU_HDR_LEN 30 -#define S_TCB_PDU_HDR_LEN 29 -#define M_TCB_PDU_HDR_LEN 0xffULL -#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN) - -#define W_TCB_SLUSH1 31 -#define S_TCB_SLUSH1 5 -#define M_TCB_SLUSH1 0x7ffffULL -#define 
V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1) - -#define W_TCB_ULP_RAW 31 -#define S_TCB_ULP_RAW 24 -#define M_TCB_ULP_RAW 0xffULL -#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW) - -#define W_TCB_DDP_RDMAP_VERSION 25 -#define S_TCB_DDP_RDMAP_VERSION 30 -#define M_TCB_DDP_RDMAP_VERSION 0x1ULL -#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION) - -#define W_TCB_MARKER_ENABLE_RX 25 -#define S_TCB_MARKER_ENABLE_RX 31 -#define M_TCB_MARKER_ENABLE_RX 0x1ULL -#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX) - -#define W_TCB_MARKER_ENABLE_TX 26 -#define S_TCB_MARKER_ENABLE_TX 0 -#define M_TCB_MARKER_ENABLE_TX 0x1ULL -#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX) - -#define W_TCB_CRC_ENABLE 26 -#define S_TCB_CRC_ENABLE 1 -#define M_TCB_CRC_ENABLE 0x1ULL -#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE) - -#define W_TCB_IRS_ULP 26 -#define S_TCB_IRS_ULP 2 -#define M_TCB_IRS_ULP 0x1ffULL -#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP) - -#define W_TCB_ISS_ULP 26 -#define S_TCB_ISS_ULP 11 -#define M_TCB_ISS_ULP 0x1ffULL -#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP) - -#define W_TCB_TX_PDU_LEN 26 -#define S_TCB_TX_PDU_LEN 20 -#define M_TCB_TX_PDU_LEN 0x3fffULL -#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN) - -#define W_TCB_TX_PDU_OUT 27 -#define S_TCB_TX_PDU_OUT 2 -#define M_TCB_TX_PDU_OUT 0x1ULL -#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT) - -#define W_TCB_CQ_IDX_SQ 27 -#define S_TCB_CQ_IDX_SQ 3 -#define M_TCB_CQ_IDX_SQ 0xffffULL -#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ) - -#define W_TCB_CQ_IDX_RQ 27 -#define S_TCB_CQ_IDX_RQ 19 -#define M_TCB_CQ_IDX_RQ 0xffffULL -#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ) - -#define W_TCB_QP_ID 28 -#define S_TCB_QP_ID 3 -#define M_TCB_QP_ID 0xffffULL -#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID) - -#define W_TCB_PD_ID 28 -#define S_TCB_PD_ID 19 -#define M_TCB_PD_ID 0xffffULL -#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID) - -#define W_TCB_STAG 29 -#define 
S_TCB_STAG 3 -#define M_TCB_STAG 0xffffffffULL -#define V_TCB_STAG(x) ((x) << S_TCB_STAG) - -#define W_TCB_RQ_START 30 -#define S_TCB_RQ_START 3 -#define M_TCB_RQ_START 0x3ffffffULL -#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START) - -#define W_TCB_RQ_MSN 30 -#define S_TCB_RQ_MSN 29 -#define M_TCB_RQ_MSN 0x3ffULL -#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN) - -#define W_TCB_RQ_MAX_OFFSET 31 -#define S_TCB_RQ_MAX_OFFSET 7 -#define M_TCB_RQ_MAX_OFFSET 0xfULL -#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET) - -#define W_TCB_RQ_WRITE_PTR 31 -#define S_TCB_RQ_WRITE_PTR 11 -#define M_TCB_RQ_WRITE_PTR 0x3ffULL -#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR) - -#define W_TCB_INB_WRITE_PERM 31 -#define S_TCB_INB_WRITE_PERM 21 -#define M_TCB_INB_WRITE_PERM 0x1ULL -#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM) - -#define W_TCB_INB_READ_PERM 31 -#define S_TCB_INB_READ_PERM 22 -#define M_TCB_INB_READ_PERM 0x1ULL -#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM) - -#define W_TCB_ORD_L_BIT_VLD 31 -#define S_TCB_ORD_L_BIT_VLD 23 -#define M_TCB_ORD_L_BIT_VLD 0x1ULL -#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD) - -#define W_TCB_RDMAP_OPCODE 31 -#define S_TCB_RDMAP_OPCODE 24 -#define M_TCB_RDMAP_OPCODE 0xfULL -#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE) - -#define W_TCB_TX_FLUSH 31 -#define S_TCB_TX_FLUSH 28 -#define M_TCB_TX_FLUSH 0x1ULL -#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH) - -#define W_TCB_TX_OOS_RXMT 31 -#define S_TCB_TX_OOS_RXMT 29 -#define M_TCB_TX_OOS_RXMT 0x1ULL -#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT) - -#define W_TCB_TX_OOS_TXMT 31 -#define S_TCB_TX_OOS_TXMT 30 -#define M_TCB_TX_OOS_TXMT 0x1ULL -#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT) - -#define W_TCB_SLUSH_AUX2 31 -#define S_TCB_SLUSH_AUX2 31 -#define M_TCB_SLUSH_AUX2 0x1ULL -#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2) - -#define W_TCB_RX_FRAG1_PTR_RAW2 25 -#define S_TCB_RX_FRAG1_PTR_RAW2 
30 -#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL -#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2) - -#define W_TCB_RX_DDP_FLAGS 26 -#define S_TCB_RX_DDP_FLAGS 15 -#define M_TCB_RX_DDP_FLAGS 0x3ffULL -#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS) - -#define W_TCB_SLUSH_AUX3 26 -#define S_TCB_SLUSH_AUX3 31 -#define M_TCB_SLUSH_AUX3 0x1ffULL -#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3) - -#define W_TCB_RX_DDP_BUF0_OFFSET 27 -#define S_TCB_RX_DDP_BUF0_OFFSET 8 -#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET) - -#define W_TCB_RX_DDP_BUF0_LEN 27 -#define S_TCB_RX_DDP_BUF0_LEN 30 -#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN) - -#define W_TCB_RX_DDP_BUF1_OFFSET 28 -#define S_TCB_RX_DDP_BUF1_OFFSET 20 -#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET) - -#define W_TCB_RX_DDP_BUF1_LEN 29 -#define S_TCB_RX_DDP_BUF1_LEN 10 -#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL -#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN) - -#define W_TCB_RX_DDP_BUF0_TAG 30 -#define S_TCB_RX_DDP_BUF0_TAG 0 -#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG) - -#define W_TCB_RX_DDP_BUF1_TAG 31 -#define S_TCB_RX_DDP_BUF1_TAG 0 -#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL -#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG) - -#define S_TF_DACK 10 -#define V_TF_DACK(x) ((x) << S_TF_DACK) - -#define S_TF_NAGLE 11 -#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE) - -#define S_TF_RECV_SCALE 12 -#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE) - -#define S_TF_RECV_TSTMP 13 -#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP) - -#define S_TF_RECV_SACK 14 -#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK) - -#define S_TF_TURBO 15 -#define V_TF_TURBO(x) ((x) << S_TF_TURBO) - -#define S_TF_KEEPALIVE 
16 -#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE) - -#define S_TF_TCAM_BYPASS 17 -#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS) - -#define S_TF_CORE_FIN 18 -#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN) - -#define S_TF_CORE_MORE 19 -#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE) - -#define S_TF_MIGRATING 20 -#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING) - -#define S_TF_ACTIVE_OPEN 21 -#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN) - -#define S_TF_ASK_MODE 22 -#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE) - -#define S_TF_NON_OFFLOAD 23 -#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD) - -#define S_TF_MOD_SCHD 24 -#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD) - -#define S_TF_MOD_SCHD_REASON0 25 -#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0) - -#define S_TF_MOD_SCHD_REASON1 26 -#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1) - -#define S_TF_MOD_SCHD_RX 27 -#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX) - -#define S_TF_CORE_PUSH 28 -#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH) - -#define S_TF_RCV_COALESCE_ENABLE 29 -#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE) - -#define S_TF_RCV_COALESCE_PUSH 30 -#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH) - -#define S_TF_RCV_COALESCE_LAST_PSH 31 -#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH) - -#define S_TF_RCV_COALESCE_HEARTBEAT 32 -#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT) - -#define S_TF_HALF_CLOSE 33 -#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE) - -#define S_TF_DACK_MSS 34 -#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS) - -#define S_TF_CCTRL_SEL0 35 -#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0) - -#define S_TF_CCTRL_SEL1 36 -#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1) - -#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37 -#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY) - -#define 
S_TF_TX_PACE_AUTO 38 -#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO) - -#define S_TF_PEER_FIN_HELD 39 -#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD) - -#define S_TF_CORE_URG 40 -#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG) - -#define S_TF_RDMA_ERROR 41 -#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR) - -#define S_TF_SSWS_DISABLED 42 -#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED) - -#define S_TF_DUPACK_COUNT_ODD 43 -#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD) - -#define S_TF_TX_CHANNEL 44 -#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL) - -#define S_TF_RX_CHANNEL 45 -#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL) - -#define S_TF_TX_PACE_FIXED 46 -#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED) - -#define S_TF_RDMA_FLM_ERROR 47 -#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR) - -#define S_TF_RX_FLOW_CONTROL_DISABLE 48 -#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE) - -#endif /* _TCB_DEFS_H */ diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 347dc242fb88..ee1182f9b627 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3379,7 +3379,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) { err = pick_local_ipaddrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ @@ -3401,7 +3401,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { err = pick_local_ip6addrs(dev, cm_id); if (err) - goto fail2; + goto fail3; } /* find a route */ diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 35c284af574d..fe3a7e8561df 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -543,7 +543,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, 
u64 start, u64 length, mhp->rhp = rhp; - mhp->umem = ib_umem_get(udata, start, length, acc, 0); + mhp->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mhp->umem)) goto err_free_skb; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index d373ac0fe2cb..ba83d942997c 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -305,7 +305,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro static int c4iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + int ret = 0; pr_debug("ibdev %p\n", ibdev); + ret = ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width); props->port_cap_flags = IB_PORT_CM_SUP | @@ -315,11 +318,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->active_width = 2; - props->active_speed = IB_SPEED_DDR; props->max_msg_sz = -1; - return 0; + return ret; } static ssize_t hw_rev_show(struct device *dev, diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 2283e432693e..aa7396a1588a 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -60,8 +60,6 @@ struct efa_dev { u64 mem_bar_len; u64 db_bar_addr; u64 db_bar_len; - u8 addr[EFA_GID_SIZE]; - u32 mtu; int admin_msix_vector_idx; struct efa_irq admin_irq; @@ -71,8 +69,6 @@ struct efa_dev { struct efa_ucontext { struct ib_ucontext ibucontext; - struct xarray mmap_xa; - u32 mmap_xa_page; u16 uarn; }; @@ -91,6 +87,7 @@ struct efa_cq { struct efa_ucontext *ucontext; dma_addr_t dma_addr; void *cpu_addr; + struct rdma_user_mmap_entry *mmap_entry; size_t size; u16 cq_idx; }; @@ -101,6 +98,13 @@ struct efa_qp { void *rq_cpu_addr; size_t rq_size; enum ib_qp_state state; + + /* Used for saving mmap_xa entries */ + struct rdma_user_mmap_entry *sq_db_mmap_entry; 
+ struct rdma_user_mmap_entry *llq_desc_mmap_entry; + struct rdma_user_mmap_entry *rq_db_mmap_entry; + struct rdma_user_mmap_entry *rq_mmap_entry; + u32 qp_handle; u32 max_send_wr; u32 max_recv_wr; @@ -147,6 +151,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata); void efa_dealloc_ucontext(struct ib_ucontext *ibucontext); int efa_mmap(struct ib_ucontext *ibucontext, struct vm_area_struct *vma); +void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, u32 flags, diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 2be0469d545f..e96bcb16bd2b 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -362,9 +362,13 @@ struct efa_admin_reg_mr_cmd { /* * permissions - * 0 : local_write_enable - Write permissions: value - * of 1 needed for RQ buffers and for RDMA write - * 7:1 : reserved1 - remote access flags, etc + * 0 : local_write_enable - Local write permissions: + * must be set for RQ buffers and buffers posted for + * RDMA Read requests + * 1 : reserved1 - MBZ + * 2 : remote_read_enable - Remote read permissions: + * must be set to enable RDMA read from the region + * 7:3 : reserved2 - MBZ */ u8 permissions; @@ -558,6 +562,16 @@ struct efa_admin_feature_device_attr_desc { /* Indicates how many bits are used virtual address access */ u8 virt_addr_width; + + /* + * 0 : rdma_read - If set, RDMA Read is supported on + * TX queues + * 31:1 : reserved - MBZ + */ + u32 device_caps; + + /* Max RDMA transfer size in bytes */ + u32 max_rdma_size; }; struct efa_admin_feature_queue_attr_desc { @@ -604,6 +618,9 @@ struct efa_admin_feature_queue_attr_desc { /* The maximum size of LLQ in bytes */ u32 max_llq_size; + + /* Maximum number of SGEs for a single RDMA read WQE */ + u16 max_wr_rdma_sges; }; struct efa_admin_feature_aenq_desc { @@ -618,6 +635,7 
@@ struct efa_admin_feature_network_attr_desc { /* Raw address data in network byte order */ u8 addr[16]; + /* max packet payload size in bytes */ u32 mtu; }; @@ -780,6 +798,8 @@ struct efa_admin_mmio_req_read_less_resp { #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7 #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) #define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0) +#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_SHIFT 2 +#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2) /* create_cq_cmd */ #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 @@ -791,4 +811,7 @@ struct efa_admin_mmio_req_read_less_resp { /* get_set_feature_common_desc */ #define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) +/* feature_device_attr_desc */ +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) + #endif /* _EFA_ADMIN_CMDS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 3c412bc5b94f..0778f4f7dccd 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -317,6 +317,7 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu struct efa_admin_acq_entry *comp, size_t comp_size_in_bytes) { + struct efa_admin_aq_entry *aqe; struct efa_comp_ctx *comp_ctx; u16 queue_size_mask; u16 cmd_id; @@ -350,7 +351,9 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu reinit_completion(&comp_ctx->wait_event); - memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes); + aqe = &aq->sq.entries[pi]; + memset(aqe, 0, sizeof(*aqe)); + memcpy(aqe, cmd, cmd_size_in_bytes); aq->sq.pc++; atomic64_inc(&aq->stats.submitted_cmd); diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index c079f1332082..e20bd84a1014 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -230,8 +230,7 @@ int 
efa_com_register_mr(struct efa_com_dev *edev, mr_cmd.flags |= params->page_shift & EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK; mr_cmd.iova = params->iova; - mr_cmd.permissions |= params->permissions & - EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK; + mr_cmd.permissions = params->permissions; if (params->inline_pbl) { memcpy(mr_cmd.pbl.inline_pbl_array, @@ -423,28 +422,6 @@ static int efa_com_get_feature(struct efa_com_dev *edev, return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0); } -int efa_com_get_network_attr(struct efa_com_dev *edev, - struct efa_com_get_network_attr_result *result) -{ - struct efa_admin_get_feature_resp resp; - int err; - - err = efa_com_get_feature(edev, &resp, - EFA_ADMIN_NETWORK_ATTR); - if (err) { - ibdev_err_ratelimited(edev->efa_dev, - "Failed to get network attributes %d\n", - err); - return err; - } - - memcpy(result->addr, resp.u.network_attr.addr, - sizeof(resp.u.network_attr.addr)); - result->mtu = resp.u.network_attr.mtu; - - return 0; -} - int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result) { @@ -467,6 +444,8 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->phys_addr_width = resp.u.device_attr.phys_addr_width; result->virt_addr_width = resp.u.device_attr.virt_addr_width; result->db_bar = resp.u.device_attr.db_bar; + result->max_rdma_size = resp.u.device_attr.max_rdma_size; + result->device_caps = resp.u.device_attr.device_caps; if (result->admin_api_version < 1) { ibdev_err_ratelimited( @@ -500,6 +479,19 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->max_ah = resp.u.queue_attr.max_ah; result->max_llq_size = resp.u.queue_attr.max_llq_size; result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq; + result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges; + + err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); + if (err) { + ibdev_err_ratelimited(edev->efa_dev, + "Failed to get network attributes %d\n", + err); + 
return err; + } + + memcpy(result->addr, resp.u.network_attr.addr, + sizeof(resp.u.network_attr.addr)); + result->mtu = resp.u.network_attr.mtu; return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 7f6c13052f49..31db5a0cbd5b 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -100,14 +100,11 @@ struct efa_com_destroy_ah_params { u16 pdn; }; -struct efa_com_get_network_attr_result { - u8 addr[EFA_GID_SIZE]; - u32 mtu; -}; - struct efa_com_get_device_attr_result { + u8 addr[EFA_GID_SIZE]; u64 page_size_cap; u64 max_mr_pages; + u32 mtu; u32 fw_version; u32 admin_api_version; u32 device_version; @@ -124,9 +121,12 @@ struct efa_com_get_device_attr_result { u32 max_pd; u32 max_ah; u32 max_llq_size; + u32 max_rdma_size; + u32 device_caps; u16 sub_cqs_per_cq; u16 max_sq_sge; u16 max_rq_sge; + u16 max_wr_rdma_sge; u8 db_bar; }; @@ -181,12 +181,7 @@ struct efa_com_reg_mr_params { * address mapping */ u8 page_shift; - /* - * permissions - * 0: local_write_enable - Write permissions: value of 1 needed - * for RQ buffers and for RDMA write:1: reserved1 - remote - * access flags, etc - */ + /* see permissions field of struct efa_admin_reg_mr_cmd */ u8 permissions; u8 inline_pbl; u8 indirect; @@ -271,8 +266,6 @@ int efa_com_create_ah(struct efa_com_dev *edev, struct efa_com_create_ah_result *result); int efa_com_destroy_ah(struct efa_com_dev *edev, struct efa_com_destroy_ah_params *params); -int efa_com_get_network_attr(struct efa_com_dev *edev, - struct efa_com_get_network_attr_result *result); int efa_com_get_device_attr(struct efa_com_dev *edev, struct efa_com_get_device_attr_result *result); int efa_com_get_hw_hints(struct efa_com_dev *edev, diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 83858f7e83d0..faf3ff1bca2a 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -30,15 +30,6 
@@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl); (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) -static void efa_update_network_attr(struct efa_dev *dev, - struct efa_com_get_network_attr_result *network_attr) -{ - memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr)); - dev->mtu = network_attr->mtu; - - dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr); -} - /* This handler will called for unknown event group or unimplemented handlers */ static void unimplemented_aenq_handler(void *data, struct efa_admin_aenq_entry *aenq_e) @@ -217,6 +208,7 @@ static const struct ib_device_ops efa_dev_ops = { .get_link_layer = efa_port_link_layer, .get_port_immutable = efa_get_port_immutable, .mmap = efa_mmap, + .mmap_free = efa_mmap_free, .modify_qp = efa_modify_qp, .query_device = efa_query_device, .query_gid = efa_query_gid, @@ -233,7 +225,6 @@ static const struct ib_device_ops efa_dev_ops = { static int efa_ib_device_add(struct efa_dev *dev) { - struct efa_com_get_network_attr_result network_attr; struct efa_com_get_hw_hints_result hw_hints; struct pci_dev *pdev = dev->pdev; int err; @@ -249,12 +240,6 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) return err; - err = efa_com_get_network_attr(&dev->edev, &network_attr); - if (err) - goto err_release_doorbell_bar; - - efa_update_network_attr(dev, &network_attr); - err = efa_com_get_hw_hints(&dev->edev, &hw_hints); if (err) goto err_release_doorbell_bar; diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 4edae89e8e3c..c9d294caa27a 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -13,10 +13,6 @@ #include "efa.h" -#define EFA_MMAP_FLAG_SHIFT 56 -#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0) -#define EFA_MMAP_INVALID U64_MAX - enum { EFA_MMAP_DMA_PAGE = 0, EFA_MMAP_IO_WC, @@ -27,20 +23,12 @@ enum { (BIT(EFA_ADMIN_FATAL_ERROR) | 
BIT(EFA_ADMIN_WARNING) | \ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE)) -struct efa_mmap_entry { - void *obj; +struct efa_user_mmap_entry { + struct rdma_user_mmap_entry rdma_entry; u64 address; - u64 length; - u32 mmap_page; u8 mmap_flag; }; -static inline u64 get_mmap_key(const struct efa_mmap_entry *efa) -{ - return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) | - ((u64)efa->mmap_page << PAGE_SHIFT); -} - #define EFA_DEFINE_STATS(op) \ op(EFA_TX_BYTES, "tx_bytes") \ op(EFA_TX_PKTS, "tx_pkts") \ @@ -82,8 +70,6 @@ static const char *const efa_stats_names[] = { #define EFA_CHUNK_USED_SIZE \ ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE) -#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE - struct pbl_chunk { dma_addr_t dma_addr; u64 *buf; @@ -147,6 +133,17 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah) return container_of(ibah, struct efa_ah, ibah); } +static inline struct efa_user_mmap_entry * +to_emmap(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); +} + +static inline bool is_rdma_read_cap(struct efa_dev *dev) +{ + return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; +} + #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ FIELD_SIZEOF(typeof(x), fld) <= (sz)) @@ -172,106 +169,6 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr, return addr; } -/* - * This is only called when the ucontext is destroyed and there can be no - * concurrent query via mmap or allocate on the xarray, thus we can be sure no - * other thread is using the entry pointer. We also know that all the BAR - * pages have either been zap'd or munmaped at this point. Normal pages are - * refcounted and will be freed at the proper time. 
- */ -static void mmap_entries_remove_free(struct efa_dev *dev, - struct efa_ucontext *ucontext) -{ - struct efa_mmap_entry *entry; - unsigned long mmap_page; - - xa_for_each(&ucontext->mmap_xa, mmap_page, entry) { - xa_erase(&ucontext->mmap_xa, mmap_page); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, get_mmap_key(entry), entry->address, - entry->length); - if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) - /* DMA mapping is already gone, now free the pages */ - free_pages_exact(phys_to_virt(entry->address), - entry->length); - kfree(entry); - } -} - -static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev, - struct efa_ucontext *ucontext, - u64 key, u64 len) -{ - struct efa_mmap_entry *entry; - u64 mmap_page; - - mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT; - if (mmap_page > U32_MAX) - return NULL; - - entry = xa_load(&ucontext->mmap_xa, mmap_page); - if (!entry || get_mmap_key(entry) != key || entry->length != len) - return NULL; - - ibdev_dbg(&dev->ibdev, - "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n", - entry->obj, key, entry->address, entry->length); - - return entry; -} - -/* - * Note this locking scheme cannot support removal of entries, except during - * ucontext destruction when the core code guarentees no concurrency. 
- */ -static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext, - void *obj, u64 address, u64 length, u8 mmap_flag) -{ - struct efa_mmap_entry *entry; - u32 next_mmap_page; - int err; - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return EFA_MMAP_INVALID; - - entry->obj = obj; - entry->address = address; - entry->length = length; - entry->mmap_flag = mmap_flag; - - xa_lock(&ucontext->mmap_xa); - if (check_add_overflow(ucontext->mmap_xa_page, - (u32)(length >> PAGE_SHIFT), - &next_mmap_page)) - goto err_unlock; - - entry->mmap_page = ucontext->mmap_xa_page; - ucontext->mmap_xa_page = next_mmap_page; - err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry, - GFP_KERNEL); - if (err) - goto err_unlock; - - xa_unlock(&ucontext->mmap_xa); - - ibdev_dbg( - &dev->ibdev, - "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n", - entry->obj, entry->address, entry->length, get_mmap_key(entry)); - - return get_mmap_key(entry); - -err_unlock: - xa_unlock(&ucontext->mmap_xa); - kfree(entry); - return EFA_MMAP_INVALID; - -} - int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata) @@ -306,12 +203,17 @@ int efa_query_device(struct ib_device *ibdev, dev_attr->max_rq_depth); props->max_send_sge = dev_attr->max_sq_sge; props->max_recv_sge = dev_attr->max_rq_sge; + props->max_sge_rd = dev_attr->max_wr_rdma_sge; if (udata && udata->outlen) { resp.max_sq_sge = dev_attr->max_sq_sge; resp.max_rq_sge = dev_attr->max_rq_sge; resp.max_sq_wr = dev_attr->max_sq_depth; resp.max_rq_wr = dev_attr->max_rq_depth; + resp.max_rdma_size = dev_attr->max_rdma_size; + + if (is_rdma_read_cap(dev)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); @@ -338,9 +240,9 @@ int efa_query_port(struct ib_device *ibdev, u8 port, props->pkey_tbl_len = 1; props->active_speed = IB_SPEED_EDR; props->active_width = IB_WIDTH_4X; - 
props->max_mtu = ib_mtu_int_to_enum(dev->mtu); - props->active_mtu = ib_mtu_int_to_enum(dev->mtu); - props->max_msg_sz = dev->mtu; + props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu); + props->max_msg_sz = dev->dev_attr.mtu; props->max_vl_num = 1; return 0; @@ -401,7 +303,7 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index, { struct efa_dev *dev = to_edev(ibdev); - memcpy(gid->raw, dev->addr, sizeof(dev->addr)); + memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr)); return 0; } @@ -485,8 +387,19 @@ static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) return efa_com_destroy_qp(&dev->edev, ¶ms); } +static void efa_qp_user_mmap_entries_remove(struct efa_ucontext *uctx, + struct efa_qp *qp) +{ + rdma_user_mmap_entry_remove(qp->rq_mmap_entry); + rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry); + rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry); + rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry); +} + int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { + struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct efa_ucontext, ibucontext); struct efa_dev *dev = to_edev(ibqp->pd->device); struct efa_qp *qp = to_eqp(ibqp); int err; @@ -505,61 +418,101 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) DMA_TO_DEVICE); } + efa_qp_user_mmap_entries_remove(ucontext, qp); kfree(qp); return 0; } +static struct rdma_user_mmap_entry* +efa_user_mmap_entry_insert(struct ib_ucontext *ucontext, + u64 address, size_t length, + u8 mmap_flag, u64 *offset) +{ + struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); + int err; + + if (!entry) + return NULL; + + entry->address = address; + entry->mmap_flag = mmap_flag; + + err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry, + length); + if (err) { + kfree(entry); + return NULL; + } + *offset = rdma_user_mmap_get_offset(&entry->rdma_entry); + + return 
&entry->rdma_entry; +} + static int qp_mmap_entries_setup(struct efa_qp *qp, struct efa_dev *dev, struct efa_ucontext *ucontext, struct efa_com_create_qp_params *params, struct efa_ibv_create_qp_resp *resp) { - /* - * Once an entry is inserted it might be mmapped, hence cannot be - * cleaned up until dealloc_ucontext. - */ - resp->sq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->sq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->sq_db_mmap_key == EFA_MMAP_INVALID) + size_t length; + u64 address; + + address = dev->db_bar_addr + resp->sq_db_offset; + qp->sq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, + PAGE_SIZE, EFA_MMAP_IO_NC, + &resp->sq_db_mmap_key); + if (!qp->sq_db_mmap_entry) return -ENOMEM; resp->sq_db_offset &= ~PAGE_MASK; - resp->llq_desc_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->mem_bar_addr + resp->llq_desc_offset, - PAGE_ALIGN(params->sq_ring_size_in_bytes + - (resp->llq_desc_offset & ~PAGE_MASK)), - EFA_MMAP_IO_WC); - if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->mem_bar_addr + resp->llq_desc_offset; + length = PAGE_ALIGN(params->sq_ring_size_in_bytes + + (resp->llq_desc_offset & ~PAGE_MASK)); + + qp->llq_desc_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, length, + EFA_MMAP_IO_WC, + &resp->llq_desc_mmap_key); + if (!qp->llq_desc_mmap_entry) + goto err_remove_mmap; resp->llq_desc_offset &= ~PAGE_MASK; if (qp->rq_size) { - resp->rq_db_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - dev->db_bar_addr + resp->rq_db_offset, - PAGE_SIZE, EFA_MMAP_IO_NC); - if (resp->rq_db_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = dev->db_bar_addr + resp->rq_db_offset; + + qp->rq_db_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, PAGE_SIZE, + EFA_MMAP_IO_NC, + &resp->rq_db_mmap_key); + if (!qp->rq_db_mmap_entry) + goto err_remove_mmap; resp->rq_db_offset &= ~PAGE_MASK; - 
resp->rq_mmap_key = - mmap_entry_insert(dev, ucontext, qp, - virt_to_phys(qp->rq_cpu_addr), - qp->rq_size, EFA_MMAP_DMA_PAGE); - if (resp->rq_mmap_key == EFA_MMAP_INVALID) - return -ENOMEM; + address = virt_to_phys(qp->rq_cpu_addr); + qp->rq_mmap_entry = + efa_user_mmap_entry_insert(&ucontext->ibucontext, + address, qp->rq_size, + EFA_MMAP_DMA_PAGE, + &resp->rq_mmap_key); + if (!qp->rq_mmap_entry) + goto err_remove_mmap; resp->rq_mmap_size = qp->rq_size; } return 0; + +err_remove_mmap: + efa_qp_user_mmap_entries_remove(ucontext, qp); + + return -ENOMEM; } static int efa_qp_validate_cap(struct efa_dev *dev, @@ -634,7 +587,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, struct efa_dev *dev = to_edev(ibpd->device); struct efa_ibv_create_qp_resp resp = {}; struct efa_ibv_create_qp cmd = {}; - bool rq_entry_inserted = false; struct efa_ucontext *ucontext; struct efa_qp *qp; int err; @@ -742,7 +694,6 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, if (err) goto err_destroy_qp; - rq_entry_inserted = true; qp->qp_handle = create_qp_resp.qp_handle; qp->ibqp.qp_num = create_qp_resp.qp_num; qp->ibqp.qp_type = init_attr->qp_type; @@ -759,7 +710,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, ibdev_dbg(&dev->ibdev, "Failed to copy udata for qp[%u]\n", create_qp_resp.qp_num); - goto err_destroy_qp; + goto err_remove_mmap_entries; } } @@ -767,13 +718,16 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd, return &qp->ibqp; +err_remove_mmap_entries: + efa_qp_user_mmap_entries_remove(ucontext, qp); err_destroy_qp: efa_destroy_qp_handle(dev, create_qp_resp.qp_handle); err_free_mapped: if (qp->rq_size) { dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size, DMA_TO_DEVICE); - if (!rq_entry_inserted) + + if (!qp->rq_mmap_entry) free_pages_exact(qp->rq_cpu_addr, qp->rq_size); } err_free_qp: @@ -897,16 +851,18 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) efa_destroy_cq_idx(dev, cq->cq_idx); dma_unmap_single(&dev->pdev->dev, cq->dma_addr, 
cq->size, DMA_FROM_DEVICE); + rdma_user_mmap_entry_remove(cq->mmap_entry); } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, struct efa_ibv_create_cq_resp *resp) { resp->q_mmap_size = cq->size; - resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq, - virt_to_phys(cq->cpu_addr), - cq->size, EFA_MMAP_DMA_PAGE); - if (resp->q_mmap_key == EFA_MMAP_INVALID) + cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, + virt_to_phys(cq->cpu_addr), + cq->size, EFA_MMAP_DMA_PAGE, + &resp->q_mmap_key); + if (!cq->mmap_entry) return -ENOMEM; return 0; @@ -924,7 +880,6 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct efa_dev *dev = to_edev(ibdev); struct efa_ibv_create_cq cmd = {}; struct efa_cq *cq = to_ecq(ibcq); - bool cq_entry_inserted = false; int entries = attr->cqe; int err; @@ -1013,15 +968,13 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_destroy_cq; } - cq_entry_inserted = true; - if (udata->outlen) { err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { ibdev_dbg(ibdev, "Failed to copy udata for create_cq\n"); - goto err_destroy_cq; + goto err_remove_mmap; } } @@ -1030,13 +983,16 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; +err_remove_mmap: + rdma_user_mmap_entry_remove(cq->mmap_entry); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size, DMA_FROM_DEVICE); - if (!cq_entry_inserted) + if (!cq->mmap_entry) free_pages_exact(cq->cpu_addr, cq->size); + err_out: atomic64_inc(&dev->stats.sw_stats.create_cq_err); return err; @@ -1396,6 +1352,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, struct efa_com_reg_mr_params params = {}; struct efa_com_reg_mr_result result = {}; struct pbl_context pbl; + int supp_access_flags; unsigned int pg_sz; struct efa_mr *mr; int inline_size; @@ -1409,10 
+1366,14 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_out; } - if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) { + supp_access_flags = + IB_ACCESS_LOCAL_WRITE | + (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0); + + if (access_flags & ~supp_access_flags) { ibdev_dbg(&dev->ibdev, "Unsupported access flags[%#x], supported[%#x]\n", - access_flags, EFA_SUPPORTED_ACCESS_FLAGS); + access_flags, supp_access_flags); err = -EOPNOTSUPP; goto err_out; } @@ -1423,7 +1384,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_out; } - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); ibdev_dbg(&dev->ibdev, @@ -1434,7 +1395,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, params.pd = to_epd(ibpd)->pdn; params.iova = virt_addr; params.mr_length_in_bytes = length; - params.permissions = access_flags & 0x1; + params.permissions = access_flags; pg_sz = ib_umem_find_best_pgsz(mr->umem, dev->dev_attr.page_size_cap, @@ -1556,7 +1517,6 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) goto err_out; ucontext->uarn = result.uarn; - xa_init(&ucontext->mmap_xa); resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE; resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH; @@ -1585,38 +1545,56 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext) struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); - mmap_entries_remove_free(dev, ucontext); efa_dealloc_uar(dev, ucontext->uarn); } +void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry) +{ + struct efa_user_mmap_entry *entry = to_emmap(rdma_entry); + + /* DMA mapping is already gone, now free the pages */ + if (entry->mmap_flag == EFA_MMAP_DMA_PAGE) + free_pages_exact(phys_to_virt(entry->address), + 
entry->rdma_entry.npages * PAGE_SIZE); + kfree(entry); +} + static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, - struct vm_area_struct *vma, u64 key, u64 length) + struct vm_area_struct *vma) { - struct efa_mmap_entry *entry; + struct rdma_user_mmap_entry *rdma_entry; + struct efa_user_mmap_entry *entry; unsigned long va; + int err = 0; u64 pfn; - int err; - entry = mmap_entry_get(dev, ucontext, key, length); - if (!entry) { - ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n", - key); + rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma); + if (!rdma_entry) { + ibdev_dbg(&dev->ibdev, + "pgoff[%#lx] does not have valid entry\n", + vma->vm_pgoff); return -EINVAL; } + entry = to_emmap(rdma_entry); ibdev_dbg(&dev->ibdev, - "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n", - entry->address, length, entry->mmap_flag); + "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag); pfn = entry->address >> PAGE_SHIFT; switch (entry->mmap_flag) { case EFA_MMAP_IO_NC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_noncached(vma->vm_page_prot)); + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot), + rdma_entry); break; case EFA_MMAP_IO_WC: - err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length, - pgprot_writecombine(vma->vm_page_prot)); + err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, + entry->rdma_entry.npages * PAGE_SIZE, + pgprot_writecombine(vma->vm_page_prot), + rdma_entry); break; case EFA_MMAP_DMA_PAGE: for (va = vma->vm_start; va < vma->vm_end; @@ -1633,12 +1611,13 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, if (err) { ibdev_dbg( &dev->ibdev, - "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n", - entry->address, length, entry->mmap_flag, err); - return err; + 
"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", + entry->address, rdma_entry->npages * PAGE_SIZE, + entry->mmap_flag, err); } - return 0; + rdma_user_mmap_entry_put(rdma_entry); + return err; } int efa_mmap(struct ib_ucontext *ibucontext, @@ -1646,26 +1625,13 @@ int efa_mmap(struct ib_ucontext *ibucontext, { struct efa_ucontext *ucontext = to_eucontext(ibucontext); struct efa_dev *dev = to_edev(ibucontext->device); - u64 length = vma->vm_end - vma->vm_start; - u64 key = vma->vm_pgoff << PAGE_SHIFT; + size_t length = vma->vm_end - vma->vm_start; ibdev_dbg(&dev->ibdev, - "start %#lx, end %#lx, length = %#llx, key = %#llx\n", - vma->vm_start, vma->vm_end, length, key); - - if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) { - ibdev_dbg(&dev->ibdev, - "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n", - length, PAGE_SIZE, vma->vm_flags); - return -EINVAL; - } - - if (vma->vm_flags & VM_EXEC) { - ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n"); - return -EPERM; - } + "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", + vma->vm_start, vma->vm_end, length, vma->vm_pgoff); - return __efa_mmap(dev, ucontext, vma, key, length); + return __efa_mmap(dev, ucontext, vma); } static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index d8ff063a5419..a51bcd2b4391 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -4915,16 +4915,11 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, */ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 
*out_mad_pkey_index) { - switch (in_mad->base_version) { + switch (in_mad->mad_hdr.base_version) { case OPA_MGMT_BASE_VERSION: - if (unlikely(in_mad_size != sizeof(struct opa_mad))) { - dev_err(ibdev->dev.parent, "invalid in_mad_size\n"); - return IB_MAD_RESULT_FAILURE; - } return hfi1_process_opa_mad(ibdev, mad_flags, port, in_wc, in_grh, (struct opa_mad *)in_mad, @@ -4932,10 +4927,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, out_mad_size, out_mad_pkey_index); case IB_MGMT_BASE_VERSION: - return hfi1_process_ib_mad(ibdev, mad_flags, port, - in_wc, in_grh, - (const struct ib_mad *)in_mad, - (struct ib_mad *)out_mad); + return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc, + in_grh, in_mad, out_mad); default: break; } diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index cbf7faa5038c..36593f2efe26 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -634,7 +634,7 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id, u32 config_data, const char *message) { u8 i; - int ret = HCMD_SUCCESS; + int ret; for (i = 0; i < 4; i++) { ret = load_8051_config(ppd->dd, field_id, i, config_data); diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index ae9582ddbc8f..b0e9bf7cd150 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -330,9 +330,8 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp); void hfi1_node_desc_chg(struct hfi1_ibport *ibp); int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, size_t in_mad_size, - struct ib_mad_hdr *out_mad, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in_mad, struct ib_mad *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); /* * The PSN_MASK and PSN_SHIFT allow for diff --git 
a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index d602b698b57e..4921c1e40ccd 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,23 +1,34 @@ # SPDX-License-Identifier: GPL-2.0-only config INFINIBAND_HNS - bool "HNS RoCE Driver" + tristate "HNS RoCE Driver" depends on NET_VENDOR_HISILICON depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on (HNS_DSAF && HNS_ENET) || HNS3 ---help--- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine is used in Hisilicon Hip06 and more further ICT SoC based on platform device. + To compile HIP06 or HIP08 driver as module, choose M here. + config INFINIBAND_HNS_HIP06 - tristate "Hisilicon Hip06 Family RoCE support" + bool "Hisilicon Hip06 Family RoCE support" depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET + depends on INFINIBAND_HNS=m || (HNS_DSAF=y && HNS_ENET=y) ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and Hip07 SoC. These RoCE engines are platform devices. + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v1 + config INFINIBAND_HNS_HIP08 - tristate "Hisilicon Hip08 Family RoCE support" + bool "Hisilicon Hip08 Family RoCE support" depends on INFINIBAND_HNS && PCI && HNS3 + depends on INFINIBAND_HNS=m || HNS3=y ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. The RoCE engine is a PCI device. + + To compile this driver, choose Y here: if INFINIBAND_HNS is m, this + module will be called hns-roce-hw-v2. 
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 449a2d81319d..e105945b94a1 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -9,8 +9,12 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o +ifdef CONFIG_INFINIBAND_HNS_HIP06 hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o +endif +ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o +endif diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 90e08c0c332d..8a522e14ef62 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -46,32 +46,32 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *gid_attr; struct device *dev = hr_dev->dev; struct hns_roce_ah *ah = to_hr_ah(ibah); - u16 vlan_tag = 0xffff; const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); + u16 vlan_id = 0xffff; bool vlan_en = false; int ret; gid_attr = ah_attr->grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; /* Get mac address */ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); - if (vlan_tag < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { vlan_en = true; - vlan_tag |= (rdma_ah_get_sl(ah_attr) & + vlan_id |= (rdma_ah_get_sl(ah_attr) & HNS_ROCE_VLAN_SL_BIT_MASK) << HNS_ROCE_VLAN_SL_SHIFT; } ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan = 
vlan_tag; + ah->av.vlan_id = vlan_id; ah->av.vlan_en = vlan_en; - dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, - ah->av.vlan); + dev_dbg(dev, "gid_index = 0x%x,vlan_id = 0x%x\n", ah->av.gid_index, + ah->av.vlan_id); if (rdma_ah_get_static_rate(ah_attr)) ah->av.stat_rate = IB_RATE_10_GBPS; diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 8c063c598d2a..da574c26e063 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -55,7 +55,7 @@ int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) bitmap->last = 0; *obj |= bitmap->top; } else { - ret = -1; + ret = -EINVAL; } spin_unlock(&bitmap->lock); @@ -100,7 +100,7 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, } *obj |= bitmap->top; } else { - ret = -1; + ret = -EINVAL; } spin_unlock(&bitmap->lock); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 2b6ac646ca9a..1915bacaded0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -115,12 +115,12 @@ enum { enum { /* TPT commands */ - HNS_ROCE_CMD_SW2HW_MPT = 0xd, - HNS_ROCE_CMD_HW2SW_MPT = 0xf, + HNS_ROCE_CMD_CREATE_MPT = 0xd, + HNS_ROCE_CMD_DESTROY_MPT = 0xf, /* CQ commands */ - HNS_ROCE_CMD_SW2HW_CQ = 0x16, - HNS_ROCE_CMD_HW2SW_CQ = 0x17, + HNS_ROCE_CMD_CREATE_CQC = 0x16, + HNS_ROCE_CMD_DESTROY_CQC = 0x17, /* QP/EE commands */ HNS_ROCE_CMD_RST2INIT_QP = 0x19, @@ -129,14 +129,14 @@ enum { HNS_ROCE_CMD_RTS2RTS_QP = 0x1c, HNS_ROCE_CMD_2ERR_QP = 0x1e, HNS_ROCE_CMD_RTS2SQD_QP = 0x1f, - HNS_ROCE_CMD_SQD2SQD_QP = 0x38, HNS_ROCE_CMD_SQD2RTS_QP = 0x20, HNS_ROCE_CMD_2RST_QP = 0x21, HNS_ROCE_CMD_QUERY_QP = 0x22, - HNS_ROCE_CMD_SW2HW_SRQ = 0x70, + HNS_ROCE_CMD_SQD2SQD_QP = 0x38, + HNS_ROCE_CMD_CREATE_SRQ = 0x70, HNS_ROCE_CMD_MODIFY_SRQC = 0x72, HNS_ROCE_CMD_QUERY_SRQC = 0x73, - HNS_ROCE_CMD_HW2SW_SRQ = 
0x74, + HNS_ROCE_CMD_DESTROY_SRQ = 0x74, }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 22541d19cd09..af1d8823b3f0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -39,51 +39,8 @@ #include <rdma/hns-abi.h> #include "hns_roce_common.h" -static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq) -{ - struct ib_cq *ibcq = &hr_cq->ib_cq; - - ibcq->comp_handler(ibcq, ibcq->cq_context); -} - -static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, - enum hns_roce_event event_type) -{ - struct hns_roce_dev *hr_dev; - struct ib_event event; - struct ib_cq *ibcq; - - ibcq = &hr_cq->ib_cq; - hr_dev = to_hr_dev(ibcq->device); - - if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && - event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && - event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { - dev_err(hr_dev->dev, - "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n", - event_type, hr_cq->cqn); - return; - } - - if (ibcq->event_handler) { - event.device = ibcq->device; - event.event = IB_EVENT_CQ_ERR; - event.element.cq = ibcq; - ibcq->event_handler(&event, ibcq->cq_context); - } -} - -static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long cq_num) -{ - return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, - HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, - struct hns_roce_mtt *hr_mtt, - struct hns_roce_cq *hr_cq, int vector) +static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) { struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_table *mtt_table; @@ -101,35 +58,32 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, else mtt_table = &hr_dev->mr_table.mtt_table; - mtts = 
hns_roce_table_find(hr_dev, mtt_table, - hr_mtt->first_seg, &dma_handle); - if (!mtts) { - dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n"); - return -EINVAL; - } + mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg, + &dma_handle); - if (vector >= hr_dev->caps.num_comp_vectors) { - dev_err(dev, "CQ alloc.Invalid vector.\n"); + if (!mtts) { + dev_err(dev, "Failed to find mtt for CQ buf.\n"); return -EINVAL; } - hr_cq->vector = vector; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); - if (ret == -1) { - dev_err(dev, "CQ alloc.Failed to alloc index.\n"); - return -ENOMEM; + if (ret) { + dev_err(dev, "Num of CQ out of range.\n"); + return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - dev_err(dev, "CQ alloc.Failed to get context mem.\n"); + dev_err(dev, + "Get context mem failed(%d) when CQ(0x%lx) alloc.\n", + ret, hr_cq->cqn); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - dev_err(dev, "CQ alloc failed xa_store.\n"); + dev_err(dev, "Failed to xa_store CQ.\n"); goto err_put; } @@ -140,14 +94,16 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, goto err_xa; } - hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle, - nent, vector); + hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); /* Send mailbox to hw */ - ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 0, + HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); + dev_err(dev, + "Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n", + ret, hr_cq->cqn); goto err_xa; } @@ -170,24 +126,17 @@ err_out: return ret; } -static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - 
unsigned long cq_num) -{ - return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; int ret; - ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn); + ret = hns_roce_cmd_mbox(hr_dev, 0, 0, hr_cq->cqn, 1, + HNS_ROCE_CMD_DESTROY_CQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); if (ret) - dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, + dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); xa_erase(&cq_table->array, hr_cq->cqn); @@ -204,103 +153,91 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } -static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, - struct ib_udata *udata, - struct hns_roce_cq_buf *buf, - struct ib_umem **umem, u64 buf_addr, int cqe) +static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct hns_roce_ib_create_cq ucmd, + struct ib_udata *udata) { - int ret; - u32 page_shift; + struct hns_roce_buf *buf = &hr_cq->buf; + struct hns_roce_mtt *mtt = &hr_cq->mtt; + struct ib_umem **umem = &hr_cq->umem; u32 npages; + int ret; - *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz, - IB_ACCESS_LOCAL_WRITE, 1); + *umem = ib_umem_get(udata, ucmd.buf_addr, buf->size, + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + mtt->mtt_type = MTT_TYPE_CQE; else - buf->hr_mtt.mtt_type = MTT_TYPE_WQE; - - if (hr_dev->caps.cqe_buf_pg_sz) { - npages = (ib_umem_page_count(*umem) + - (1 << hr_dev->caps.cqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.cqe_buf_pg_sz); - 
page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, page_shift, - &buf->hr_mtt); - } else { - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), - PAGE_SHIFT, &buf->hr_mtt); - } + mtt->mtt_type = MTT_TYPE_WQE; + + npages = DIV_ROUND_UP(ib_umem_page_count(*umem), + 1 << hr_dev->caps.cqe_buf_pg_sz); + ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt); if (ret) goto err_buf; - ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem); + ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem); if (ret) goto err_mtt; return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: ib_umem_release(*umem); return ret; } -static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq_buf *buf, u32 nent) +static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct hns_roce_buf *buf = &hr_cq->buf; + struct hns_roce_mtt *mtt = &hr_cq->mtt; int ret; - u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; - ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, - (1 << page_shift) * 2, &buf->hr_buf, - page_shift); + ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2, + buf, buf->page_shift); if (ret) goto out; if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - buf->hr_mtt.mtt_type = MTT_TYPE_CQE; + mtt->mtt_type = MTT_TYPE_CQE; else - buf->hr_mtt.mtt_type = MTT_TYPE_WQE; + mtt->mtt_type = MTT_TYPE_WQE; - ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages, - buf->hr_buf.page_shift, &buf->hr_mtt); + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt); if (ret) goto err_buf; - ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf); + ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf); if (ret) goto err_mtt; return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: - hns_roce_buf_free(hr_dev, nent * 
hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, buf->size, buf); + out: return ret; } -static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, - struct hns_roce_cq_buf *buf, int cqe) +static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } static int create_user_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp, - int cq_entries) + struct hns_roce_ib_create_cq_resp *resp) { struct hns_roce_ib_create_cq ucmd; struct device *dev = hr_dev->dev; @@ -314,9 +251,7 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, } /* Get user space address, write it into mtt table */ - ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf, - &hr_cq->umem, ucmd.buf_addr, - cq_entries); + ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata); if (ret) { dev_err(dev, "Failed to get_cq_umem.\n"); return ret; @@ -337,17 +272,16 @@ static int create_user_cq(struct hns_roce_dev *hr_dev, return 0; err_mtt: - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); return ret; } static int create_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, int cq_entries) + struct hns_roce_cq *hr_cq) { struct device *dev = hr_dev->dev; - struct hns_roce_uar *uar; int ret; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { @@ -361,15 +295,14 @@ static int create_kernel_cq(struct hns_roce_dev *hr_dev, } /* Init mtt table and write buff address to mtt table */ - ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, cq_entries); + ret = alloc_cq_buf(hr_dev, hr_cq); if (ret) { dev_err(dev, "Failed to alloc_cq_buf.\n"); goto err_db; } - uar = &hr_dev->priv_uar; hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + - DB_REG_OFFSET * 
uar->index; + DB_REG_OFFSET * hr_dev->priv_uar.index; return 0; @@ -392,64 +325,69 @@ static void destroy_user_cq(struct hns_roce_dev *hr_dev, (udata->outlen >= sizeof(*resp))) hns_roce_db_unmap_user(context, &hr_cq->db); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); } static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); + free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } -int hns_roce_ib_create_cq(struct ib_cq *ib_cq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); - struct device *dev = hr_dev->dev; struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + struct device *dev = hr_dev->dev; int vector = attr->comp_vector; - int cq_entries = attr->cqe; + u32 cq_entries = attr->cqe; int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", + dev_err(dev, "Create CQ failed. 
entries=%d, max=%d\n", cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } - if (hr_dev->caps.min_cqes) - cq_entries = max(cq_entries, hr_dev->caps.min_cqes); + if (vector >= hr_dev->caps.num_comp_vectors) { + dev_err(dev, "Create CQ failed, vector=%d, max=%d\n", + vector, hr_dev->caps.num_comp_vectors); + return -EINVAL; + } - cq_entries = roundup_pow_of_two((unsigned int)cq_entries); - hr_cq->ib_cq.cqe = cq_entries - 1; + cq_entries = max(cq_entries, hr_dev->caps.min_cqes); + cq_entries = roundup_pow_of_two(cq_entries); + hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ + hr_cq->cq_depth = cq_entries; + hr_cq->vector = vector; + hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); if (udata) { - ret = create_user_cq(hr_dev, hr_cq, udata, &resp, cq_entries); + ret = create_user_cq(hr_dev, hr_cq, udata, &resp); if (ret) { dev_err(dev, "Create cq failed in user mode!\n"); goto err_cq; } } else { - ret = create_kernel_cq(hr_dev, hr_cq, cq_entries); + ret = create_kernel_cq(hr_dev, hr_cq); if (ret) { dev_err(dev, "Create cq failed in kernel mode!\n"); goto err_cq; } } - /* Allocate cq index, fill cq_context */ - ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, - hr_cq, vector); + ret = hns_roce_alloc_cqc(hr_dev, hr_cq); if (ret) { - dev_err(dev, "Creat CQ .Failed to cq_alloc.\n"); + dev_err(dev, "Alloc CQ failed(%d).\n", ret); goto err_dbmap; } @@ -462,11 +400,6 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, if (!udata && hr_cq->tptr_addr) *hr_cq->tptr_addr = 0; - /* Get created cq handler and carry out event */ - hr_cq->comp = hns_roce_ib_cq_comp; - hr_cq->event = hns_roce_ib_cq_event; - hr_cq->cq_depth = cq_entries; - if (udata) { resp.cqn = hr_cq->cqn; ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); @@ -477,7 +410,7 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, return 0; err_cqc: - hns_roce_free_cq(hr_dev, hr_cq); + 
hns_roce_free_cqc(hr_dev, hr_cq); err_dbmap: if (udata) @@ -489,7 +422,7 @@ err_cq: return ret; } -void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -499,8 +432,8 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) return; } - hns_roce_free_cq(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_free_cqc(hr_dev, hr_cq); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); if (udata) { @@ -512,7 +445,7 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) &hr_cq->db); } else { /* Free the buff of stored cq */ - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + free_cq_buf(hr_dev, hr_cq); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) hns_roce_free_db(hr_dev, &hr_cq->db); } @@ -520,38 +453,57 @@ void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) { - struct device *dev = hr_dev->dev; - struct hns_roce_cq *cq; + struct hns_roce_cq *hr_cq; + struct ib_cq *ibcq; - cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); - if (!cq) { - dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn); + hr_cq = xa_load(&hr_dev->cq_table.array, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!hr_cq) { + dev_warn(hr_dev->dev, "Completion event for bogus CQ 0x%06x\n", + cqn); return; } - ++cq->arm_sn; - cq->comp(cq); + ++hr_cq->arm_sn; + ibcq = &hr_cq->ib_cq; + if (ibcq->comp_handler) + ibcq->comp_handler(ibcq, ibcq->cq_context); } void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) { - struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; - struct hns_roce_cq *cq; + struct hns_roce_cq *hr_cq; + struct ib_event 
event; + struct ib_cq *ibcq; - cq = xa_load(&cq_table->array, cqn & (hr_dev->caps.num_cqs - 1)); - if (cq) - atomic_inc(&cq->refcount); + hr_cq = xa_load(&hr_dev->cq_table.array, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!hr_cq) { + dev_warn(dev, "Async event for bogus CQ 0x%06x\n", cqn); + return; + } - if (!cq) { - dev_warn(dev, "Async event for bogus CQ %08x\n", cqn); + if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && + event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && + event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { + dev_err(dev, "Unexpected event type 0x%x on CQ 0x%06x\n", + event_type, cqn); return; } - cq->event(cq, (enum hns_roce_event)event_type); + atomic_inc(&hr_cq->refcount); + + ibcq = &hr_cq->ib_cq; + if (ibcq->event_handler) { + event.device = ibcq->device; + event.element.cq = ibcq; + event.event = IB_EVENT_CQ_ERR; + ibcq->event_handler(&event, ibcq->cq_context); + } - if (atomic_dec_and_test(&cq->refcount)) - complete(&cq->free); + if (atomic_dec_and_test(&hr_cq->refcount)) + complete(&hr_cq->free); } int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index c00714c2f16a..10af6958ab69 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -31,7 +31,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, refcount_set(&page->refcount, 1); page->user_virt = page_addr; - page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, page_addr, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { ret = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 96d1302abde1..5617434cbfb4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -45,7 +45,7 @@ #define HNS_ROCE_MAX_MSG_LEN 0x80000000 -#define HNS_ROCE_ALOGN_UP(a, b) 
((((a) + (b) - 1) / (b)) * (b)) +#define HNS_ROCE_ALIGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) #define HNS_ROCE_IB_MIN_SQ_STRIDE 6 @@ -53,8 +53,6 @@ #define BA_BYTE_LEN 8 -#define BITS_PER_BYTE 8 - /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 @@ -426,7 +424,6 @@ struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; int wqe_cnt; /* WQE num */ - u32 max_post; int max_gs; int offset; int wqe_shift; /* WQE size */ @@ -451,6 +448,7 @@ struct hns_roce_buf { struct hns_roce_buf_list *page_list; int nbufs; u32 npages; + u32 size; int page_shift; }; @@ -482,22 +480,14 @@ struct hns_roce_db { int order; }; -struct hns_roce_cq_buf { - struct hns_roce_buf hr_buf; - struct hns_roce_mtt hr_mtt; -}; - struct hns_roce_cq { struct ib_cq ib_cq; - struct hns_roce_cq_buf hr_buf; + struct hns_roce_buf buf; + struct hns_roce_mtt mtt; struct hns_roce_db db; u8 db_en; spinlock_t lock; struct ib_umem *umem; - void (*comp)(struct hns_roce_cq *cq); - void (*event)(struct hns_roce_cq *cq, enum hns_roce_event event_type); - - struct hns_roce_uar *uar; u32 cq_depth; u32 cons_index; u32 *set_ci_db; @@ -521,9 +511,8 @@ struct hns_roce_idx_que { struct hns_roce_srq { struct ib_srq ibsrq; - void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); unsigned long srqn; - int max; + u32 wqe_cnt; int max_gs; int wqe_shift; void __iomem *db_reg_l; @@ -539,8 +528,8 @@ struct hns_roce_srq { spinlock_t lock; int head; int tail; - u16 wqe_ctr; struct mutex mutex; + void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; struct hns_roce_uar_table { @@ -582,7 +571,7 @@ struct hns_roce_av { u8 tclass; u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; - u16 vlan; + u16 vlan_id; bool vlan_en; }; @@ -695,10 +684,6 @@ struct hns_roce_qp { struct hns_roce_rinl_buf rq_inl_buf; }; -struct hns_roce_sqp { - struct hns_roce_qp hr_qp; -}; - struct hns_roce_ib_iboe { spinlock_t lock; struct net_device 
*netdevs[HNS_ROCE_MAX_PORTS]; @@ -821,8 +806,8 @@ struct hns_roce_caps { int max_qp_init_rdma; int max_qp_dest_rdma; int num_cqs; - int max_cqes; - int min_cqes; + u32 max_cqes; + u32 min_cqes; u32 min_wqes; int reserved_cqs; int reserved_srqs; @@ -953,7 +938,7 @@ struct hns_roce_hw { int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw); void (*write_cqc)(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, - dma_addr_t dma_handle, int nent, u32 vector); + dma_addr_t dma_handle); int (*set_hem)(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx); int (*clear_hem)(struct hns_roce_dev *hr_dev, @@ -1092,11 +1077,6 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct hns_roce_srq, ibsrq); } -static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) -{ - return container_of(hr_qp, struct hns_roce_sqp, hr_qp); -} - static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { __raw_writeq(*(u64 *) val, dest); @@ -1198,9 +1178,9 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); -int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index); +int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index); unsigned long key_to_hw_index(u32 key); struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, @@ -1257,12 +1237,11 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, __be32 send_ieth(const struct ib_send_wr *wr); int to_hr_qp_type(int qp_type); -int hns_roce_ib_create_cq(struct ib_cq *ib_cq, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); +int 
hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); -void hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); -void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); +void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5f74bf55f471..2a2b2112f886 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -732,7 +732,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) if (!cq) return -ENOMEM; - ret = hns_roce_ib_create_cq(cq, &cq_init_attr, NULL); + ret = hns_roce_create_cq(cq, &cq_init_attr, NULL); if (ret) { dev_err(dev, "Create cq for reserved loop qp failed!"); goto alloc_cq_failed; @@ -868,7 +868,7 @@ alloc_pd_failed: kfree(pd); alloc_mem_failed: - hns_roce_ib_destroy_cq(cq, NULL); + hns_roce_destroy_cq(cq, NULL); alloc_cq_failed: kfree(cq); return ret; @@ -897,7 +897,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev) i, ret); } - hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); + hns_roce_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL); kfree(&free_mr->mr_free_cq->ib_cq); hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL); kfree(&free_mr->mr_free_pd->ibpd); @@ -1114,9 +1114,10 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, free_mr = &priv->free_mr; if (mr->enabled) { - if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) - & (hr_dev->caps.num_mtpts - 1))) - dev_warn(dev, "HW2SW_MPT failed!\n"); + if (hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mr->key) & + (hr_dev->caps.num_mtpts - 1))) + dev_warn(dev, "DESTROY_MPT failed!\n"); } 
mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL); @@ -1979,8 +1980,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, - n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE); } static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -1989,7 +1989,7 @@ static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^ - !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL; + !!(n & hr_cq->cq_depth)) ? hr_cqe : NULL; } static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) @@ -2072,8 +2072,7 @@ static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, - u64 *mtts, dma_addr_t dma_handle, int nent, - u32 vector) + u64 *mtts, dma_addr_t dma_handle) { struct hns_roce_cq_context *cq_context = NULL; struct hns_roce_buf_list *tptr_buf; @@ -2108,9 +2107,9 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M, CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S, - ilog2((unsigned int)nent)); + ilog2(hr_cq->cq_depth)); roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, - CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); + CQ_CONTEXT_CQC_BYTE_12_CEQN_S, hr_cq->vector); cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0])); @@ -3644,10 +3643,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); } - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - kfree(hr_qp); - else - kfree(hr_to_hr_sqp(hr_qp)); + kfree(hr_qp); return 0; } @@ -3658,10 +3654,9 @@ static void hns_roce_v1_destroy_cq(struct 
ib_cq *ibcq, struct ib_udata *udata) struct device *dev = &hr_dev->pdev->dev; u32 cqe_cnt_ori; u32 cqe_cnt_cur; - u32 cq_buf_size; int wait_time = 0; - hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_free_cqc(hr_dev, hr_cq); /* * Before freeing cq buffer, we need to ensure that the outstanding CQE @@ -3686,13 +3681,12 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) wait_time++; } - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); ib_umem_release(hr_cq->umem); if (!udata) { /* Free the buff of stored cq */ - cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz; - hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf); + hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index e82567fcdeb7..cb8071a3e0d5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -389,7 +389,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M, V2_UD_SEND_WQE_BYTE_36_VLAN_S, - le16_to_cpu(ah->av.vlan)); + ah->av.vlan_id); roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, @@ -2447,8 +2447,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, - n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) @@ -2457,7 +2456,7 @@ static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ return (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_OWNER_S) ^ - !!(n & (hr_cq->ib_cq.cqe + 1))) ? 
cqe : NULL; + !!(n & hr_cq->cq_depth)) ? cqe : NULL; } static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) @@ -2550,8 +2549,7 @@ static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, void *mb_buf, - u64 *mtts, dma_addr_t dma_handle, int nent, - u32 vector) + u64 *mtts, dma_addr_t dma_handle) { struct hns_roce_v2_cq_context *cq_context; @@ -2563,9 +2561,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M, V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M, - V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent)); + V2_CQC_BYTE_4_SHIFT_S, + ilog2(hr_cq->cq_depth)); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M, - V2_CQC_BYTE_4_CEQN_S, vector); + V2_CQC_BYTE_4_CEQN_S, hr_cq->vector); roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); @@ -4061,8 +4060,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); const struct ib_gid_attr *gid_attr = NULL; int is_roce_protocol; + u16 vlan_id = 0xffff; bool is_udp = false; - u16 vlan = 0xffff; u8 ib_port; u8 hr_port; int ret; @@ -4074,7 +4073,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, if (is_roce_protocol) { gid_attr = attr->ah_attr.grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL); + ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL); if (ret) return ret; @@ -4083,7 +4082,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, IB_GID_TYPE_ROCE_UDP_ENCAP); } - if (vlan < VLAN_CFI_MASK) { + if (vlan_id < VLAN_N_VID) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); roce_set_bit(qpc_mask->byte_76_srqn_op_en, @@ -4095,7 +4094,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, } 
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, - V2_QPC_BYTE_24_VLAN_ID_S, vlan); + V2_QPC_BYTE_24_VLAN_ID_S, vlan_id); roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, V2_QPC_BYTE_24_VLAN_ID_S, 0); @@ -4650,16 +4649,14 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, { struct hns_roce_cq *send_cq, *recv_cq; struct ib_device *ibdev = &hr_dev->ib_dev; - int ret; + int ret = 0; if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); - if (ret) { + if (ret) ibdev_err(ibdev, "modify QP to Reset failed.\n"); - return ret; - } } send_cq = to_hr_cq(hr_qp->ibqp.send_cq); @@ -4715,7 +4712,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, kfree(hr_qp->rq_inl_buf.wqe_list); } - return 0; + return ret; } static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) @@ -4725,16 +4722,11 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) int ret; ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); - if (ret) { + if (ret) ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n", hr_qp->qpn, ret); - return ret; - } - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - kfree(hr_to_hr_sqp(hr_qp)); - else - kfree(hr_qp); + kfree(hr_qp); return 0; } @@ -4951,10 +4943,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { struct hns_roce_dev *hr_dev = eq->hr_dev; - __le32 doorbell[2]; - - doorbell[0] = 0; - doorbell[1] = 0; + __le32 doorbell[2] = {}; if (eq->type_flag == HNS_ROCE_AEQ) { roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M, @@ -6047,7 +6036,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, hr_dev->caps.srqwqe_hop_num)); roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_SHIFT_M, 
SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->max)); + ilog2(srq->wqe_cnt)); roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, SRQC_BYTE_4_SRQN_S, srq->srqn); @@ -6092,11 +6081,11 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - hr_dev->caps.idx_ba_pg_sz); + hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - hr_dev->caps.idx_buf_pg_sz); + hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET); srq_context->idx_nxt_blk_addr = cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT); @@ -6133,7 +6122,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, int ret; if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->max) + if (srq_attr->srq_limit >= srq->wqe_cnt) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -6193,7 +6182,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->max - 1; + attr->max_wr = srq->wqe_cnt - 1; attr->max_sge = srq->max_gs; memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); @@ -6246,7 +6235,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, spin_lock_irqsave(&srq->lock, flags); - ind = srq->head & (srq->max - 1); + ind = srq->head & (srq->wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(wr->num_sge > srq->max_gs)) { @@ -6261,7 +6250,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, break; } - wqe_idx = find_empty_entry(&srq->idx_que, srq->max); + wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); if (wqe_idx < 0) { ret = -ENOMEM; *bad_wr = wr; @@ -6285,7 +6274,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->max - 1); + 
ind = (ind + 1) & (srq->wqe_cnt - 1); } if (likely(nreq)) { @@ -6380,12 +6369,14 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl); -static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, +static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, struct hnae3_handle *handle) { struct hns_roce_v2_priv *priv = hr_dev->priv; int i; + hr_dev->pci_dev = handle->pdev; + hr_dev->dev = &handle->pdev->dev; hr_dev->hw = &hns_roce_hw_v2; hr_dev->dfx = &hns_roce_dfx_hw_v2; hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; @@ -6410,8 +6401,6 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle); priv->handle = handle; - - return 0; } static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) @@ -6429,14 +6418,7 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_kzalloc; } - hr_dev->pci_dev = handle->pdev; - hr_dev->dev = &handle->pdev->dev; - - ret = hns_roce_hw_v2_get_cfg(hr_dev, handle); - if (ret) { - dev_err(hr_dev->dev, "Get Configuration failed!\n"); - goto error_failed_get_cfg; - } + hns_roce_hw_v2_get_cfg(hr_dev, handle); ret = hns_roce_init(hr_dev); if (ret) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 43219d2f7de0..76a14db7028d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -87,8 +87,8 @@ #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 #define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 -#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096 -#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096 +#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE +#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x100 diff --git 
a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index b5d196c119ee..854ef6e74788 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -111,7 +111,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, netdev = hr_dev->iboe.netdevs[port]; if (!netdev) { - dev_err(dev, "port(%d) can't find netdev\n", port); + dev_err(dev, "Can't find netdev on port(%u)!\n", port); return -ENODEV; } @@ -253,7 +253,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, net_dev = hr_dev->iboe.netdevs[port]; if (!net_dev) { spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - dev_err(dev, "find netdev %d failed!\r\n", port); + dev_err(dev, "Find netdev %u failed!\n", port); return -EINVAL; } @@ -301,12 +301,6 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask, return 0; } -static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask, - struct ib_port_modify *props) -{ - return 0; -} - static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { @@ -359,7 +353,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); /* vm_pgoff: 1 -- TPTR */ case 1: @@ -372,7 +367,8 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, hr_dev->tptr_dma_addr >> PAGE_SHIFT, hr_dev->tptr_size, - vma->vm_page_prot); + vma->vm_page_prot, + NULL); default: return -EINVAL; @@ -423,14 +419,14 @@ static const struct ib_device_ops hns_roce_dev_ops = { .alloc_pd = hns_roce_alloc_pd, .alloc_ucontext = hns_roce_alloc_ucontext, .create_ah = hns_roce_create_ah, - .create_cq = hns_roce_ib_create_cq, + .create_cq = hns_roce_create_cq, .create_qp = hns_roce_create_qp, .dealloc_pd = hns_roce_dealloc_pd, .dealloc_ucontext 
= hns_roce_dealloc_ucontext, .del_gid = hns_roce_del_gid, .dereg_mr = hns_roce_dereg_mr, .destroy_ah = hns_roce_destroy_ah, - .destroy_cq = hns_roce_ib_destroy_cq, + .destroy_cq = hns_roce_destroy_cq, .disassociate_ucontext = hns_roce_disassociate_ucontext, .fill_res_entry = hns_roce_fill_res_entry, .get_dma_mr = hns_roce_get_dma_mr, @@ -438,7 +434,6 @@ static const struct ib_device_ops hns_roce_dev_ops = { .get_port_immutable = hns_roce_port_immutable, .mmap = hns_roce_mmap, .modify_device = hns_roce_modify_device, - .modify_port = hns_roce_modify_port, .modify_qp = hns_roce_modify_qp, .query_ah = hns_roce_query_ah, .query_device = hns_roce_query_device, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 5f8416ba09a9..9ad19170c3f9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -48,21 +48,21 @@ unsigned long key_to_hw_index(u32 key) return (key << 24) | (key >> 8); } -static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) +static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, - HNS_ROCE_CMD_SW2HW_MPT, + HNS_ROCE_CMD_CREATE_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } -int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) +int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) { return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? 
mailbox->dma : 0, - mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, + mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -83,7 +83,7 @@ static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, } } spin_unlock(&buddy->lock); - return -1; + return -EINVAL; found: clear_bit(*seg, buddy->bits[o]); @@ -206,13 +206,14 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, } ret = hns_roce_buddy_alloc(buddy, order, seg); - if (ret == -1) - return -1; + if (ret) + return ret; - if (hns_roce_table_get_range(hr_dev, table, *seg, - *seg + (1 << order) - 1)) { + ret = hns_roce_table_get_range(hr_dev, table, *seg, + *seg + (1 << order) - 1); + if (ret) { hns_roce_buddy_free(buddy, *seg, order); - return -1; + return ret; } return 0; @@ -578,7 +579,7 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, /* Allocate a key for mr from mr_table */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); - if (ret == -1) + if (ret) return -ENOMEM; mr->iova = iova; /* MR va starting addr */ @@ -707,10 +708,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, int ret; if (mr->enabled) { - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) - & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mr->key) & + (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); } if (mr->size != ~0ULL) { @@ -763,10 +765,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, - mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + dev_err(dev, "CREATE_MPT failed (%d)\n", ret); goto err_page; } @@ -1143,7 +1145,7 @@ struct ib_mr 
*hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); goto err_free; @@ -1228,7 +1230,7 @@ static int rereg_mr_trans(struct ib_mr *ibmr, int flags, } ib_umem_release(mr->umem); - mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + mr->umem = ib_umem_get(udata, start, length, mr_access_flags); if (IS_ERR(mr->umem)) { ret = PTR_ERR(mr->umem); mr->umem = NULL; @@ -1308,9 +1310,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, if (ret) goto free_cmd_mbox; - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); if (ret) - dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "DESTROY_MPT failed (%d)\n", ret); mr->enabled = 0; @@ -1332,9 +1334,9 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, goto free_cmd_mbox; } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); if (ret) { - dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + dev_err(dev, "CREATE_MPT failed (%d)\n", ret); ib_umem_release(mr->umem); goto free_cmd_mbox; } @@ -1448,10 +1450,11 @@ static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, int ret; if (mw->enabled) { - ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey) - & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + key_to_hw_index(mw->rkey) & + (hr_dev->caps.num_mtpts - 1)); if (ret) - dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret); + dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret); hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, key_to_hw_index(mw->rkey)); @@ -1487,10 +1490,10 @@ static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = 
hns_roce_sw2hw_mpt(hr_dev, mailbox, - mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret); + dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret); goto err_page; } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 912b89b4da34..780c780fdb22 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -96,7 +96,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) /* Using bitmap to manager UAR index */ ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx); - if (ret == -1) + if (ret) return -ENOMEM; if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index bd78ff90d998..a6565b674801 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -318,7 +318,7 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, * hr_qp->rq.max_gs); } - cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt; + cap->max_recv_wr = hr_qp->rq.wqe_cnt; cap->max_recv_sge = hr_qp->rq.max_gs; return 0; @@ -332,9 +332,8 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, u8 max_sq_stride = ilog2(roundup_sq_stride); /* Sanity check SQ size before proceeding */ - if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || - ucmd->log_sq_stride > max_sq_stride || - ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { + if (ucmd->log_sq_stride > max_sq_stride || + ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n"); return -EINVAL; } @@ -358,13 +357,16 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, u32 max_cnt; int ret; + if (check_shl_overflow(1, ucmd->log_sq_bb_count, 
&hr_qp->sq.wqe_cnt) || + hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + return -EINVAL; + ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n"); return ret; } - hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; hr_qp->sq.wqe_shift = ucmd->log_sq_stride; max_cnt = max(1U, cap->max_send_sge); @@ -391,37 +393,37 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to page_szie */ if (hr_dev->caps.max_sq_sg <= 2) { - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), PAGE_SIZE); } else { page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sge.sge_cnt = ex_sge_num ? 
max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num) : 0; - hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size) + - HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->sq.offset = 0; if (ex_sge_num) { - hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( + hr_qp->sge.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); hr_qp->rq.offset = hr_qp->sge.offset + - HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << + HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift), page_size); } else { - hr_qp->rq.offset = HNS_ROCE_ALOGN_UP( + hr_qp->rq.offset = HNS_ROCE_ALIGN_UP( (hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift), page_size); @@ -591,24 +593,24 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); hr_qp->sq.offset = 0; - size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, + size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size); if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift), (u32)hr_qp->sge.sge_cnt); hr_qp->sge.offset = size; - size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << + size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, page_size); } hr_qp->rq.offset = size; - size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), + size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), page_size); hr_qp->buff_size = size; /* Get wr and sge number which send */ - cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt; + cap->max_send_wr = hr_qp->sq.wqe_cnt; cap->max_send_sge = 
hr_qp->sq.max_gs; /* We don't support inline sends for kernel QPs (yet) */ @@ -743,7 +745,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr, - hr_qp->buff_size, 0, 0); + hr_qp->buff_size, 0); if (IS_ERR(hr_qp->umem)) { dev_err(dev, "ib_umem_get error for create qp\n"); ret = PTR_ERR(hr_qp->umem); @@ -1017,7 +1019,6 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_sqp *hr_sqp; struct hns_roce_qp *hr_qp; int ret; @@ -1030,7 +1031,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0, hr_qp); if (ret) { - ibdev_err(ibdev, "Create RC QP 0x%06lx failed(%d)\n", + ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n", hr_qp->qpn, ret); kfree(hr_qp); return ERR_PTR(ret); @@ -1047,11 +1048,10 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL); - if (!hr_sqp) + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (!hr_qp) return ERR_PTR(-ENOMEM); - hr_qp = &hr_sqp->hr_qp; hr_qp->port = init_attr->port_num - 1; hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; @@ -1066,7 +1066,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, hr_qp->ibqp.qp_num, hr_qp); if (ret) { ibdev_err(ibdev, "Create GSI QP failed!\n"); - kfree(hr_sqp); + kfree(hr_qp); return ERR_PTR(ret); } @@ -1289,7 +1289,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, u32 cur; cur = hr_wq->head - hr_wq->tail; - if (likely(cur + nreq < hr_wq->max_post)) + if (likely(cur + nreq < hr_wq->wqe_cnt)) return false; hr_cq = to_hr_cq(ib_cq); @@ -1297,7 +1297,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, cur = hr_wq->head - hr_wq->tail; spin_unlock(&hr_cq->lock); - return cur + nreq >= hr_wq->max_post; + return cur + nreq >= hr_wq->wqe_cnt; } int 
hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 0a31d0a3d657..06871731ac43 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -98,11 +98,15 @@ static int hns_roce_fill_res_cq_entry(struct sk_buff *msg, goto err; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); - if (!table_attr) + if (!table_attr) { + ret = -EMSGSIZE; goto err; + } - if (hns_roce_fill_cq(msg, context)) + if (hns_roce_fill_cq(msg, context)) { + ret = -EMSGSIZE; goto err_cancel_table; + } nla_nest_end(msg, table_attr); kfree(context); @@ -113,7 +117,7 @@ err_cancel_table: nla_nest_cancel(msg, table_attr); err: kfree(context); - return -EMSGSIZE; + return ret; } int hns_roce_fill_res_entry(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 43ea2c13b212..7113ebfdb4f0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -59,21 +59,21 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, } } -static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int hns_roce_hw_create_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) { return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0, - HNS_ROCE_CMD_SW2HW_SRQ, + HNS_ROCE_CMD_CREATE_SRQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long srq_num) { return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ, + mailbox ? 
0 : 1, HNS_ROCE_CMD_DESTROY_SRQ, HNS_ROCE_CMD_TIMEOUT_MSECS); } @@ -95,8 +95,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, srq->mtt.first_seg, &dma_handle_wqe); if (!mtts_wqe) { - dev_err(hr_dev->dev, - "SRQ alloc.Failed to find srq buf addr.\n"); + dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n"); return -EINVAL; } @@ -106,13 +105,14 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, &dma_handle_idx); if (!mtts_idx) { dev_err(hr_dev->dev, - "SRQ alloc.Failed to find idx que buf addr.\n"); + "Failed to find mtt for srq idx queue buf.\n"); return -EINVAL; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); - if (ret == -1) { - dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n"); + if (ret) { + dev_err(hr_dev->dev, + "Failed to alloc a bit from srq bitmap.\n"); return -ENOMEM; } @@ -134,7 +134,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, mtts_wqe, mtts_idx, dma_handle_wqe, dma_handle_idx); - ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn); + ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) goto err_xa; @@ -160,9 +160,9 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; - ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn); + ret = hns_roce_hw_destroy_srq(hr_dev, NULL, srq->srqn); if (ret) - dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n", + dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); xa_erase(&srq_table->xa, srq->srqn); @@ -180,22 +180,23 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, { struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); struct hns_roce_ib_create_srq ucmd; - u32 page_shift; - u32 npages; + struct hns_roce_buf *buf; int ret; if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - 
srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - npages = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.srqwqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->mtt); + buf = &srq->buf; + buf->npages = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.srqwqe_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + &srq->mtt); if (ret) goto err_user_buf; @@ -205,16 +206,19 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, /* config index queue BA */ srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr, - srq->idx_que.buf_size, 0, 0); + srq->idx_que.buf_size, 0); if (IS_ERR(srq->idx_que.umem)) { dev_err(hr_dev->dev, "ib_umem_get error for index queue\n"); ret = PTR_ERR(srq->idx_que.umem); goto err_user_srq_mtt; } - ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->idx_que.umem), - PAGE_SHIFT, &srq->idx_que.mtt); - + buf = &srq->idx_que.idx_buf; + buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), + 1 << hr_dev->caps.idx_buf_pg_sz); + buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + &srq->idx_que.mtt); if (ret) { dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); goto err_user_idx_mtt; @@ -251,7 +255,7 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_idx_que *idx_que = &srq->idx_que; - idx_que->bitmap = bitmap_zalloc(srq->max, GFP_KERNEL); + idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) return 
-ENOMEM; @@ -277,7 +281,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) return -ENOMEM; srq->head = 0; - srq->tail = srq->max - 1; + srq->tail = srq->wqe_cnt - 1; ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift, &srq->mtt); @@ -308,7 +312,7 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) if (ret) goto err_kernel_idx_buf; - srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); + srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { ret = -ENOMEM; goto err_kernel_idx_buf; @@ -354,7 +358,7 @@ static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, } int hns_roce_create_srq(struct ib_srq *ib_srq, - struct ib_srq_init_attr *srq_init_attr, + struct ib_srq_init_attr *init_attr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); @@ -366,24 +370,24 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, u32 cqn; /* Check the actual SRQ wqe and SRQ sge num */ - if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) + if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || + init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) return -EINVAL; mutex_init(&srq->mutex); spin_lock_init(&srq->lock); - srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); - srq->max_gs = srq_init_attr->attr.max_sge; + srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); + srq->max_gs = init_attr->attr.max_sge; srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs)); srq->wqe_shift = ilog2(srq_desc_size); - srq_buf_size = srq->max * srq_desc_size; + srq_buf_size = srq->wqe_cnt * srq_desc_size; srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; - srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz; + srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz; srq->mtt.mtt_type = MTT_TYPE_SRQWQE; srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; @@ -401,8 +405,8 @@ 
int hns_roce_create_srq(struct ib_srq *ib_srq, } } - cqn = ib_srq_has_cq(srq_init_attr->srq_type) ? - to_hr_cq(srq_init_attr->ext.cq)->cqn : 0; + cqn = ib_srq_has_cq(init_attr->srq_type) ? + to_hr_cq(init_attr->ext.cq)->cqn : 0; srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; @@ -449,7 +453,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); } else { kvfree(srq->wrid); - hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, + hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift, &srq->buf); } ib_umem_release(srq->idx_que.umem); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 2d6a378e8560..bb78d3280acc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2079,9 +2079,9 @@ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr); if (!dst || dst->error) { if (dst) { - dst_release(dst); i40iw_pr_err("ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return rc; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index cd9ee1664a69..86375947bc67 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1763,7 +1763,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, if (length > I40IW_MAX_MR_SIZE) return ERR_PTR(-EINVAL); - region = ib_umem_get(udata, start, length, acc, 0); + region = ib_umem_get(udata, start, length, acc); if (IS_ERR(region)) return (struct ib_mr *)region; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a7d238d312f0..306b21281fa2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -145,7 +145,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, int n; *umem = ib_umem_get(udata, 
buf_addr, cqe * cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 0f390351cef0..714f9df5bf39 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -64,7 +64,7 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 57079110af9b..abe68708d6d6 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -966,7 +966,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } mutex_unlock(&dev->counters_table[port_num - 1].mutex); if (stats_avail) { - memset(out_mad->data, 0, sizeof out_mad->data); switch (counter_stats.counter_mode & 0xf) { case 0: edit_counter(&counter_stats, @@ -984,38 +983,31 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx4_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num); - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; 
- /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA * queries, should be called only by VFs and for that specific purpose */ if (link == IB_LINK_LAYER_INFINIBAND) { if (mlx4_is_slave(dev->dev) && - (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || - in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || - in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) - return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && + (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS || + in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT || + in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO))) + return iboe_process_mad(ibdev, mad_flags, port_num, + in_wc, in_grh, in, out); - return ib_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, + in, out); } if (link == IB_LINK_LAYER_ETHERNET) return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, - in_grh, in_mad, out_mad); + in_grh, in, out); return -EINVAL; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8d2f1e38b891..0b5dc1d5928f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -256,6 +256,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) int hw_update = 0; int i; struct gid_entry *gids = NULL; + u16 vlan_id = 0xffff; + u8 mac[ETH_ALEN]; if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return -EINVAL; @@ -266,12 +268,16 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) if (!context) return -EINVAL; + ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]); + if (ret) + return ret; port_gid_table = &iboe->gids[attr->port_num - 1]; spin_lock_bh(&iboe->lock); for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { if 
(!memcmp(&port_gid_table->gids[i].gid, &attr->gid, sizeof(attr->gid)) && - port_gid_table->gids[i].gid_type == attr->gid_type) { + port_gid_table->gids[i].gid_type == attr->gid_type && + port_gid_table->gids[i].vlan_id == vlan_id) { found = i; break; } @@ -291,6 +297,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) memcpy(&port_gid_table->gids[free].gid, &attr->gid, sizeof(attr->gid)); port_gid_table->gids[free].gid_type = attr->gid_type; + port_gid_table->gids[free].vlan_id = vlan_id; port_gid_table->gids[free].ctx->real_index = free; port_gid_table->gids[free].ctx->refcount = 1; hw_update = 1; @@ -1146,7 +1153,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return rdma_user_mmap_io(context, vma, to_mucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case 1: if (dev->dev->caps.bf_reg_size == 0) @@ -1155,7 +1163,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) context, vma, to_mucontext(context)->uar.pfn + dev->dev->caps.num_uars, - PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot)); + PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot), + NULL); case 3: { struct mlx4_clock_params params; @@ -1171,7 +1180,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) params.bar) + params.offset) >> PAGE_SHIFT, - PAGE_SIZE, pgprot_noncached(vma->vm_page_prot)); + PAGE_SIZE, pgprot_noncached(vma->vm_page_prot), + NULL); } default: diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index eb53bb4c0c91..d188573187fa 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -508,6 +508,7 @@ struct gid_entry { union ib_gid gid; enum ib_gid_type gid_type; struct gid_cache_context *ctx; + u16 vlan_id; }; struct mlx4_port_gid_table { @@ -786,11 +787,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, 
const struct ib_recv_wr *wr, int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 6ae503cfc526..dfa17bcdcdbc 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -398,7 +398,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start, up_read(&current->mm->mmap_sem); } - return ib_umem_get(udata, start, length, access_flags, 0); + return ib_umem_get(udata, start, length, access_flags); } struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index bd4aa04416c6..85f57b76e446 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -916,7 +916,7 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + (qp->sq.wqe_cnt << qp->sq.wqe_shift); - qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0, 0); + qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -1110,8 +1110,7 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, if (err) goto err; - qp->umem = - ib_umem_get(udata, ucmd.buf_addr, qp->buf_size,
0, 0); + qp->umem = ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 848db7264cc9..8dcf6e3d9ae2 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -110,7 +110,7 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) return -EFAULT; - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index 9924be8384d8..d0a043ccbe58 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ - cong.o + cong.o restrack.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 45f48cde6b9d..dd8d24ee8e1d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -423,9 +423,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; - struct mlx5_sig_err_cqe *sig_err_cqe; - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; @@ -519,27 +516,29 @@ repoll: } } break; - case MLX5_CQE_SIG_ERR: - sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + case MLX5_CQE_SIG_ERR: { + struct mlx5_sig_err_cqe *sig_err_cqe = + (struct mlx5_sig_err_cqe *)cqe64; + struct mlx5_core_sig_ctx *sig; - xa_lock(&dev->mdev->priv.mkey_table); - mmkey = 
xa_load(&dev->mdev->priv.mkey_table, + xa_lock(&dev->sig_mrs); + sig = xa_load(&dev->sig_mrs, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - mr = to_mibmr(mmkey); - get_sig_err_item(sig_err_cqe, &mr->sig->err_item); - mr->sig->sig_err_exists = true; - mr->sig->sigerr_count++; + get_sig_err_item(sig_err_cqe, &sig->err_item); + sig->sig_err_exists = true; + sig->sigerr_count++; mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", - cq->mcq.cqn, mr->sig->err_item.key, - mr->sig->err_item.err_type, - mr->sig->err_item.sig_err_offset, - mr->sig->err_item.expected, - mr->sig->err_item.actual); + cq->mcq.cqn, sig->err_item.key, + sig->err_item.err_type, + sig->err_item.sig_err_offset, + sig->err_item.expected, + sig->err_item.actual); - xa_unlock(&dev->mdev->priv.mkey_table); + xa_unlock(&dev->sig_mrs); goto repoll; } + } return 0; } @@ -710,7 +709,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, cq->buf.umem = ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->buf.umem)) { err = PTR_ERR(cq->buf.umem); return err; @@ -1111,7 +1110,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, umem = ib_umem_get(udata, ucmd.buf_addr, (size_t)ucmd.cqe_size * entries, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(umem)) { err = PTR_ERR(umem); return err; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index d609f4659afb..9d0a18cf9e5e 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -100,6 +100,7 @@ struct devx_obj { struct mlx5_ib_devx_mr devx_mr; struct mlx5_core_dct core_dct; struct mlx5_core_cq core_cq; + u32 flow_counter_bulk_size; }; struct list_head event_sub; /* holds devx_event_subscription entries */ }; @@ -192,15 +193,20 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) 
} } -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id) +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id) { struct devx_obj *devx_obj = obj; u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + + if (offset && offset >= devx_obj->flow_counter_bulk_size) + return false; + *counter_id = MLX5_GET(dealloc_flow_counter_in, devx_obj->dinbox, flow_counter_id); + *counter_id += offset; return true; } @@ -1265,8 +1271,8 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); - return xa_err(xa_store(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL)); + return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1345,9 +1351,9 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, * the mmkey, we must wait for that to stop before freeing the * mkey, as another allocation could get the same mkey #. 
*/ - xa_erase(&obj->ib_dev->mdev->priv.mkey_table, + xa_erase(&obj->ib_dev->odp_mkeys, mlx5_base_mkey(obj->devx_mr.mmkey.key)); - synchronize_srcu(&dev->mr_srcu); + synchronize_srcu(&dev->odp_srcu); } if (obj->flags & DEVX_OBJ_FLAGS_DCT) @@ -1463,6 +1469,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( if (err) goto obj_free; + if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) { + u8 bulk = MLX5_GET(alloc_flow_counter_in, + cmd_in, + flow_counter_bulk); + obj->flow_counter_bulk_size = 128UL * bulk; + } + uobj->object = obj; INIT_LIST_HEAD(&obj->event_sub); obj->ib_dev = dev; @@ -2121,7 +2134,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0); + obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 8f4e5f22b84c..12737c509aa2 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -64,7 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; - page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0); + page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0); if (IS_ERR(page->umem)) { err = PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index b198ff10cde9..dbee17d22d50 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -85,6 +85,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata); int len, ret, i; u32 counter_id = 0; + u32 *offset_attr; + u32 offset = 0; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -151,8 +153,27 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( 
if (len) { devx_obj = arr_flow_actions[0]->object; - if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id)) + if (uverbs_attr_is_valid(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) { + + int num_offsets = uverbs_attr_ptr_get_array_size( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + sizeof(u32)); + + if (num_offsets != 1) + return -EINVAL; + + offset_attr = uverbs_attr_get_alloced_ptr( + attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET); + offset = *offset_attr; + } + + if (!mlx5_ib_devx_is_flow_counter(devx_obj, offset, + &counter_id)) return -EINVAL; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; } @@ -598,7 +619,11 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, MLX5_IB_OBJECT_DEVX_OBJ, UVERBS_ACCESS_READ, 1, 1, - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET, + UVERBS_ATTR_MIN_SIZE(sizeof(u32)), + UA_OPTIONAL, + UA_ALLOC_AND_COPY)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 4950df3f71b6..ac4d8d1b9a07 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -263,7 +263,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) }, .sq_sig_type = gsi->sq_sig_type, .qp_type = IB_QPT_UD, - .create_flags = mlx5_ib_create_qp_sqpn_qp1(), + .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1, }; return ib_create_qp(pd, &init_attr); diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index 649a3364f838..4f0edd4832bd 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -201,3 +201,27 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, return -EINVAL; } + +int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid 
*port_guid) +{ + struct mlx5_ib_dev *dev = to_mdev(device); + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_hca_vport_context *rep; + int err; + + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) + return -ENOMEM; + + err = mlx5_query_hca_vport_context(mdev, 1, 1, vf+1, rep); + if (err) + goto ex; + + port_guid->guid = rep->port_guid; + node_guid->guid = rep->node_guid; +ex: + kfree(rep); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 348c1df69cdc..14e0c17de6a9 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -74,58 +74,6 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, port); } -static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - u16 slid; - int err; - - slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); - - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) - return IB_MAD_RESULT_SUCCESS; - - /* Don't process SMInfo queries -- the SMA can't handle them. 
- */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) - return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || - in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) - return IB_MAD_RESULT_SUCCESS; - } else { - return IB_MAD_RESULT_SUCCESS; - } - - err = mlx5_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); - if (err) - return IB_MAD_RESULT_FAILURE; - - /* set return bit in status of directed route responses */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); - - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) - /* no response for trap repress */ - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, void *out) { @@ -271,30 +219,66 @@ done: int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - int ret; + u8 mgmt_class = in->mad_hdr.mgmt_class; + u8 method = in->mad_hdr.method; + u16 slid; + int err; - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; + slid = in_wc ? 
ib_lid_cpu16(in_wc->slid) : + be16_to_cpu(IB_LID_PERMISSIVE); - memset(out_mad->data, 0, sizeof(out_mad->data)); + if (method == IB_MGMT_METHOD_TRAP && !slid) + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; - if (MLX5_CAP_GEN(dev->mdev, vport_counters) && - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT && - in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) { - ret = process_pma_cmd(dev, port_num, in_mad, out_mad); - } else { - ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh, - in_mad, out_mad); + switch (mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: { + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* Don't process SMInfo queries -- the SMA can't handle them. + */ + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + return IB_MAD_RESULT_SUCCESS; + } break; + case IB_MGMT_CLASS_PERF_MGMT: + if (MLX5_CAP_GEN(dev->mdev, vport_counters) && + method == IB_MGMT_METHOD_GET) + return process_pma_cmd(dev, port_num, in, out); + /* fallthrough */ + case MLX5_IB_VENDOR_CLASS1: + /* fallthrough */ + case MLX5_IB_VENDOR_CLASS2: + case IB_MGMT_CLASS_CONG_MGMT: { + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } break; + default: + return IB_MAD_RESULT_SUCCESS; } - return ret; + + err = mlx5_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); + if (err) + return IB_MAD_RESULT_FAILURE; + + /* set return bit in status of directed route responses */ + if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } int mlx5_query_ext_port_caps(struct 
mlx5_ib_dev *dev, u8 port) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 46ea4f0b9b51..97b26e9a5234 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -67,6 +67,7 @@ #include <rdma/uverbs_std_types.h> #include <rdma/mlx5_user_ioctl_verbs.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include <rdma/ib_umem_odp.h> #define UVERBS_MODULE_NAME mlx5_ib #include <rdma/uverbs_named_ioctl.h> @@ -693,21 +694,6 @@ static void get_atomic_caps_qp(struct mlx5_ib_dev *dev, get_atomic_caps(dev, atomic_size_qp, props); } -static void get_atomic_caps_dc(struct mlx5_ib_dev *dev, - struct ib_device_attr *props) -{ - u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); - - get_atomic_caps(dev, atomic_size_qp, props); -} - -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev) -{ - struct ib_device_attr props = {}; - - get_atomic_caps_dc(dev, &props); - return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false; -} static int mlx5_query_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { @@ -844,8 +830,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); if (uhw->outlen && uhw->outlen < resp_len) return -EINVAL; - else - resp.response_length = resp_len; + + resp.response_length = resp_len; if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; @@ -1011,6 +997,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); props->max_pi_fast_reg_page_list_len = props->max_fast_reg_page_list_len / 2; + props->max_sgl_rd = + MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance); get_atomic_caps_qp(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); @@ -1161,8 +1149,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; 
resp.striding_rq_caps.max_single_stride_log_num_of_bytes = MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES; - resp.striding_rq_caps.min_single_wqe_log_num_of_strides = - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range)) + resp.striding_rq_caps + .min_single_wqe_log_num_of_strides = + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES; + else + resp.striding_rq_caps + .min_single_wqe_log_num_of_strides = + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; resp.striding_rq_caps.max_single_wqe_log_num_of_strides = MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES; resp.striding_rq_caps.supported_qpts = @@ -1808,7 +1802,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, return -EINVAL; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) + if (dev->wc_support) resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp.cache_line_size = cache_line_size(); resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); @@ -2168,7 +2162,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - prot); + prot, NULL); if (err) { mlx5_ib_err(dev, "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", @@ -2210,7 +2204,8 @@ static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) PAGE_SHIFT) + page_idx; return rdma_user_mmap_io(context, vma, pfn, map_size, - pgprot_writecombine(vma->vm_page_prot)); + pgprot_writecombine(vma->vm_page_prot), + NULL); } static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) @@ -2248,7 +2243,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm PAGE_SHIFT; return rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot)); + pgprot_noncached(vma->vm_page_prot), + NULL); case MLX5_IB_MMAP_CLOCK_INFO: 
return mlx5_ib_mmap_clock_info_page(dev, vma, context); @@ -6140,11 +6136,10 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - srcu_barrier(&dev->mr_srcu); - cleanup_srcu_struct(&dev->mr_srcu); - } + WARN_ON(!xa_empty(&dev->odp_mkeys)); + cleanup_srcu_struct(&dev->odp_srcu); + WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -6196,15 +6191,15 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); + xa_init(&dev->odp_mkeys); + xa_init(&dev->sig_mrs); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - err = init_srcu_struct(&dev->mr_srcu); - if (err) - goto err_mp; - } + err = init_srcu_struct(&dev->odp_srcu); + if (err) + goto err_mp; return 0; @@ -6264,6 +6259,9 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .disassociate_ucontext = mlx5_ib_disassociate_ucontext, .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, + .enable_driver = mlx5_ib_enable_driver, + .fill_res_entry = mlx5_ib_fill_res_entry, + .fill_stat_entry = mlx5_ib_fill_stat_entry, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, @@ -6310,6 +6308,7 @@ static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { .get_vf_config = mlx5_ib_get_vf_config, + .get_vf_guid = mlx5_ib_get_vf_guid, .get_vf_stats = mlx5_ib_get_vf_stats, .set_vf_guid = mlx5_ib_set_vf_guid, .set_vf_link_state = mlx5_ib_set_vf_link_state, @@ -6705,6 +6704,18 @@ static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) } } +int mlx5_ib_enable_driver(struct 
ib_device *dev) +{ + struct mlx5_ib_dev *mdev = to_mdev(dev); + int ret; + + ret = mlx5_ib_test_wc(mdev); + mlx5_ib_dbg(mdev, "Write-Combining %s", + mdev->wc_support ? "supported" : "not supported"); + + return ret; +} + void __mlx5_ib_remove(struct mlx5_ib_dev *dev, const struct mlx5_ib_profile *profile, int stage) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index b5aece786b36..048f4e974a61 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -34,6 +34,7 @@ #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include "mlx5_ib.h" +#include <linux/jiffies.h> /* @umem: umem object to scan * @addr: ib virtual address requested by the user @@ -216,3 +217,201 @@ int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) *offset = buf_off >> ilog2(off_size); return 0; } + +#define WR_ID_BF 0xBF +#define WR_ID_END 0xBAD +#define TEST_WC_NUM_WQES 255 +#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) +static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, + bool signaled) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_ctrl_seg *ctrl; + struct mlx5_bf *bf = &qp->bf; + __be32 mmio_wqe[16] = {}; + unsigned long flags; + unsigned int idx; + int i; + + if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) + return -EIO; + + spin_lock_irqsave(&qp->sq.lock, flags); + + idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); + + memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg)); + ctrl->fm_ce_se = signaled ? 
MLX5_WQE_CTRL_CQ_UPDATE : 0; + ctrl->opmod_idx_opcode = + cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP); + ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) | + (qp->trans_qp.base.mqp.qpn << 8)); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP; + qp->sq.wqe_head[idx] = qp->sq.head + 1; + qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), + MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; + qp->sq.head++; + + memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); + ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= + MLX5_WQE_CTRL_CQ_UPDATE; + + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell + */ + wmb(); + for (i = 0; i < 8; i++) + mlx5_write64(&mmio_wqe[i * 2], + bf->bfreg->map + bf->offset + i * 8); + + bf->offset ^= bf->buf_size; + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return 0; +} + +static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq) +{ + int ret; + struct ib_wc wc = {}; + unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; + + do { + ret = ib_poll_cq(cq, 1, &wc); + if (ret < 0 || wc.status) + return ret < 0 ? 
ret : -EINVAL; + if (ret) + break; + } while (!time_after(jiffies, end)); + + if (!ret) + return -ETIMEDOUT; + + if (wc.wr_id != WR_ID_BF) + ret = 0; + + return ret; +} + +static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp) +{ + int err, i; + + for (i = 0; i < TEST_WC_NUM_WQES; i++) { + err = post_send_nop(dev, qp, WR_ID_BF, false); + if (err) + return err; + } + + return post_send_nop(dev, qp, WR_ID_END, true); +} + +int mlx5_ib_test_wc(struct mlx5_ib_dev *dev) +{ + struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 }; + int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); + struct ib_qp_init_attr qp_init_attr = { + .cap = { .max_send_wr = TEST_WC_NUM_WQES }, + .qp_type = IB_QPT_UD, + .sq_sig_type = IB_SIGNAL_REQ_WR, + .create_flags = MLX5_IB_QP_CREATE_WC_TEST, + }; + struct ib_qp_attr qp_attr = { .port_num = 1 }; + struct ib_device *ibdev = &dev->ib_dev; + struct ib_qp *qp; + struct ib_cq *cq; + struct ib_pd *pd; + int ret; + + if (!MLX5_CAP_GEN(dev->mdev, bf)) + return 0; + + if (!dev->mdev->roce.roce_en && + port_type_cap == MLX5_CAP_PORT_TYPE_ETH) { + if (mlx5_core_is_pf(dev->mdev)) + dev->wc_support = true; + return 0; + } + + ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false); + if (ret) + goto print_err; + + if (!dev->wc_bfreg.wc) + goto out1; + + pd = ib_alloc_pd(ibdev, 0); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto out1; + } + + cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto out2; + } + + qp_init_attr.recv_cq = cq; + qp_init_attr.send_cq = cq; + qp = ib_create_qp(pd, &qp_init_attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto out3; + } + + qp_attr.qp_state = IB_QPS_INIT; + ret = ib_modify_qp(qp, &qp_attr, + IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX | + IB_QP_QKEY); + if (ret) + goto out4; + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); + if (ret) + goto out4; + + qp_attr.qp_state = IB_QPS_RTS; + ret = 
ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) + goto out4; + + ret = test_wc_do_send(dev, qp); + if (ret < 0) + goto out4; + + ret = test_wc_poll_cq_result(dev, cq); + if (ret > 0) { + dev->wc_support = true; + ret = 0; + } + +out4: + ib_destroy_qp(qp); +out3: + ib_destroy_cq(cq); +out2: + ib_dealloc_pd(pd); +out1: + mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg); +print_err: + if (ret) + mlx5_ib_err( + dev, + "Error %d while trying to test write-combining support\n", + ret); + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 1a98ee2e01c4..b983e385a8c5 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -247,12 +247,8 @@ struct mlx5_ib_flow_db { * These flags are intended for internal use by the mlx5_ib driver, and they * rely on the range reserved for that use in the ib_qp_create_flags enum. */ - -/* Create a UD QP whose source QP number is 1 */ -static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) -{ - return IB_QP_CREATE_RESERVED_START; -} +#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START +#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1) struct wr_list { u16 opcode; @@ -295,6 +291,7 @@ enum mlx5_ib_wq_flags { #define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16 #define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6 #define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13 +#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3 struct mlx5_ib_rwq { struct ib_wq ibwq; @@ -585,6 +582,9 @@ struct mlx5_ib_dm { IB_ACCESS_REMOTE_READ |\ IB_ZERO_BASED) +#define mlx5_update_odp_stats(mr, counter_name, value) \ + atomic64_add(value, &((mr)->odp_stats.counter_name)) + struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; @@ -606,7 +606,6 @@ struct mlx5_ib_mr { struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; - unsigned int live; void *descs_alloc; int access_flags; /* Needed for rereg 
MR */ @@ -618,10 +617,18 @@ struct mlx5_ib_mr { u64 data_iova; u64 pi_iova; - atomic_t num_leaf_free; - wait_queue_head_t q_leaf_free; + /* For ODP and implicit */ + atomic_t num_deferred_work; + struct xarray implicit_children; + union { + struct rcu_head rcu; + struct list_head elm; + struct work_struct work; + } odp_destroy; + struct ib_odp_counters odp_stats; + bool is_odp_implicit; + struct mlx5_async_work cb_work; - atomic_t num_pending_prefetch; }; static inline bool is_odp_mr(struct mlx5_ib_mr *mr) @@ -957,7 +964,11 @@ struct mlx5_ib_dev { /* serialize update of capability mask */ struct mutex cap_mask_mutex; - bool ib_active; + u8 ib_active:1; + u8 fill_delay:1; + u8 is_rep:1; + u8 lag_active:1; + u8 wc_support:1; struct umr_common umrc; /* sync used page count stats */ @@ -966,7 +977,6 @@ struct mlx5_ib_dev { struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; - int fill_delay; struct ib_odp_caps odp_caps; u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; @@ -975,7 +985,9 @@ struct mlx5_ib_dev { * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. 
*/ - struct srcu_struct mr_srcu; + struct srcu_struct odp_srcu; + struct xarray odp_mkeys; + u32 null_mkey; struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ @@ -984,11 +996,10 @@ struct mlx5_ib_dev { /* Array with num_ports elements */ struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; + struct mlx5_sq_bfreg wc_bfreg; struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; - bool is_rep; - int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; @@ -999,6 +1010,8 @@ struct mlx5_ib_dev { struct mlx5_srq_table srq_table; struct mlx5_async_ctx async_ctx; struct mlx5_devx_event_table devx_event_table; + + struct xarray sig_mrs; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1162,6 +1175,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); +void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -1179,9 +1193,8 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, unsigned int *meta_sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); @@ -1223,6 +1236,8 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry); void 
mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); +int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); + int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, @@ -1235,7 +1250,6 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); -bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev); struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_dm_alloc_attr *attr, @@ -1300,6 +1314,9 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); +int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); @@ -1334,6 +1351,10 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, u8 *native_port_num); void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); @@ -1349,7 +1370,7 @@ struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_flow_act *flow_act, u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); -bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id); +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id); int 
mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else @@ -1491,4 +1512,7 @@ static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, return true; } + +int mlx5_ib_enable_driver(struct ib_device *dev); +int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 7019c12005f4..60d39b9ec41c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -50,7 +50,6 @@ enum { static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); -static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) { @@ -59,13 +58,9 @@ static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { - int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); + WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - /* Wait until all page fault handlers using the mr complete. 
*/ - synchronize_srcu(&dev->mr_srcu); - - return err; + return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } static int order2idx(struct mlx5_ib_dev *dev, int order) @@ -94,8 +89,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; - struct xarray *mkeys = &dev->mdev->priv.mkey_table; - int err; spin_lock_irqsave(&ent->lock, flags); ent->pending--; @@ -122,13 +115,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) ent->size++; spin_unlock_irqrestore(&ent->lock, flags); - xa_lock_irqsave(mkeys, flags); - err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_ATOMIC)); - xa_unlock_irqrestore(mkeys, flags); - if (err) - pr_err("Error inserting to mkey tree. 0x%x\n", -err); - if (!completion_done(&ent->compl)) complete(&ent->compl); } @@ -218,9 +204,6 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - synchronize_srcu(&dev->mr_srcu); - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); kfree(mr); @@ -428,7 +411,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); - return NULL; + return ERR_PTR(-EINVAL); } ent = &cache->ent[entry]; @@ -511,7 +494,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) c = order2idx(dev, mr->order); WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); - if (unreg_umr(dev, mr)) { + if (mlx5_mr_cache_invalidate(mr)) { mr->allocated_from_cache = false; destroy_mkey(dev, mr); ent = &cache->ent[c]; @@ -555,10 +538,6 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); 
-#endif - list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { list_del(&mr->list); kfree(mr); @@ -679,6 +658,20 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) return 0; } +static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, + struct ib_pd *pd) +{ + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); +} + struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -702,16 +695,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, length64, 1); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, 0); + set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -779,7 +764,7 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (order) *order = ilog2(roundup_pow_of_two(*ncont)); } else { - u = ib_umem_get(udata, start, length, access_flags, 0); + u = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(u)) { mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); return PTR_ERR(u); @@ -1169,16 +1154,8 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, 
MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET64(mkc, mkc, len, length); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, start_addr); + set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) @@ -1337,10 +1314,15 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (is_odp_mr(mr)) { to_ib_umem_odp(mr->umem)->private = mr; - atomic_set(&mr->num_pending_prefetch, 0); + atomic_set(&mr->num_deferred_work, 0); + err = xa_err(xa_store(&dev->odp_mkeys, + mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, + GFP_KERNEL)); + if (err) { + dereg_mr(dev, mr); + return ERR_PTR(err); + } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - smp_store_release(&mr->live, 1); return &mr->ibmr; error: @@ -1348,22 +1330,29 @@ error: return ERR_PTR(err); } -static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +/** + * mlx5_mr_cache_invalidate - Fence all DMA on the MR + * @mr: The MR to fence + * + * Upon return the NIC will not be doing any DMA to the pages under the MR, + * and any DMA inprogress will be completed. Failure of this function + * indicates the HW has failed catastrophically. 
+ */ +int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) { - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_umr_wr umrwr = {}; - if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; umrwr.wr.opcode = MLX5_IB_WR_UMR; - umrwr.pd = dev->umrc.pd; + umrwr.pd = mr->dev->umrc.pd; umrwr.mkey = mr->mmkey.key; umrwr.ignore_free_state = 1; - return mlx5_ib_post_send_wait(dev, &umrwr); + return mlx5_ib_post_send_wait(mr->dev, &umrwr); } static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, @@ -1447,7 +1436,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, * UMR can't be used - MKey needs to be replaced. */ if (mr->allocated_from_cache) - err = unreg_umr(dev, mr); + err = mlx5_mr_cache_invalidate(mr); else err = destroy_mkey(dev, mr); if (err) @@ -1560,6 +1549,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) mr->sig->psv_wire.psv_idx)) mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); + xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key)); kfree(mr->sig); mr->sig = NULL; } @@ -1575,54 +1565,20 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int npages = mr->npages; struct ib_umem *umem = mr->umem; - if (is_odp_mr(mr)) { - struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); - - /* Prevent new page faults and - * prefetch requests from succeeding - */ - WRITE_ONCE(mr->live, 0); - - /* Wait for all running page-fault handlers to finish. 
*/ - synchronize_srcu(&dev->mr_srcu); - - /* dequeue pending prefetch requests for the mr */ - if (atomic_read(&mr->num_pending_prefetch)) - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_pending_prefetch)); - - /* Destroy all page mappings */ - if (!umem_odp->is_implicit_odp) - mlx5_ib_invalidate_range(umem_odp, - ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); - else - mlx5_ib_free_implicit_mr(mr); - /* - * We kill the umem before the MR for ODP, - * so that there will not be any invalidations in - * flight, looking at the *mr struct. - */ - ib_umem_odp_release(umem_odp); - atomic_sub(npages, &dev->mdev->priv.reg_pages); - - /* Avoid double-freeing the umem. */ - umem = NULL; - } + /* Stop all DMA */ + if (is_odp_mr(mr)) + mlx5_ib_fence_odp_mr(mr); + else + clean_mr(dev, mr); - clean_mr(dev, mr); + if (mr->allocated_from_cache) + mlx5_mr_cache_free(dev, mr); + else + kfree(mr); - /* - * We should unregister the DMA address from the HCA before - * remove the DMA mapping. 
- */ - mlx5_mr_cache_free(dev, mr); ib_umem_release(umem); - if (umem) - atomic_sub(npages, &dev->mdev->priv.reg_pages); + atomic_sub(npages, &dev->mdev->priv.reg_pages); - if (!mr->allocated_from_cache) - kfree(mr); } int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) @@ -1634,6 +1590,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); } + if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) { + mlx5_ib_free_implicit_mr(mmr); + return 0; + } + dereg_mr(to_mdev(ibmr->device), mmr); return 0; @@ -1797,8 +1758,15 @@ static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, if (err) goto err_free_mtt_mr; + err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), + mr->sig, GFP_KERNEL)); + if (err) + goto err_free_descs; return 0; +err_free_descs: + destroy_mkey(dev, mr); + mlx5_free_priv_descs(mr); err_free_mtt_mr: dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); mr->mtt_mr = NULL; @@ -1951,9 +1919,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, } } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + err = xa_err(xa_store(&dev->odp_mkeys, + mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, + GFP_KERNEL)); + if (err) + goto free_mkey; + } + kfree(in); return &mw->ibmw; +free_mkey: + mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); free: kfree(mw); kfree(in); @@ -1967,13 +1945,12 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) int err; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - xa_erase_irq(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(mmw->mmkey.key)); + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); /* * pagefault_single_data_segment() may be accessing mmw under * SRCU if the user bound an ODP MR to this MW. 
*/ - synchronize_srcu(&dev->mr_srcu); + synchronize_srcu(&dev->odp_srcu); } err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 3f9478d19376..45ee40c2f36e 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -93,158 +93,152 @@ struct mlx5_pagefault { static u64 mlx5_imr_ksm_entries; -static int check_parent(struct ib_umem_odp *odp, - struct mlx5_ib_mr *parent) +void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, + struct mlx5_ib_mr *imr, int flags) { - struct mlx5_ib_mr *mr = odp->private; - - return mr && mr->parent == parent && !odp->dying; -} - -static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr) -{ - if (WARN_ON(!mr || !is_odp_mr(mr))) - return NULL; - - return to_ib_umem_odp(mr->umem)->per_mm; -} - -static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp) -{ - struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent; - struct ib_ucontext_per_mm *per_mm = odp->per_mm; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - while (1) { - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (check_parent(odp, parent)) - goto end; - } -not_found: - odp = NULL; -end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -static struct ib_umem_odp *odp_lookup(u64 start, u64 length, - struct mlx5_ib_mr *parent) -{ - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent); - struct ib_umem_odp *odp; - struct rb_node *rb; - - down_read(&per_mm->umem_rwsem); - odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length); - if (!odp) - goto end; - - while (1) { - if (check_parent(odp, parent)) - goto end; - rb = rb_next(&odp->interval_tree.rb); - if (!rb) - goto not_found; - odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb); - if (ib_umem_start(odp) > start + length) - goto not_found; - } -not_found: - odp = NULL; 
-end: - up_read(&per_mm->umem_rwsem); - return odp; -} - -void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, - size_t nentries, struct mlx5_ib_mr *mr, int flags) -{ - struct ib_pd *pd = mr->ibmr.pd; - struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct ib_umem_odp *odp; - unsigned long va; - int i; + struct mlx5_klm *end = pklm + nentries; if (flags & MLX5_IB_UPD_XLT_ZAP) { - for (i = 0; i < nentries; i++, pklm++) { + for (; pklm != end; pklm++, idx++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); pklm->va = 0; } return; } /* - * The locking here is pretty subtle. Ideally the implicit children - * list would be protected by the umem_mutex, however that is not + * The locking here is pretty subtle. Ideally the implicit_children + * xarray would be protected by the umem_mutex, however that is not * possible. Instead this uses a weaker update-then-lock pattern: * * srcu_read_lock() - * <change children list> + * xa_store() * mutex_lock(umem_mutex) * mlx5_ib_update_xlt() * mutex_unlock(umem_mutex) * destroy lkey * - * ie any change the children list must be followed by the locked - * update_xlt before destroying. + * ie any change the xarray must be followed by the locked update_xlt + * before destroying. * * The umem_mutex provides the acquire/release semantic needed to make - * the children list visible to a racing thread. While SRCU is not + * the xa_store() visible to a racing thread. While SRCU is not * technically required, using it gives consistent use of the SRCU - * locking around the children list. + * locking around the xarray. 
*/ - lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); - lockdep_assert_held(&mr->dev->mr_srcu); + lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); + lockdep_assert_held(&imr->dev->odp_srcu); - odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, - nentries * MLX5_IMR_MTT_SIZE, mr); + for (; pklm != end; pklm++, idx++) { + struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); - for (i = 0; i < nentries; i++, pklm++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); - va = (offset + i) * MLX5_IMR_MTT_SIZE; - if (odp && ib_umem_start(odp) == va) { - struct mlx5_ib_mr *mtt = odp->private; - + if (mtt) { pklm->key = cpu_to_be32(mtt->ibmr.lkey); - odp = odp_next(odp); + pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE); } else { - pklm->key = cpu_to_be32(dev->null_mkey); + pklm->key = cpu_to_be32(imr->dev->null_mkey); + pklm->va = 0; } - mlx5_ib_dbg(dev, "[%d] va %lx key %x\n", - i, va, be32_to_cpu(pklm->key)); } } -static void mr_leaf_free_action(struct work_struct *work) +static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + + /* Ensure mlx5_ib_invalidate_range() will not touch the MR any more */ + mutex_lock(&odp->umem_mutex); + if (odp->npages) { + mlx5_mr_cache_invalidate(mr); + ib_umem_odp_unmap_dma_pages(odp, ib_umem_start(odp), + ib_umem_end(odp)); + WARN_ON(odp->npages); + } + odp->private = NULL; + mutex_unlock(&odp->umem_mutex); + + if (!mr->allocated_from_cache) { + mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey); + WARN_ON(mr->descs); + } +} + +/* + * This must be called after the mr has been removed from implicit_children + * and the SRCU synchronized. NOTE: The MR does not necessarily have to be + * empty here, parallel page faults could have raced with the free process and + * added pages to it. 
+ */ +static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) { - struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); - int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct mlx5_ib_mr *imr = mr->parent; struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; int srcu_key; - mr->parent = NULL; - synchronize_srcu(&mr->dev->mr_srcu); + /* implicit_child_mr's are not allowed to have deferred work */ + WARN_ON(atomic_read(&mr->num_deferred_work)); - if (smp_load_acquire(&imr->live)) { - srcu_key = srcu_read_lock(&mr->dev->mr_srcu); + if (need_imr_xlt) { + srcu_key = srcu_read_lock(&mr->dev->odp_srcu); mutex_lock(&odp_imr->umem_mutex); - mlx5_ib_update_xlt(imr, idx, 1, 0, + mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr->dev->mr_srcu, srcu_key); + srcu_read_unlock(&mr->dev->odp_srcu, srcu_key); } - ib_umem_odp_release(odp); + + dma_fence_odp_mr(mr); + + mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); + ib_umem_odp_release(odp); + atomic_dec(&imr->num_deferred_work); +} + +static void free_implicit_child_mr_work(struct work_struct *work) +{ + struct mlx5_ib_mr *mr = + container_of(work, struct mlx5_ib_mr, odp_destroy.work); - if (atomic_dec_and_test(&imr->num_leaf_free)) - wake_up(&imr->q_leaf_free); + free_implicit_child_mr(mr, true); +} + +static void free_implicit_child_mr_rcu(struct rcu_head *head) +{ + struct mlx5_ib_mr *mr = + container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); + + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); +} + +static void destroy_unused_implicit_child_mr(struct 
mlx5_ib_mr *mr) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; + struct mlx5_ib_mr *imr = mr->parent; + + xa_lock(&imr->implicit_children); + /* + * This can race with mlx5_ib_free_implicit_mr(), the first one to + * reach the xa lock wins the race and destroys the MR. + */ + if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != + mr) + goto out_unlock; + + atomic_inc(&imr->num_deferred_work); + call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu, + free_implicit_child_mr_rcu); + +out_unlock: + xa_unlock(&imr->implicit_children); } void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, @@ -254,19 +248,19 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct mlx5_mtt)) - 1; u64 idx = 0, blk_start_idx = 0; + u64 invalidations = 0; int in_block = 0; u64 addr; - if (!umem_odp) { - pr_err("invalidation called on NULL umem or non-ODP umem\n"); - return; - } - + mutex_lock(&umem_odp->umem_mutex); + /* + * If npages is zero then umem_odp->private may not be setup yet. This + * does not complete until after the first page is mapped for DMA. + */ + if (!umem_odp->npages) + goto out; mr = umem_odp->private; - if (!mr || !mr->ibmr.pd) - return; - start = max_t(u64, ib_umem_start(umem_odp), start); end = min_t(u64, ib_umem_end(umem_odp), end); @@ -276,7 +270,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, * overwrite the same MTTs. Concurent invalidations might race us, * but they will write 0s as well, so no difference in the end result. 
*/ - mutex_lock(&umem_odp->umem_mutex); for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; /* @@ -291,6 +284,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, blk_start_idx = idx; in_block = 1; } + + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -308,6 +304,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); + + mlx5_update_odp_stats(mr, invalidations, invalidations); + /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -316,13 +315,9 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, ib_umem_odp_unmap_dma_pages(umem_odp, start, end); - if (unlikely(!umem_odp->npages && mr->parent && - !umem_odp->dying)) { - WRITE_ONCE(mr->live, 0); - umem_odp->dying = 1; - atomic_inc(&mr->parent->num_leaf_free); - schedule_work(&umem_odp->work); - } + if (unlikely(!umem_odp->npages && mr->parent)) + destroy_unused_implicit_child_mr(mr); +out: mutex_unlock(&umem_odp->umem_mutex); } @@ -390,8 +385,6 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled)) caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; - - return; } static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, @@ -416,237 +409,213 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, wq_num, err); } -static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, - struct ib_umem_odp *umem_odp, - bool ksm, int access_flags) +static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, + unsigned long idx) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_odp 
*odp; struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *ret; int err; - mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY : - MLX5_IMR_MTT_CACHE_ENTRY); + odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem), + idx * MLX5_IMR_MTT_SIZE, + MLX5_IMR_MTT_SIZE); + if (IS_ERR(odp)) + return ERR_CAST(odp); + ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY); if (IS_ERR(mr)) - return mr; - - mr->ibmr.pd = pd; - - mr->dev = dev; - mr->access_flags = access_flags; - mr->mmkey.iova = 0; - mr->umem = &umem_odp->umem; - - if (ksm) { - err = mlx5_ib_update_xlt(mr, 0, - mlx5_imr_ksm_entries, - MLX5_KSM_PAGE_SHIFT, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE); - - } else { - err = mlx5_ib_update_xlt(mr, 0, - MLX5_IMR_MTT_ENTRIES, - PAGE_SHIFT, - MLX5_IB_UPD_XLT_ZAP | - MLX5_IB_UPD_XLT_ENABLE | - MLX5_IB_UPD_XLT_ATOMIC); - } - - if (err) - goto fail; + goto out_umem; + mr->ibmr.pd = imr->ibmr.pd; + mr->access_flags = imr->access_flags; + mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - - mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", - mr->mmkey.key, dev->mdev, mr); - - return mr; - -fail: - mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); - mlx5_mr_cache_free(dev, mr); - - return ERR_PTR(err); -} - -static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt) -{ - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device); - struct ib_umem_odp *odp, *result = NULL; - struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); - u64 addr = io_virt & MLX5_IMR_MTT_MASK; - int nentries = 0, start_idx = 0, ret; - struct mlx5_ib_mr *mtt; - - mutex_lock(&odp_mr->umem_mutex); - odp = odp_lookup(addr, 1, mr); - - mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n", - io_virt, bcnt, addr, odp); - -next_mr: - if (likely(odp)) { - if (nentries) - nentries++; - } else { - odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE); - if (IS_ERR(odp)) { - 
mutex_unlock(&odp_mr->umem_mutex); - return ERR_CAST(odp); - } - - mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0, - mr->access_flags); - if (IS_ERR(mtt)) { - mutex_unlock(&odp_mr->umem_mutex); - ib_umem_odp_release(odp); - return ERR_CAST(mtt); - } - - odp->private = mtt; - mtt->umem = &odp->umem; - mtt->mmkey.iova = addr; - mtt->parent = mr; - INIT_WORK(&odp->work, mr_leaf_free_action); - - smp_store_release(&mtt->live, 1); - - if (!nentries) - start_idx = addr >> MLX5_IMR_MTT_SHIFT; - nentries++; - } - - /* Return first odp if region not covered by single one */ - if (likely(!result)) - result = odp; - - addr += MLX5_IMR_MTT_SIZE; - if (unlikely(addr < io_virt + bcnt)) { - odp = odp_next(odp); - if (odp && ib_umem_start(odp) != addr) - odp = NULL; - goto next_mr; + mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE; + mr->parent = imr; + odp->private = mr; + + err = mlx5_ib_update_xlt(mr, 0, + MLX5_IMR_MTT_ENTRIES, + PAGE_SHIFT, + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); + if (err) { + ret = ERR_PTR(err); + goto out_mr; } - if (unlikely(nentries)) { - ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0, - MLX5_IB_UPD_XLT_INDIRECT | - MLX5_IB_UPD_XLT_ATOMIC); - if (ret) { - mlx5_ib_err(dev, "Failed to update PAS\n"); - result = ERR_PTR(ret); + /* + * Once the store to either xarray completes any error unwind has to + * use synchronize_srcu(). Avoid this with xa_reserve() + */ + ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); + if (unlikely(ret)) { + if (xa_is_err(ret)) { + ret = ERR_PTR(xa_err(ret)); + goto out_mr; } + /* + * Another thread beat us to creating the child mr, use + * theirs. 
+ */ + goto out_mr; } - mutex_unlock(&odp_mr->umem_mutex); - return result; + mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr); + return mr; + +out_mr: + mlx5_mr_cache_free(imr->dev, mr); +out_umem: + ib_umem_odp_release(odp); + return ret; } struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags) { - struct mlx5_ib_mr *imr; + struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); struct ib_umem_odp *umem_odp; + struct mlx5_ib_mr *imr; + int err; umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags); if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags); + imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY); if (IS_ERR(imr)) { - ib_umem_odp_release(umem_odp); - return ERR_CAST(imr); + err = PTR_ERR(imr); + goto out_umem; } + imr->ibmr.pd = &pd->ibpd; + imr->access_flags = access_flags; + imr->mmkey.iova = 0; imr->umem = &umem_odp->umem; - init_waitqueue_head(&imr->q_leaf_free); - atomic_set(&imr->num_leaf_free, 0); - atomic_set(&imr->num_pending_prefetch, 0); - smp_store_release(&imr->live, 1); + imr->ibmr.lkey = imr->mmkey.key; + imr->ibmr.rkey = imr->mmkey.key; + imr->umem = &umem_odp->umem; + imr->is_odp_implicit = true; + atomic_set(&imr->num_deferred_work, 0); + xa_init(&imr->implicit_children); + + err = mlx5_ib_update_xlt(imr, 0, + mlx5_imr_ksm_entries, + MLX5_KSM_PAGE_SHIFT, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ZAP | + MLX5_IB_UPD_XLT_ENABLE); + if (err) + goto out_mr; + err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), + &imr->mmkey, GFP_KERNEL)); + if (err) + goto out_mr; + + mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr); return imr; +out_mr: + mlx5_ib_err(dev, "Failed to register MKEY %d\n", err); + mlx5_mr_cache_free(dev, imr); +out_umem: + ib_umem_odp_release(umem_odp); + return ERR_PTR(err); } void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { - struct ib_ucontext_per_mm *per_mm 
= mr_to_per_mm(imr); - struct rb_node *node; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + struct mlx5_ib_dev *dev = imr->dev; + struct list_head destroy_list; + struct mlx5_ib_mr *mtt; + struct mlx5_ib_mr *tmp; + unsigned long idx; - down_read(&per_mm->umem_rwsem); - for (node = rb_first_cached(&per_mm->umem_tree); node; - node = rb_next(node)) { - struct ib_umem_odp *umem_odp = - rb_entry(node, struct ib_umem_odp, interval_tree.rb); - struct mlx5_ib_mr *mr = umem_odp->private; + INIT_LIST_HEAD(&destroy_list); - if (mr->parent != imr) - continue; + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); + /* + * This stops the SRCU protected page fault path from touching either + * the imr or any children. The page fault path can only reach the + * children xarray via the imr. + */ + synchronize_srcu(&dev->odp_srcu); - mutex_lock(&umem_odp->umem_mutex); - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); + xa_lock(&imr->implicit_children); + xa_for_each (&imr->implicit_children, idx, mtt) { + __xa_erase(&imr->implicit_children, idx); + list_add(&mtt->odp_destroy.elm, &destroy_list); + } + xa_unlock(&imr->implicit_children); - if (umem_odp->dying) { - mutex_unlock(&umem_odp->umem_mutex); - continue; - } + /* + * num_deferred_work can only be incremented inside the odp_srcu, or + * under xa_lock while the child is in the xarray. Thus at this point + * it is only decreasing, and all work holding it is now on the wq. + */ + if (atomic_read(&imr->num_deferred_work)) { + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&imr->num_deferred_work)); + } + + /* + * Fence the imr before we destroy the children. This allows us to + * skip updating the XLT of the imr during destroy of the child mkey + * the imr points to. 
+ */ + mlx5_mr_cache_invalidate(imr); + + list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + free_implicit_child_mr(mtt, false); + + mlx5_mr_cache_free(dev, imr); + ib_umem_odp_release(odp_imr); +} - umem_odp->dying = 1; - atomic_inc(&imr->num_leaf_free); - schedule_work(&umem_odp->work); - mutex_unlock(&umem_odp->umem_mutex); +/** + * mlx5_ib_fence_odp_mr - Stop all access to the ODP MR + * @mr: to fence + * + * On return no parallel threads will be touching this MR and no DMA will be + * active. + */ +void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) +{ + /* Prevent new page faults and prefetch requests from succeeding */ + xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&mr->dev->odp_srcu); + + if (atomic_read(&mr->num_deferred_work)) { + flush_workqueue(system_unbound_wq); + WARN_ON(atomic_read(&mr->num_deferred_work)); } - up_read(&per_mm->umem_rwsem); - wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); + dma_fence_odp_mr(mr); } -#define MLX5_PF_FLAGS_PREFETCH BIT(0) #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) -static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt, u32 *bytes_mapped, - u32 flags) +static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, + u64 user_va, size_t bcnt, u32 *bytes_mapped, + u32 flags) { - int npages = 0, current_seq, page_shift, ret, np; - struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); + int current_seq, page_shift, ret, np; bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; - bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; u64 access_mask; u64 start_idx, page_mask; - struct ib_umem_odp *odp; - size_t size; - - if (odp_mr->is_implicit_odp) { - odp = implicit_mr_get_data(mr, io_virt, bcnt); - - if (IS_ERR(odp)) - return PTR_ERR(odp); - mr = odp->private; - } else { - odp = odp_mr; - } - -next_mr: - size = min_t(size_t, bcnt, ib_umem_end(odp) 
- io_virt); page_shift = odp->page_shift; page_mask = ~(BIT(page_shift) - 1); - start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; + start_idx = (user_va - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; - if (prefetch && !downgrade && !odp->umem.writable) { - /* prefetch with write-access must - * be supported by the MR - */ - ret = -EINVAL; - goto out; - } - if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; @@ -657,13 +626,10 @@ next_mr: */ smp_rmb(); - ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask, - current_seq); - - if (ret < 0) - goto out; - - np = ret; + np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask, + current_seq); + if (np < 0) + return np; mutex_lock(&odp->umem_mutex); if (!ib_umem_mmu_notifier_retry(odp, current_seq)) { @@ -681,35 +647,19 @@ next_mr: if (ret < 0) { if (ret != -EAGAIN) - mlx5_ib_err(dev, "Failed to update mkey page tables\n"); + mlx5_ib_err(mr->dev, + "Failed to update mkey page tables\n"); goto out; } if (bytes_mapped) { u32 new_mappings = (np << page_shift) - - (io_virt - round_down(io_virt, 1 << page_shift)); - *bytes_mapped += min_t(u32, new_mappings, size); - } - - npages += np << (page_shift - PAGE_SHIFT); - bcnt -= size; - - if (unlikely(bcnt)) { - struct ib_umem_odp *next; + (user_va - round_down(user_va, 1 << page_shift)); - io_virt += size; - next = odp_next(odp); - if (unlikely(!next || ib_umem_start(next) != io_virt)) { - mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", - io_virt, next); - return -EAGAIN; - } - odp = next; - mr = odp->private; - goto next_mr; + *bytes_mapped += min_t(u32, new_mappings, bcnt); } - return npages; + return np << (page_shift - PAGE_SHIFT); out: if (ret == -EAGAIN) { @@ -718,7 +668,7 @@ out: if (!wait_for_completion_timeout(&odp->notifier_completion, timeout)) { mlx5_ib_warn( - dev, + mr->dev, "timeout waiting for mmu notifier. seq %d against %d. 
notifiers_count=%d\n", current_seq, odp->notifiers_seq, odp->notifiers_count); @@ -728,6 +678,109 @@ out: return ret; } +static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, + struct ib_umem_odp *odp_imr, u64 user_va, + size_t bcnt, u32 *bytes_mapped, u32 flags) +{ + unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT; + unsigned long upd_start_idx = end_idx + 1; + unsigned long upd_len = 0; + unsigned long npages = 0; + int err; + int ret; + + if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE || + mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt)) + return -EFAULT; + + /* Fault each child mr that intersects with our interval. */ + while (bcnt) { + unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT; + struct ib_umem_odp *umem_odp; + struct mlx5_ib_mr *mtt; + u64 len; + + mtt = xa_load(&imr->implicit_children, idx); + if (unlikely(!mtt)) { + mtt = implicit_get_child_mr(imr, idx); + if (IS_ERR(mtt)) { + ret = PTR_ERR(mtt); + goto out; + } + upd_start_idx = min(upd_start_idx, idx); + upd_len = idx - upd_start_idx + 1; + } + + umem_odp = to_ib_umem_odp(mtt->umem); + len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) - + user_va; + + ret = pagefault_real_mr(mtt, umem_odp, user_va, len, + bytes_mapped, flags); + if (ret < 0) + goto out; + user_va += len; + bcnt -= len; + npages += ret; + } + + ret = npages; + + /* + * Any time the implicit_children are changed we must perform an + * update of the xlt before exiting to ensure the HW and the + * implicit_children remains synchronized. + */ +out: + if (likely(!upd_len)) + return ret; + + /* + * Notice this is not strictly ordered right, the KSM is updated after + * the implicit_children is updated, so a parallel page fault could + * see a MR that is not yet visible in the KSM. This is similar to a + * parallel page fault seeing a MR that is being concurrently removed + * from the KSM. 
Both of these improbable situations are resolved + * safely by resuming the HW and then taking another page fault. The + * next pagefault handler will see the new information. + */ + mutex_lock(&odp_imr->umem_mutex); + err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0, + MLX5_IB_UPD_XLT_INDIRECT | + MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + if (err) { + mlx5_ib_err(imr->dev, "Failed to update PAS\n"); + return err; + } + return ret; +} + +/* + * Returns: + * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are + * not accessible, or the MR is no longer valid. + * -EAGAIN/-ENOMEM: The operation should be retried + * + * -EINVAL/others: General internal malfunction + * >0: Number of pages mapped + */ +static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, + u32 *bytes_mapped, u32 flags) +{ + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + + if (!odp->is_implicit_odp) { + if (unlikely(io_virt < ib_umem_start(odp) || + ib_umem_end(odp) - io_virt < bcnt)) + return -EFAULT; + return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped, + flags); + } + return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped, + flags); +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -775,10 +828,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 key, u64 io_virt, size_t bcnt, u32 *bytes_committed, - u32 *bytes_mapped, u32 flags) + u32 *bytes_mapped) { int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; - bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -787,58 +839,49 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, size_t offset; int ndescs; - srcu_key = srcu_read_lock(&dev->mr_srcu); + srcu_key = srcu_read_lock(&dev->odp_srcu); io_virt += *bytes_committed; bcnt -= *bytes_committed; next_mr: - mmkey = 
xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key)); + mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key)); + if (!mmkey) { + mlx5_ib_dbg( + dev, + "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", + key); + if (bytes_mapped) + *bytes_mapped += bcnt; + /* + * The user could specify a SGL with multiple lkeys and only + * some of them are ODP. Treat the non-ODP ones as fully + * faulted. + */ + ret = 0; + goto srcu_unlock; + } if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; goto srcu_unlock; } - if (prefetch && mmkey->type != MLX5_MKEY_MR) { - mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n"); - ret = -EINVAL; - goto srcu_unlock; - } - switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) { - mlx5_ib_dbg(dev, "got dead MR\n"); - ret = -EFAULT; - goto srcu_unlock; - } - if (prefetch) { - if (!is_odp_mr(mr) || - mr->ibmr.pd != pd) { - mlx5_ib_dbg(dev, "Invalid prefetch request: %s\n", - is_odp_mr(mr) ? "MR is not ODP" : - "PD is not of the MR"); - ret = -EINVAL; - goto srcu_unlock; - } - } - - if (!is_odp_mr(mr)) { - mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", - key); - if (bytes_mapped) - *bytes_mapped += bcnt; - ret = 0; - goto srcu_unlock; - } - - ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags); + ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) goto srcu_unlock; + /* + * When prefetching a page, page fault is generated + * in order to bring the page to the main memory. + * In the current flow, page faults are being counted. + */ + mlx5_update_odp_stats(mr, faults, ret); + npages += ret; ret = 0; break; @@ -928,7 +971,7 @@ srcu_unlock: } kfree(out); - srcu_read_unlock(&dev->mr_srcu, srcu_key); + srcu_read_unlock(&dev->odp_srcu, srcu_key); *bytes_committed = 0; return ret ? 
ret : npages; } @@ -1009,7 +1052,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, NULL, key, io_virt, bcnt, &pfault->bytes_committed, - bytes_mapped, 0); + bytes_mapped); if (ret < 0) break; npages += ret; @@ -1292,8 +1335,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } ret = pagefault_single_data_segment(dev, NULL, rkey, address, length, - &pfault->bytes_committed, NULL, - 0); + &pfault->bytes_committed, NULL); if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; @@ -1320,8 +1362,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, NULL, rkey, address, prefetch_len, - &bytes_committed, NULL, - 0); + &bytes_committed, NULL); if (ret < 0 && ret != -EAGAIN) { mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", ret, pfault->token, address, prefetch_len); @@ -1624,114 +1665,128 @@ int mlx5_ib_odp_init(void) struct prefetch_mr_work { struct work_struct work; - struct ib_pd *pd; u32 pf_flags; u32 num_sge; - struct ib_sge sg_list[0]; + struct { + u64 io_virt; + struct mlx5_ib_mr *mr; + size_t length; + } frags[]; }; -static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev, - struct ib_sge *sg_list, u32 num_sge, - u32 from) +static void destroy_prefetch_work(struct prefetch_mr_work *work) { u32 i; - int srcu_key; - - srcu_key = srcu_read_lock(&dev->mr_srcu); - for (i = from; i < num_sge; ++i) { - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; - - mmkey = xa_load(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(sg_list[i].lkey)); - mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - atomic_dec(&mr->num_pending_prefetch); - } - - srcu_read_unlock(&dev->mr_srcu, srcu_key); + for (i = 0; i < work->num_sge; ++i) + atomic_dec(&work->frags[i].mr->num_deferred_work); + kvfree(work); } -static bool num_pending_prefetch_inc(struct ib_pd 
*pd, - struct ib_sge *sg_list, u32 num_sge) +static struct mlx5_ib_mr * +get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, + u32 lkey) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - bool ret = true; - u32 i; + struct mlx5_core_mkey *mmkey; + struct ib_umem_odp *odp; + struct mlx5_ib_mr *mr; - for (i = 0; i < num_sge; ++i) { - struct mlx5_core_mkey *mmkey; - struct mlx5_ib_mr *mr; + lockdep_assert_held(&dev->odp_srcu); - mmkey = xa_load(&dev->mdev->priv.mkey_table, - mlx5_base_mkey(sg_list[i].lkey)); - if (!mmkey || mmkey->key != sg_list[i].lkey) { - ret = false; - break; - } + mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); + if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) + return NULL; - if (mmkey->type != MLX5_MKEY_MR) { - ret = false; - break; - } + mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + if (mr->ibmr.pd != pd) + return NULL; - if (!smp_load_acquire(&mr->live)) { - ret = false; - break; - } + odp = to_ib_umem_odp(mr->umem); - if (mr->ibmr.pd != pd) { - ret = false; - break; - } + /* prefetch with write-access must be supported by the MR */ + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && + !odp->umem.writable) + return NULL; - atomic_inc(&mr->num_pending_prefetch); - } + return mr; +} - if (!ret) - num_pending_prefetch_dec(dev, sg_list, i, 0); +static void mlx5_ib_prefetch_mr_work(struct work_struct *w) +{ + struct prefetch_mr_work *work = + container_of(w, struct prefetch_mr_work, work); + u32 bytes_mapped = 0; + u32 i; - return ret; + for (i = 0; i < work->num_sge; ++i) + pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, + work->frags[i].length, &bytes_mapped, + work->pf_flags); + + destroy_prefetch_work(work); } -static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, - struct ib_sge *sg_list, u32 num_sge) +static bool init_prefetch_work(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 
pf_flags, struct prefetch_mr_work *work, + struct ib_sge *sg_list, u32 num_sge) { u32 i; - int ret = 0; - struct mlx5_ib_dev *dev = to_mdev(pd->device); + + INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); + work->pf_flags = pf_flags; for (i = 0; i < num_sge; ++i) { - struct ib_sge *sg = &sg_list[i]; - int bytes_committed = 0; + work->frags[i].io_virt = sg_list[i].addr; + work->frags[i].length = sg_list[i].length; + work->frags[i].mr = + get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (!work->frags[i].mr) { + work->num_sge = i - 1; + if (i) + destroy_prefetch_work(work); + return false; + } - ret = pagefault_single_data_segment(dev, pd, sg->lkey, sg->addr, - sg->length, - &bytes_committed, NULL, - pf_flags); - if (ret < 0) - break; + /* Keep the MR pointer will valid outside the SRCU */ + atomic_inc(&work->frags[i].mr->num_deferred_work); } - - return ret < 0 ? ret : 0; + work->num_sge = num_sge; + return true; } -static void mlx5_ib_prefetch_mr_work(struct work_struct *work) +static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 pf_flags, struct ib_sge *sg_list, + u32 num_sge) { - struct prefetch_mr_work *w = - container_of(work, struct prefetch_mr_work, work); + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 bytes_mapped = 0; + int srcu_key; + int ret = 0; + u32 i; + + srcu_key = srcu_read_lock(&dev->odp_srcu); + for (i = 0; i < num_sge; ++i) { + struct mlx5_ib_mr *mr; - if (ib_device_try_get(w->pd->device)) { - mlx5_ib_prefetch_sg_list(w->pd, w->pf_flags, w->sg_list, - w->num_sge); - ib_device_put(w->pd->device); + mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); + if (!mr) { + ret = -ENOENT; + goto out; + } + ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, + &bytes_mapped, pf_flags); + if (ret < 0) + goto out; } + ret = 0; - num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list, - w->num_sge, 0); - kvfree(w); +out: + srcu_read_unlock(&dev->odp_srcu, srcu_key); + return ret; } 
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, @@ -1739,43 +1794,27 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, u32 flags, struct ib_sge *sg_list, u32 num_sge) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; + u32 pf_flags = 0; struct prefetch_mr_work *work; - bool valid_req; int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) - return mlx5_ib_prefetch_sg_list(pd, pf_flags, sg_list, + return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list, num_sge); - work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); + work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL); if (!work) return -ENOMEM; - memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); - - /* It is guaranteed that the pd when work is executed is the pd when - * work was queued since pd can't be destroyed while it holds MRs and - * destroying a MR leads to flushing the workquque - */ - work->pd = pd; - work->pf_flags = pf_flags; - work->num_sge = num_sge; - - INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); - - srcu_key = srcu_read_lock(&dev->mr_srcu); - - valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge); - if (valid_req) - queue_work(system_unbound_wq, &work->work); - else - kvfree(work); - - srcu_read_unlock(&dev->mr_srcu, srcu_key); - - return valid_req ? 
0 : -EINVAL; + srcu_key = srcu_read_lock(&dev->odp_srcu); + if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { + srcu_read_unlock(&dev->odp_srcu, srcu_key); + return -EINVAL; + } + queue_work(system_unbound_wq, &work->work); + srcu_read_unlock(&dev->odp_srcu, srcu_key); + return 0; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5fd071c05944..7e51870e9e01 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -749,7 +749,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, { int err; - *umem = ib_umem_get(udata, addr, size, 0, 0); + *umem = ib_umem_get(udata, addr, size, 0); if (IS_ERR(*umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); return PTR_ERR(*umem); @@ -806,7 +806,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (!ucmd->buf_addr) return -EINVAL; - rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0); + rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0); if (IS_ERR(rwq->umem)) { mlx5_ib_dbg(dev, "umem_get failed\n"); err = PTR_ERR(rwq->umem); @@ -1041,11 +1041,14 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | - mlx5_ib_create_qp_sqpn_qp1())) + MLX5_IB_QP_CREATE_SQPN_QP1 | + MLX5_IB_QP_CREATE_WC_TEST)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) qp->bf.bfreg = &dev->fp_bfreg; + else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST) + qp->bf.bfreg = &dev->wc_bfreg; else qp->bf.bfreg = &dev->bfreg; @@ -1104,7 +1107,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, MLX5_SET(qpc, qpc, fre, 1); MLX5_SET(qpc, qpc, rlky, 1); - if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { + if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) { MLX5_SET(qpc, qpc, deth_sqpn, 1); qp->flags |= MLX5_IB_QP_SQPN_QP1; } @@ -2140,7 +2143,7 @@ static int 
create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } if (init_attr->create_flags & - mlx5_ib_create_qp_sqpn_qp1()) { + MLX5_IB_QP_CREATE_SQPN_QP1) { mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n"); return -EINVAL; } @@ -5330,7 +5333,6 @@ out: * we hit doorbell */ wmb(); - /* currently we support only regular doorbells */ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset); /* Make sure doorbells don't leak out of SQ spinlock * and reach the HCA out of order. @@ -5825,7 +5827,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, if (qp->flags & MLX5_IB_QP_MANAGED_RECV) qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV; if (qp->flags & MLX5_IB_QP_SQPN_QP1) - qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1(); + qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1; qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; @@ -5957,12 +5959,21 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, } MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride); if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) { + /* + * In Firmware number of strides in each WQE is: + * "512 * 2^single_wqe_log_num_of_strides" + * Values 3 to 8 are accepted as 10 to 15, 9 to 18 are + * accepted as 0 to 9 + */ + static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9 }; MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en); MLX5_SET(wq, wq, log_wqe_stride_size, rwq->single_stride_log_num_of_bytes - MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES); - MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides - - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES); + MLX5_SET(wq, wq, log_wqe_num_of_strides, + fw_map[rwq->log_num_strides - + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]); } MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size); MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn); @@ -6037,6 +6048,19 @@ static int 
set_user_rq_size(struct mlx5_ib_dev *dev, return 0; } +static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides) +{ + if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || + (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) + return false; + + if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) && + (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) + return false; + + return true; +} + static int prepare_user_rq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata, @@ -6084,14 +6108,16 @@ static int prepare_user_rq(struct ib_pd *pd, MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES); return -EINVAL; } - if ((ucmd.single_wqe_log_num_of_strides > - MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) || - (ucmd.single_wqe_log_num_of_strides < - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) { - mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n", - ucmd.single_wqe_log_num_of_strides, - MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, - MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); + if (!log_of_strides_valid(dev, + ucmd.single_wqe_log_num_of_strides)) { + mlx5_ib_dbg( + dev, + "Invalid log num strides (%u. Range is %u - %u)\n", + ucmd.single_wqe_log_num_of_strides, + MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ? + MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES : + MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES, + MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES); return -EINVAL; } rwq->single_stride_log_num_of_bytes = diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c new file mode 100644 index 000000000000..8f6c04f12531 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved. 
+ */ + +#include <uapi/rdma/rdma_netlink.h> +#include <rdma/ib_umem_odp.h> +#include <rdma/restrack.h> +#include "mlx5_ib.h" + +static int fill_stat_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, + RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); + + if (!table_attr) + goto err; + + if (rdma_nl_stat_hwcounter_entry(msg, "page_faults", + atomic64_read(&mr->odp_stats.faults))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_invalidations", + atomic64_read(&mr->odp_stats.invalidations))) + goto err_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +static int fill_res_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct nlattr *table_attr; + + if (!(mr->access_flags & IB_ACCESS_ON_DEMAND)) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + if (mr->is_odp_implicit) { + if (rdma_nl_put_driver_string(msg, "odp", "implicit")) + goto err; + } else { + if (rdma_nl_put_driver_string(msg, "odp", "explicit")) + goto err; + } + + nla_nest_end(msg, table_attr); + return 0; + +err: + nla_nest_cancel(msg, table_attr); + return -EMSGSIZE; +} + +int mlx5_ib_fill_res_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_res_mr_entry(msg, res); + + return 0; +} + +int mlx5_ib_fill_stat_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + if (res->type == RDMA_RESTRACK_MR) + return fill_stat_mr_entry(msg, res); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/srq.c 
b/drivers/infiniband/hw/mlx5/srq.c index 4e7fde86c96b..62939df3c692 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -80,7 +80,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); - srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0); if (IS_ERR(srq->umem)) { mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); err = PTR_ERR(srq->umem); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index bfd4eebc1182..599794c5a78f 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -576,14 +576,10 @@ enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port); int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); int mthca_create_agents(struct mthca_dev *dev); void mthca_free_agents(struct mthca_dev *dev); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7ad517da4917..99aa8183a7f2 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -196,30 +196,19 @@ static void forward_trap(struct mthca_dev *dev, } } -int mthca_process_mad(struct ib_device *ibdev, - int mad_flags, - u8 port_num, - const 
struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) +int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int err; u16 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); u16 prev_lid = 0; struct ib_port_attr pattr; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; /* Forward locally generated traps to the SM */ - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && - slid == 0) { - forward_trap(to_mdev(ibdev), port_num, in_mad); + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP && !slid) { + forward_trap(to_mdev(ibdev), port_num, in); return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; } @@ -229,40 +218,39 @@ int mthca_process_mad(struct ib_device *ibdev, * Only handle PMA and Mellanox vendor-specific class gets and * sets for other classes. */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET && + in->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) return IB_MAD_RESULT_SUCCESS; /* * Don't process SMInfo queries or vendor-specific * MADs -- the SMA can't handle them. 
*/ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || - ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == + if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || + ((in->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == IB_SMP_ATTR_VENDOR_MASK)) return IB_MAD_RESULT_SUCCESS; - } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || - in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { - if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && - in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + } else if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || + in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { + if (in->mad_hdr.method != IB_MGMT_METHOD_GET && + in->mad_hdr.method != IB_MGMT_METHOD_SET) return IB_MAD_RESULT_SUCCESS; } else return IB_MAD_RESULT_SUCCESS; - if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || - in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && - in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + if ((in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in->mad_hdr.method == IB_MGMT_METHOD_SET && + in->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && !ib_query_port(ibdev, port_num, &pattr)) prev_lid = ib_lid_cpu16(pattr.lid); - err = mthca_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad); + err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc, + in_grh, in, out); if (err == -EBADMSG) return IB_MAD_RESULT_SUCCESS; else if (err) { @@ -270,16 +258,16 @@ int mthca_process_mad(struct ib_device *ibdev, return IB_MAD_RESULT_FAILURE; } - if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad, prev_lid); - 
node_desc_override(ibdev, out_mad); + if (!out->mad_hdr.status) { + smp_snoop(ibdev, port_num, in, prev_lid); + node_desc_override(ibdev, out); } /* set return bit in status of directed route responses */ - if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) - out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out->mad_hdr.status |= cpu_to_be16(1 << 15); - if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) /* no response for trap repress */ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 23554d8bf241..33002530fee7 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -880,9 +880,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(udata, start, length, acc, - ucmd.mr_attrs & MTHCA_MR_DMASYNC); - + mr->umem = ib_umem_get(udata, start, length, acc); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 8d3e36d548aa..2b7f00ac41b0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -247,35 +247,20 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -int ocrdma_process_mad(struct ib_device *ibdev, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad 
*out, size_t *out_mad_size, u16 *out_mad_pkey_index) { - int status; + int status = IB_MAD_RESULT_SUCCESS; struct ocrdma_dev *dev; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - switch (in_mad->mad_hdr.mgmt_class) { - case IB_MGMT_CLASS_PERF_MGMT: + if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) { dev = get_ocrdma_dev(ibdev); - if (!ocrdma_pma_counters(dev, out_mad)) - status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; - else - status = IB_MAD_RESULT_SUCCESS; - break; - default: - status = IB_MAD_RESULT_SUCCESS; - break; + ocrdma_pma_counters(dev, out); + status |= IB_MAD_RESULT_REPLY; } + return status; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 64cb82c08664..9780afcde780 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -56,12 +56,9 @@ int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags, void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int ocrdma_process_mad(struct ib_device *, - int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, +int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); #endif /* __OCRDMA_AH_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index c15cfc6cef81..d8c47d24d6d6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ 
b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -166,7 +166,6 @@ static const struct ib_device_ops ocrdma_dev_ops = { .get_port_immutable = ocrdma_port_immutable, .map_mr_sg = ocrdma_map_mr_sg, .mmap = ocrdma_mmap, - .modify_port = ocrdma_modify_port, .modify_qp = ocrdma_modify_qp, .poll_cq = ocrdma_poll_cq, .post_recv = ocrdma_post_recv, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 6ef89c226ad8..c2e0d0fa44be 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -2034,7 +2034,7 @@ struct ocrdma_rx_stats { }; struct ocrdma_rx_qp_err_stats { - u32 nak_invalid_requst_errors; + u32 nak_invalid_request_errors; u32 nak_remote_operation_errors; u32 nak_count_remote_access_errors; u32 local_length_errors; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index a902942adb5d..5f831e3bdbad 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -423,8 +423,8 @@ static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev) memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); pcur = stats; - pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors", - (u64)rx_qp_err_stats->nak_invalid_requst_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_request_errors", + (u64)rx_qp_err_stats->nak_invalid_request_errors); pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors", (u64)rx_qp_err_stats->nak_remote_operation_errors); pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors", @@ -670,12 +670,10 @@ err: return -EFAULT; } -int ocrdma_pma_counters(struct ocrdma_dev *dev, - struct ib_mad *out_mad) +void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad) { struct ib_pma_portcounters *pma_cnt; - memset(out_mad->data, 0, sizeof out_mad->data); pma_cnt = (void *)(out_mad->data + 40); ocrdma_update_stats(dev); @@ -683,7 +681,6 @@ 
int ocrdma_pma_counters(struct ocrdma_dev *dev, pma_cnt->port_rcv_data = cpu_to_be32(ocrdma_sysfs_rcv_data(dev)); pma_cnt->port_xmit_packets = cpu_to_be32(ocrdma_sysfs_xmit_pkts(dev)); pma_cnt->port_rcv_packets = cpu_to_be32(ocrdma_sysfs_rcv_pkts(dev)); - return 0; } static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h index bba1fec4f11f..98feca26ac55 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -69,7 +69,6 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev); void ocrdma_release_stats_resources(struct ocrdma_dev *dev); void ocrdma_rem_port_stats(struct ocrdma_dev *dev); void ocrdma_add_port_stats(struct ocrdma_dev *dev); -int ocrdma_pma_counters(struct ocrdma_dev *dev, - struct ib_mad *out_mad); +void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad); #endif /* __OCRDMA_STATS_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e8267e590772..9bc1ca6f6f9e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -190,12 +190,6 @@ int ocrdma_query_port(struct ib_device *ibdev, return 0; } -int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, - struct ib_port_modify *props) -{ - return 0; -} - static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, unsigned long len) { @@ -875,7 +869,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(status); - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { status = -EFAULT; goto umem_err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index 
32488da1b752..3a5010881be5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -54,8 +54,6 @@ int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props, struct ib_udata *uhw); int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); -int ocrdma_modify_port(struct ib_device *, u8 port, int mask, - struct ib_port_modify *props); enum rdma_protocol_type ocrdma_query_protocol(struct ib_device *device, u8 port_num); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index dc71b6e16a07..dcdc85a1ab25 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -212,7 +212,7 @@ static const struct ib_device_ops qedr_dev_ops = { .get_link_layer = qedr_link_layer, .map_mr_sg = qedr_map_mr_sg, .mmap = qedr_mmap, - .modify_port = qedr_modify_port, + .mmap_free = qedr_mmap_free, .modify_qp = qedr_modify_qp, .modify_srq = qedr_modify_srq, .poll_cq = qedr_poll_cq, @@ -357,9 +357,10 @@ static int qedr_alloc_resources(struct qedr_dev *dev) return -ENOMEM; spin_lock_init(&dev->sgid_lock); + xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ); if (IS_IWARP(dev)) { - xa_init_flags(&dev->qps, XA_FLAGS_LOCK_IRQ); + xa_init(&dev->qps); dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq"); } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 0cfd849b13d6..5488dbd59d3c 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -40,6 +40,7 @@ #include <linux/qed/qed_rdma_if.h> #include <linux/qed/qede_rdma.h> #include <linux/qed/roce_common.h> +#include <linux/completion.h> #include "qedr_hsi_rdma.h" #define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" @@ -230,14 +231,16 @@ struct qedr_ucontext { struct qedr_dev *dev; struct qedr_pd *pd; void __iomem *dpi_addr; + struct rdma_user_mmap_entry *db_mmap_entry; 
u64 dpi_phys_addr; u32 dpi_size; u16 dpi; + bool db_rec; +}; - struct list_head mm_head; - - /* Lock to protect mm list */ - struct mutex mm_list_lock; +union db_prod32 { + struct rdma_pwm_val16_data data; + u32 raw; }; union db_prod64 { @@ -265,6 +268,13 @@ struct qedr_userq { struct qedr_pbl *pbl_tbl; u64 buf_addr; size_t buf_len; + + /* doorbell recovery */ + void __iomem *db_addr; + struct qedr_user_db_rec *db_rec_data; + struct rdma_user_mmap_entry *db_mmap_entry; + void __iomem *db_rec_db2_addr; + union db_prod32 db_rec_db2_data; }; struct qedr_cq { @@ -300,19 +310,6 @@ struct qedr_pd { struct qedr_ucontext *uctx; }; -struct qedr_mm { - struct { - u64 phy_addr; - unsigned long len; - } key; - struct list_head entry; -}; - -union db_prod32 { - struct rdma_pwm_val16_data data; - u32 raw; -}; - struct qedr_qp_hwq_info { /* WQE Elements */ struct qed_chain pbl; @@ -377,10 +374,20 @@ enum qedr_qp_err_bitmap { QEDR_QP_ERR_RQ_PBL_FULL = 32, }; +enum qedr_qp_create_type { + QEDR_QP_CREATE_NONE, + QEDR_QP_CREATE_USER, + QEDR_QP_CREATE_KERNEL, +}; + +enum qedr_iwarp_cm_flags { + QEDR_IWARP_CM_WAIT_FOR_CONNECT = BIT(0), + QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1), +}; + struct qedr_qp { struct ib_qp ibqp; /* must be first */ struct qedr_dev *dev; - struct qedr_iw_ep *ep; struct qedr_qp_hwq_info sq; struct qedr_qp_hwq_info rq; @@ -395,6 +402,7 @@ struct qedr_qp { u32 id; struct qedr_pd *pd; enum ib_qp_type qp_type; + enum qedr_qp_create_type create_type; struct qed_rdma_qp *qed_qp; u32 qp_id; u16 icid; @@ -437,8 +445,11 @@ struct qedr_qp { /* Relevant to qps created from user space only (applications) */ struct qedr_userq usq; struct qedr_userq urq; - atomic_t refcnt; - bool destroyed; + + /* synchronization objects used with iwarp ep */ + struct kref refcnt; + struct completion iwarp_cm_comp; + unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */ }; struct qedr_ah { @@ -476,6 +487,18 @@ struct qedr_mr { u32 npages; }; +struct qedr_user_mmap_entry { + struct 
rdma_user_mmap_entry rdma_entry; + struct qedr_dev *dev; + union { + u64 io_address; + void *address; + }; + size_t length; + u16 dpi; + u8 mmap_flag; +}; + #define SET_FIELD2(value, name, flag) ((value) |= ((flag) << (name ## _SHIFT))) #define QEDR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \ @@ -531,7 +554,7 @@ struct qedr_iw_ep { struct iw_cm_id *cm_id; struct qedr_qp *qp; void *qed_context; - u8 during_connect; + struct kref refcnt; }; static inline @@ -574,4 +597,11 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct qedr_srq, ibsrq); } + +static inline struct qedr_user_mmap_entry * +get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry) +{ + return container_of(rdma_entry, struct qedr_user_mmap_entry, + rdma_entry); +} #endif diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 22881d4442b9..792eecd206b6 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -79,6 +79,27 @@ qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info, } } +static void qedr_iw_free_qp(struct kref *ref) +{ + struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt); + + kfree(qp); +} + +static void +qedr_iw_free_ep(struct kref *ref) +{ + struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt); + + if (ep->qp) + kref_put(&ep->qp->refcnt, qedr_iw_free_qp); + + if (ep->cm_id) + ep->cm_id->rem_ref(ep->cm_id); + + kfree(ep); +} + static void qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) { @@ -93,6 +114,7 @@ qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params) ep->dev = dev; ep->qed_context = params->ep_context; + kref_init(&ep->refcnt); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; @@ -141,12 +163,10 @@ qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params) { struct qedr_iw_ep *ep = (struct qedr_iw_ep 
*)context; - if (ep->cm_id) { + if (ep->cm_id) qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE); - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -186,11 +206,13 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) struct qedr_qp *qp = ep->qp; struct iw_cm_event event; - if (qp->destroyed) { - kfree(dwork); - qedr_iw_qp_rem_ref(&qp->ibqp); - return; - } + /* The qp won't be released until we release the ep. + * the ep's refcnt was increased before calling this + * function, therefore it is safe to access qp + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + goto out; memset(&event, 0, sizeof(event)); event.status = dwork->status; @@ -204,7 +226,6 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) else qp_params.new_state = QED_ROCE_QP_STATE_SQD; - kfree(dwork); if (ep->cm_id) ep->cm_id->event_handler(ep->cm_id, &event); @@ -214,7 +235,10 @@ static void qedr_iw_disconnect_worker(struct work_struct *work) dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params); - qedr_iw_qp_rem_ref(&qp->ibqp); + complete(&ep->qp->iwarp_cm_comp); +out: + kfree(dwork); + kref_put(&ep->refcnt, qedr_iw_free_ep); } static void @@ -224,13 +248,17 @@ qedr_iw_disconnect_event(void *context, struct qedr_discon_work *work; struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; struct qedr_dev *dev = ep->dev; - struct qedr_qp *qp = ep->qp; work = kzalloc(sizeof(*work), GFP_ATOMIC); if (!work) return; - qedr_iw_qp_add_ref(&qp->ibqp); + /* We can't get a close event before disconnect, but since + * we're scheduling a work queue we need to make sure close + * won't delete the ep, so we increase the refcnt + */ + kref_get(&ep->refcnt); + work->ep = ep; work->event = params->event; work->status = params->status; @@ -252,16 +280,30 @@ qedr_iw_passive_complete(void *context, if ((params->status == -ECONNREFUSED) && (!ep->qp)) { DP_DEBUG(dev, 
QEDR_MSG_IWARP, "PASSIVE connection refused releasing ep...\n"); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return; } + complete(&ep->qp->iwarp_cm_comp); qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED); if (params->status < 0) qedr_iw_close_event(context, params); } +static void +qedr_iw_active_complete(void *context, + struct qed_iwarp_cm_event_params *params) +{ + struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; + + complete(&ep->qp->iwarp_cm_comp); + qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY); + + if (params->status < 0) + kref_put(&ep->refcnt, qedr_iw_free_ep); +} + static int qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params) { @@ -288,27 +330,15 @@ qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params) qedr_iw_mpa_reply(context, params); break; case QED_IWARP_EVENT_PASSIVE_COMPLETE: - ep->during_connect = 0; qedr_iw_passive_complete(context, params); break; - case QED_IWARP_EVENT_ACTIVE_COMPLETE: - ep->during_connect = 0; - qedr_iw_issue_event(context, - params, - IW_CM_EVENT_CONNECT_REPLY); - if (params->status < 0) { - struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context; - - ep->cm_id->rem_ref(ep->cm_id); - ep->cm_id = NULL; - } + qedr_iw_active_complete(context, params); break; case QED_IWARP_EVENT_DISCONNECT: qedr_iw_disconnect_event(context, params); break; case QED_IWARP_EVENT_CLOSE: - ep->during_connect = 0; qedr_iw_close_event(context, params); break; case QED_IWARP_EVENT_RQ_EMPTY: @@ -451,10 +481,10 @@ qedr_addr6_resolve(struct qedr_dev *dev, if ((!dst) || dst->error) { if (dst) { - dst_release(dst); DP_ERR(dev, "ip6_route_output returned dst->error = %d\n", dst->error); + dst_release(dst); } return -EINVAL; } @@ -476,6 +506,19 @@ qedr_addr6_resolve(struct qedr_dev *dev, return rc; } +static struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn) +{ + struct qedr_qp *qp; + + xa_lock(&dev->qps); + qp = xa_load(&dev->qps, qpn); + if (qp) + 
kref_get(&qp->refcnt); + xa_unlock(&dev->qps); + + return qp; +} + int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct qedr_dev *dev = get_qedr_dev(cm_id->device); @@ -491,10 +534,6 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int rc = 0; int i; - qp = xa_load(&dev->qps, conn_param->qpn); - if (unlikely(!qp)) - return -EINVAL; - laddr = (struct sockaddr_in *)&cm_id->m_local_addr; raddr = (struct sockaddr_in *)&cm_id->m_remote_addr; laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr; @@ -516,8 +555,15 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -ENOMEM; ep->dev = dev; + kref_init(&ep->refcnt); + + qp = qedr_iw_load_qp(dev, conn_param->qpn); + if (!qp) { + rc = -EINVAL; + goto err; + } + ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); ep->cm_id = cm_id; @@ -580,16 +626,20 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) in_params.qp = qp->qed_qp; memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN); - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already being destroyed */ + rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; err: - cm_id->rem_ref(cm_id); - kfree(ep); + kref_put(&ep->refcnt, qedr_iw_free_ep); return rc; } @@ -677,18 +727,17 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc; + int rc = 0; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); - qp = xa_load(&dev->qps, conn_param->qpn); + qp = qedr_iw_load_qp(dev, conn_param->qpn); if (!qp) { DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn); return -EINVAL; } ep->qp = qp; - qp->ep = ep; cm_id->add_ref(cm_id); 
ep->cm_id = cm_id; @@ -700,15 +749,21 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ird = conn_param->ird; params.ord = conn_param->ord; - ep->during_connect = 1; + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + goto err; /* QP already destroyed */ + rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms); - if (rc) + if (rc) { + complete(&qp->iwarp_cm_comp); goto err; + } return rc; + err: - ep->during_connect = 0; - cm_id->rem_ref(cm_id); + kref_put(&ep->refcnt, qedr_iw_free_ep); + return rc; } @@ -731,17 +786,14 @@ void qedr_iw_qp_add_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - atomic_inc(&qp->refcnt); + kref_get(&qp->refcnt); } void qedr_iw_qp_rem_ref(struct ib_qp *ibqp) { struct qedr_qp *qp = get_qedr_qp(ibqp); - if (atomic_dec_and_test(&qp->refcnt)) { - xa_erase_irq(&qp->dev->qps, qp->qp_id); - kfree(qp); - } + kref_put(&qp->refcnt, qedr_iw_free_qp); } struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 6f3ce86019b7..4cd292966aa9 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -51,6 +51,7 @@ #include "verbs.h" #include <rdma/qedr-abi.h> #include "qedr_roce_cm.h" +#include "qedr_iw_cm.h" #define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm) #define RDMA_MAX_SGE_PER_SRQ (4) @@ -58,6 +59,11 @@ #define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) +enum { + QEDR_USER_MMAP_IO_WC = 0, + QEDR_USER_MMAP_PHYS_PAGE, +}; + static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) { @@ -250,78 +256,31 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) return 0; } -int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, - struct ib_port_modify *props) -{ - return 0; -} - -static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr, - unsigned long 
len) -{ - struct qedr_mm *mm; - - mm = kzalloc(sizeof(*mm), GFP_KERNEL); - if (!mm) - return -ENOMEM; - - mm->key.phy_addr = phy_addr; - /* This function might be called with a length which is not a multiple - * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel - * forces this granularity by increasing the requested size if needed. - * When qedr_mmap is called, it will search the list with the updated - * length as a key. To prevent search failures, the length is rounded up - * in advance to PAGE_SIZE. - */ - mm->key.len = roundup(len, PAGE_SIZE); - INIT_LIST_HEAD(&mm->entry); - - mutex_lock(&uctx->mm_list_lock); - list_add(&mm->entry, &uctx->mm_head); - mutex_unlock(&uctx->mm_list_lock); - - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "added (addr=0x%llx,len=0x%lx) for ctx=%p\n", - (unsigned long long)mm->key.phy_addr, - (unsigned long)mm->key.len, uctx); - - return 0; -} - -static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr, - unsigned long len) -{ - bool found = false; - struct qedr_mm *mm; - - mutex_lock(&uctx->mm_list_lock); - list_for_each_entry(mm, &uctx->mm_head, entry) { - if (len != mm->key.len || phy_addr != mm->key.phy_addr) - continue; - - found = true; - break; - } - mutex_unlock(&uctx->mm_list_lock); - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n", - mm->key.phy_addr, mm->key.len, uctx, found); - - return found; -} - int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { struct ib_device *ibdev = uctx->device; int rc; struct qedr_ucontext *ctx = get_qedr_ucontext(uctx); struct qedr_alloc_ucontext_resp uresp = {}; + struct qedr_alloc_ucontext_req ureq = {}; struct qedr_dev *dev = get_qedr_dev(ibdev); struct qed_rdma_add_user_out_params oparams; + struct qedr_user_mmap_entry *entry; if (!udata) return -EFAULT; + if (udata->inlen) { + rc = ib_copy_from_udata(&ureq, udata, + min(sizeof(ureq), udata->inlen)); + if (rc) { + DP_ERR(dev, "Problem copying 
data from user space\n"); + return -EFAULT; + } + + ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC); + } + rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams); if (rc) { DP_ERR(dev, @@ -334,13 +293,29 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) ctx->dpi_addr = oparams.dpi_addr; ctx->dpi_phys_addr = oparams.dpi_phys_addr; ctx->dpi_size = oparams.dpi_size; - INIT_LIST_HEAD(&ctx->mm_head); - mutex_init(&ctx->mm_list_lock); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + rc = -ENOMEM; + goto err; + } + + entry->io_address = ctx->dpi_phys_addr; + entry->length = ctx->dpi_size; + entry->mmap_flag = QEDR_USER_MMAP_IO_WC; + entry->dpi = ctx->dpi; + entry->dev = dev; + rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, + ctx->dpi_size); + if (rc) { + kfree(entry); + goto err; + } + ctx->db_mmap_entry = &entry->rdma_entry; uresp.dpm_enabled = dev->user_dpm_enabled; uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; - uresp.db_pa = ctx->dpi_phys_addr; + uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry); uresp.db_size = ctx->dpi_size; uresp.max_send_wr = dev->attr.max_sqe; uresp.max_recv_wr = dev->attr.max_rqe; @@ -352,82 +327,92 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) - return rc; + goto err; ctx->dev = dev; - rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); - if (rc) - return rc; - DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n", &ctx->ibucontext); return 0; + +err: + if (!ctx->db_mmap_entry) + dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi); + else + rdma_user_mmap_entry_remove(ctx->db_mmap_entry); + + return rc; } void qedr_dealloc_ucontext(struct ib_ucontext *ibctx) { struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx); - struct qedr_mm *mm, *tmp; DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n", uctx); - 
uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi); - list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { - DP_DEBUG(uctx->dev, QEDR_MSG_MISC, - "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n", - mm->key.phy_addr, mm->key.len, uctx); - list_del(&mm->entry); - kfree(mm); - } + rdma_user_mmap_entry_remove(uctx->db_mmap_entry); } -int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry) { - struct qedr_ucontext *ucontext = get_qedr_ucontext(context); - struct qedr_dev *dev = get_qedr_dev(context->device); - unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT; - unsigned long len = (vma->vm_end - vma->vm_start); - unsigned long dpi_start; + struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry); + struct qedr_dev *dev = entry->dev; - dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size); + if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE) + free_page((unsigned long)entry->address); + else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC) + dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi); - DP_DEBUG(dev, QEDR_MSG_INIT, - "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n", - (void *)vma->vm_start, (void *)vma->vm_end, - (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size); + kfree(entry); +} - if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { - DP_ERR(dev, - "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n", - (void *)vma->vm_start, (void *)vma->vm_end); - return -EINVAL; - } +int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma) +{ + struct ib_device *dev = ucontext->device; + size_t length = vma->vm_end - vma->vm_start; + struct rdma_user_mmap_entry *rdma_entry; + struct qedr_user_mmap_entry *entry; + int rc = 0; + u64 pfn; - if (!qedr_search_mmap(ucontext, phys_addr, len)) { - DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not 
authorized\n", - vma->vm_pgoff); - return -EINVAL; - } + ibdev_dbg(dev, + "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n", + vma->vm_start, vma->vm_end, length, vma->vm_pgoff); - if (phys_addr < dpi_start || - ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) { - DP_ERR(dev, - "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n", - (void *)phys_addr, (void *)dpi_start, - ucontext->dpi_size); + rdma_entry = rdma_user_mmap_entry_get(ucontext, vma); + if (!rdma_entry) { + ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n", + vma->vm_pgoff); return -EINVAL; } - - if (vma->vm_flags & VM_READ) { - DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n"); - return -EINVAL; + entry = get_qedr_mmap_entry(rdma_entry); + ibdev_dbg(dev, + "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n", + entry->io_address, length, entry->mmap_flag); + + switch (entry->mmap_flag) { + case QEDR_USER_MMAP_IO_WC: + pfn = entry->io_address >> PAGE_SHIFT; + rc = rdma_user_mmap_io(ucontext, vma, pfn, length, + pgprot_writecombine(vma->vm_page_prot), + rdma_entry); + break; + case QEDR_USER_MMAP_PHYS_PAGE: + rc = vm_insert_page(vma, vma->vm_start, + virt_to_page(entry->address)); + break; + default: + rc = -EINVAL; } - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len, - vma->vm_page_prot); + if (rc) + ibdev_dbg(dev, + "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", + entry->io_address, length, entry->mmap_flag, rc); + + rdma_user_mmap_entry_put(rdma_entry); + return rc; } int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) @@ -657,16 +642,50 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, } } +static int qedr_db_recovery_add(struct qedr_dev *dev, + void __iomem *db_addr, + void *db_data, + enum qed_db_rec_width db_width, + enum qed_db_rec_space db_space) +{ + if (!db_data) { + DP_DEBUG(dev, 
QEDR_MSG_INIT, "avoiding db rec since old lib\n"); + return 0; + } + + return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data, + db_width, db_space); +} + +static void qedr_db_recovery_del(struct qedr_dev *dev, + void __iomem *db_addr, + void *db_data) +{ + if (!db_data) { + DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n"); + return; + } + + /* Ignore return code as there is not much we can do about it. Error + * log will be printed inside. + */ + dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data); +} + static int qedr_copy_cq_uresp(struct qedr_dev *dev, - struct qedr_cq *cq, struct ib_udata *udata) + struct qedr_cq *cq, struct ib_udata *udata, + u32 db_offset) { struct qedr_create_cq_uresp uresp; int rc; memset(&uresp, 0, sizeof(uresp)); - uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + uresp.db_offset = db_offset; uresp.icid = cq->icid; + if (cq->q.db_mmap_entry) + uresp.db_rec_addr = + rdma_user_mmap_get_offset(cq->q.db_mmap_entry); rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); if (rc) @@ -694,10 +713,58 @@ static inline int qedr_align_cq_entries(int entries) return aligned_size / QEDR_CQE_SIZE; } +static int qedr_init_user_db_rec(struct ib_udata *udata, + struct qedr_dev *dev, struct qedr_userq *q, + bool requires_db_rec) +{ + struct qedr_ucontext *uctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + struct qedr_user_mmap_entry *entry; + int rc; + + /* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */ + if (requires_db_rec == 0 || !uctx->db_rec) + return 0; + + /* Allocate a page for doorbell recovery, add to mmap */ + q->db_rec_data = (void *)get_zeroed_page(GFP_USER); + if (!q->db_rec_data) { + DP_ERR(dev, "get_zeroed_page failed\n"); + return -ENOMEM; + } + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto err_free_db_data; + + entry->address = q->db_rec_data; + entry->length = PAGE_SIZE; + entry->mmap_flag = 
QEDR_USER_MMAP_PHYS_PAGE; + rc = rdma_user_mmap_entry_insert(&uctx->ibucontext, + &entry->rdma_entry, + PAGE_SIZE); + if (rc) + goto err_free_entry; + + q->db_mmap_entry = &entry->rdma_entry; + + return 0; + +err_free_entry: + kfree(entry); + +err_free_db_data: + free_page((unsigned long)q->db_rec_data); + q->db_rec_data = NULL; + return -ENOMEM; +} + static inline int qedr_init_user_queue(struct ib_udata *udata, struct qedr_dev *dev, struct qedr_userq *q, u64 buf_addr, - size_t buf_len, int access, int dmasync, + size_t buf_len, bool requires_db_rec, + int access, int alloc_and_init) { u32 fw_pages; @@ -705,7 +772,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, q->buf_addr = buf_addr; q->buf_len = buf_len; - q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync); + q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access); if (IS_ERR(q->umem)) { DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n", PTR_ERR(q->umem)); @@ -735,7 +802,8 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, } } - return 0; + /* mmap the user address used to store doorbell data for recovery */ + return qedr_init_user_db_rec(udata, dev, q, requires_db_rec); err0: ib_umem_release(q->umem); @@ -821,6 +889,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int entries = attr->cqe; struct qedr_cq *cq = get_qedr_cq(ibcq); int chain_entries; + u32 db_offset; int page_cnt; u64 pbl_ptr; u16 icid; @@ -840,8 +909,12 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, chain_entries = qedr_align_cq_entries(entries); chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES); + /* calc db offset. 
user will add DPI base, kernel will add db addr */ + db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + if (udata) { - if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { DP_ERR(dev, "create cq: problem copying data from user space\n"); goto err0; @@ -856,7 +929,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_type = QEDR_CQ_TYPE_USER; rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr, - ureq.len, IB_ACCESS_LOCAL_WRITE, 1, + ureq.len, true, IB_ACCESS_LOCAL_WRITE, 1); if (rc) goto err0; @@ -865,6 +938,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, page_cnt = cq->q.pbl_info.num_pbes; cq->ibcq.cqe = chain_entries; + cq->q.db_addr = ctx->dpi_addr + db_offset; } else { cq->cq_type = QEDR_CQ_TYPE_KERNEL; @@ -876,7 +950,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, sizeof(union rdma_cqe), &cq->pbl, NULL); if (rc) - goto err1; + goto err0; page_cnt = qed_chain_get_page_cnt(&cq->pbl); pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl); @@ -888,21 +962,28 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rc = dev->ops->rdma_create_cq(dev->rdma_ctx, ¶ms, &icid); if (rc) - goto err2; + goto err1; cq->icid = icid; cq->sig = QEDR_CQ_MAGIC_NUMBER; spin_lock_init(&cq->cq_lock); if (udata) { - rc = qedr_copy_cq_uresp(dev, cq, udata); + rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset); + if (rc) + goto err2; + + rc = qedr_db_recovery_add(dev, cq->q.db_addr, + &cq->q.db_rec_data->db_data, + DB_REC_WIDTH_64B, + DB_REC_USER); if (rc) - goto err3; + goto err2; + } else { /* Generate doorbell address. 
*/ - cq->db_addr = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); cq->db.data.icid = cq->icid; + cq->db_addr = dev->db_addr + db_offset; cq->db.data.params = DB_AGG_CMD_SET << RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; @@ -912,6 +993,11 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->latest_cqe = NULL; consume_cqe(cq); cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl); + + rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data, + DB_REC_WIDTH_64B, DB_REC_KERNEL); + if (rc) + goto err2; } DP_DEBUG(dev, QEDR_MSG_CQ, @@ -920,18 +1006,19 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; -err3: +err2: destroy_iparams.icid = cq->icid; dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams, &destroy_oparams); -err2: - if (udata) - qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); - else - dev->ops->common->chain_free(dev->cdev, &cq->pbl); err1: - if (udata) + if (udata) { + qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); + if (cq->q.db_mmap_entry) + rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); + } else { + dev->ops->common->chain_free(dev->cdev, &cq->pbl); + } err0: return -EINVAL; } @@ -962,8 +1049,10 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) cq->destroyed = 1; /* GSIs CQs are handled by driver, so they don't exist in the FW */ - if (cq->cq_type == QEDR_CQ_TYPE_GSI) + if (cq->cq_type == QEDR_CQ_TYPE_GSI) { + qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); return; + } iparams.icid = cq->icid; dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); @@ -972,6 +1061,14 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) if (udata) { qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); ib_umem_release(cq->q.umem); + + if (cq->q.db_rec_data) { + qedr_db_recovery_del(dev, cq->q.db_addr, + &cq->q.db_rec_data->db_data); + rdma_user_mmap_entry_remove(cq->q.db_mmap_entry); + } + } else { + 
qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); } /* We don't want the IRQ handler to handle a non-existing CQ so we @@ -1136,8 +1233,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev, } static void qedr_copy_rq_uresp(struct qedr_dev *dev, - struct qedr_create_qp_uresp *uresp, - struct qedr_qp *qp) + struct qedr_create_qp_uresp *uresp, + struct qedr_qp *qp) { /* iWARP requires two doorbells per RQ. */ if (rdma_protocol_iwarp(&dev->ibdev, 1)) { @@ -1150,6 +1247,9 @@ static void qedr_copy_rq_uresp(struct qedr_dev *dev, } uresp->rq_icid = qp->icid; + if (qp->urq.db_mmap_entry) + uresp->rq_db_rec_addr = + rdma_user_mmap_get_offset(qp->urq.db_mmap_entry); } static void qedr_copy_sq_uresp(struct qedr_dev *dev, @@ -1163,22 +1263,26 @@ static void qedr_copy_sq_uresp(struct qedr_dev *dev, uresp->sq_icid = qp->icid; else uresp->sq_icid = qp->icid + 1; + + if (qp->usq.db_mmap_entry) + uresp->sq_db_rec_addr = + rdma_user_mmap_get_offset(qp->usq.db_mmap_entry); } static int qedr_copy_qp_uresp(struct qedr_dev *dev, - struct qedr_qp *qp, struct ib_udata *udata) + struct qedr_qp *qp, struct ib_udata *udata, + struct qedr_create_qp_uresp *uresp) { - struct qedr_create_qp_uresp uresp; int rc; - memset(&uresp, 0, sizeof(uresp)); - qedr_copy_sq_uresp(dev, &uresp, qp); - qedr_copy_rq_uresp(dev, &uresp, qp); + memset(uresp, 0, sizeof(*uresp)); + qedr_copy_sq_uresp(dev, uresp, qp); + qedr_copy_rq_uresp(dev, uresp, qp); - uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; - uresp.qp_id = qp->qp_id; + uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; + uresp->qp_id = qp->qp_id; - rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp)); if (rc) DP_ERR(dev, "create qp: failed a copy to user space with qp icid=0x%x.\n", @@ -1193,7 +1297,10 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, struct ib_qp_init_attr *attrs) { spin_lock_init(&qp->q_lock); - atomic_set(&qp->refcnt, 1); + if 
(rdma_protocol_iwarp(&dev->ibdev, 1)) { + kref_init(&qp->refcnt); + init_completion(&qp->iwarp_cm_comp); + } qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; @@ -1222,16 +1329,35 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, qp->sq.max_sges, qp->sq_cq->icid); } -static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { + int rc; + qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid + 1; + rc = qedr_db_recovery_add(dev, qp->sq.db, + &qp->sq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + if (!qp->srq) { qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; + + rc = qedr_db_recovery_add(dev, qp->rq.db, + &qp->rq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + qedr_db_recovery_del(dev, qp->sq.db, + &qp->sq.db_data); } + + return rc; } static int qedr_check_srq_params(struct qedr_dev *dev, @@ -1279,19 +1405,19 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq) static int qedr_init_srq_user_params(struct ib_udata *udata, struct qedr_srq *srq, struct qedr_create_srq_ureq *ureq, - int access, int dmasync) + int access) { struct scatterlist *sg; int rc; rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr, - ureq->srq_len, access, dmasync, 1); + ureq->srq_len, false, access, 1); if (rc) return rc; srq->prod_umem = ib_umem_get(udata, ureq->prod_pair_addr, - sizeof(struct rdma_srq_producers), access, dmasync); + sizeof(struct rdma_srq_producers), access); if (IS_ERR(srq->prod_umem)) { qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl); ib_umem_release(srq->usrq.umem); @@ -1381,13 +1507,14 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, hw_srq->max_sges = init_attr->attr.max_sge; if (udata) { - if 
(ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen))) { DP_ERR(dev, "create srq: problem copying data from user space\n"); goto err0; } - rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0); + rc = qedr_init_srq_user_params(udata, srq, &ureq, 0); if (rc) goto err0; @@ -1570,13 +1697,39 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev, &qp->urq.pbl_info, FW_PAGE_SHIFT); } -static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) +static void qedr_cleanup_user(struct qedr_dev *dev, + struct qedr_ucontext *ctx, + struct qedr_qp *qp) { ib_umem_release(qp->usq.umem); qp->usq.umem = NULL; ib_umem_release(qp->urq.umem); qp->urq.umem = NULL; + + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); + qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + kfree(qp->urq.pbl_tbl); + } + + if (qp->usq.db_rec_data) { + qedr_db_recovery_del(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data); + rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry); + } + + if (qp->urq.db_rec_data) { + qedr_db_recovery_del(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data); + rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry); + } + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr, + &qp->urq.db_rec_db2_data); } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -1588,27 +1741,30 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_in_params in_params; struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_create_qp_uresp uresp; + struct qedr_ucontext *ctx = NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; + qp->create_type = QEDR_QP_CREATE_USER; memset(&ureq, 0, sizeof(ureq)); - rc = ib_copy_from_udata(&ureq, udata, 
sizeof(ureq)); + rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen)); if (rc) { DP_ERR(dev, "Problem copying data from user space\n"); return rc; } - /* SQ - read access only (0), dma sync not required (0) */ + /* SQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, 0, 0, alloc_and_init); + ureq.sq_len, true, 0, alloc_and_init); if (rc) return rc; if (!qp->srq) { - /* RQ - read access only (0), dma sync not required (0) */ + /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, - ureq.rq_len, 0, 0, alloc_and_init); + ureq.rq_len, true, 0, alloc_and_init); if (rc) return rc; } @@ -1638,29 +1794,76 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - rc = qedr_copy_qp_uresp(dev, qp, udata); + rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); + if (rc) + goto err; + + /* db offset was calculated in copy_qp_uresp, now set in the user q */ + ctx = pd->uctx; + qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; + qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset; + + /* calculate the db_rec_db2 data since it is constant so no + * need to reflect from user + */ + qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid); + qp->urq.db_rec_db2_data.data.value = + cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD); + } + + rc = qedr_db_recovery_add(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); if (rc) goto err; + rc = qedr_db_recovery_add(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr, + &qp->urq.db_rec_db2_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + } 
qedr_qp_user_print(dev, qp); - return 0; + return rc; err: rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); if (rc) DP_ERR(dev, "create qp: fatal fault. rc=%d", rc); err1: - qedr_cleanup_user(dev, qp); + qedr_cleanup_user(dev, ctx, qp); return rc; } -static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) +static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { + int rc; + qp->sq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); qp->sq.db_data.data.icid = qp->icid; + rc = qedr_db_recovery_add(dev, qp->sq.db, + &qp->sq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; @@ -1668,6 +1871,19 @@ static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp) DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS); qp->rq.iwarp_db2_data.data.icid = qp->icid; qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD; + + rc = qedr_db_recovery_add(dev, qp->rq.db, + &qp->rq.db_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + if (rc) + return rc; + + rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2, + &qp->rq.iwarp_db2_data, + DB_REC_WIDTH_32B, + DB_REC_KERNEL); + return rc; } static int @@ -1715,8 +1931,7 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - qedr_set_roce_db_info(dev, qp); - return rc; + return qedr_set_roce_db_info(dev, qp); } static int @@ -1774,8 +1989,7 @@ qedr_iwarp_create_kernel_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - qedr_set_iwarp_db_info(dev, qp); - return rc; + return qedr_set_iwarp_db_info(dev, qp); err: dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -1790,6 +2004,20 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp) dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl); kfree(qp->rqe_wr_id); + + /* GSI qp is not 
registered to db mechanism so no need to delete */ + if (qp->qp_type == IB_QPT_GSI) + return; + + qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data); + + if (!qp->srq) { + qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data); + + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_db_recovery_del(dev, qp->rq.iwarp_db2, + &qp->rq.iwarp_db2_data); + } } static int qedr_create_kernel_qp(struct qedr_dev *dev, @@ -1805,6 +2033,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, u32 n_sq_entries; memset(&in_params, 0, sizeof(in_params)); + qp->create_type = QEDR_QP_CREATE_KERNEL; /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in * the ring. The ring should allow at least a single WR, even if the @@ -1918,7 +2147,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL); + rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) goto err; } @@ -2429,7 +2658,10 @@ err: static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, struct ib_udata *udata) { - int rc = 0; + struct qedr_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + int rc; if (qp->qp_type != IB_QPT_GSI) { rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -2437,8 +2669,8 @@ static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, return rc; } - if (udata) - qedr_cleanup_user(dev, qp); + if (qp->create_type == QEDR_QP_CREATE_USER) + qedr_cleanup_user(dev, ctx, qp); else qedr_cleanup_kernel(dev, qp); @@ -2467,34 +2699,44 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_modify_qp(ibqp, &attr, attr_mask, NULL); } } else { - /* Wait for the connect/accept to complete */ - if (qp->ep) { - int wait_count = 1; - - while (qp->ep->during_connect) { - DP_DEBUG(dev, QEDR_MSG_QP, - "Still in during connect/accept\n"); - - msleep(100); - if (wait_count++ 
> 200) { - DP_NOTICE(dev, - "during connect timeout\n"); - break; - } - } - } + /* If connection establishment started the WAIT_FOR_CONNECT + * bit will be on and we need to Wait for the establishment + * to complete before destroying the qp. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); + + /* If graceful disconnect started, the WAIT_FOR_DISCONNECT + * bit will be on, and we need to wait for the disconnect to + * complete before continuing. We can use the same completion, + * iwarp_cm_comp, since this is the only place that waits for + * this completion and it is sequential. In addition, + * disconnect can't occur before the connection is fully + * established, therefore if WAIT_FOR_DISCONNECT is on it + * means WAIT_FOR_CONNECT is also on and the completion for + * CONNECT already occurred. + */ + if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT, + &qp->iwarp_cm_flags)) + wait_for_completion(&qp->iwarp_cm_comp); } if (qp->qp_type == IB_QPT_GSI) qedr_destroy_gsi_qp(dev); + /* We need to remove the entry from the xarray before we release the + * qp_id to avoid a race of the qp_id being reallocated and failing + * on xa_insert + */ + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + xa_erase(&dev->qps, qp->qp_id); + qedr_free_qp_resources(dev, qp, udata); - if (atomic_dec_and_test(&qp->refcnt) && - rdma_protocol_iwarp(&dev->ibdev, 1)) { - xa_erase_irq(&dev->qps, qp->qp_id); - kfree(qp); - } + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + qedr_iw_qp_rem_ref(&qp->ibqp); + return 0; } @@ -2597,7 +2839,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->type = QEDR_MR_USER; - mr->umem = ib_umem_get(udata, start, len, acc, 0); + mr->umem = ib_umem_get(udata, start, len, acc); if (IS_ERR(mr->umem)) { rc = -EFAULT; goto err0; @@ -2673,8 +2915,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); - if 
((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) - qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + if (mr->type != QEDR_MR_DMA) + free_mr_info(dev, &mr->info); /* it could be user registered memory. */ ib_umem_release(mr->umem); @@ -4106,19 +4348,10 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, - u8 port_num, - const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *mad_hdr, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index) + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, const struct ib_mad *in, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index) { - struct qedr_dev *dev = get_qedr_dev(ibdev); - - DP_DEBUG(dev, QEDR_MSG_GSI, - "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n", - mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod, - mad_hdr->class_specific, mad_hdr->class_version, - mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); return IB_MAD_RESULT_SUCCESS; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 9aaa90283d6e..18027844eb87 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -35,8 +35,6 @@ int qedr_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, struct ib_udata *udata); int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); -int qedr_modify_port(struct ib_device *, u8 port, int mask, - struct ib_port_modify *props); int qedr_iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); @@ -46,7 +44,8 @@ int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void qedr_dealloc_ucontext(struct ib_ucontext *uctx); -int qedr_mmap(struct ib_ucontext *, struct 
vm_area_struct *vma); +int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma); +void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); @@ -93,10 +92,9 @@ int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_wr); int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, struct ib_mad_hdr *out_mad, - size_t *out_mad_size, u16 *out_mad_pkey_index); + const struct ib_grh *in_grh, const struct ib_mad *in_mad, + struct ib_mad *out_mad, size_t *out_mad_size, + u16 *out_mad_pkey_index); int qedr_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 531d8a1db2c3..ca5ea734e3d0 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1417,7 +1417,6 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) * * The exact combo of LEDs if on is true is determined by looking * at the ibcstatus. - * These LEDs indicate the physical and logical state of IB link. 
* For this chip (at least with recommended board pinouts), LED1 * is Yellow (logical state) and LED2 is Green (physical state), diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index f92faf5ec369..79bb83222e8d 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2098,8 +2098,6 @@ static int cc_get_classportinfo(struct ib_cc_mad *ccp, struct ib_cc_classportinfo_attr *p = (struct ib_cc_classportinfo_attr *)ccp->mgmt_data; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - p->base_version = 1; p->class_version = 1; p->cap_mask = 0; @@ -2120,8 +2118,6 @@ static int cc_get_congestion_info(struct ib_cc_mad *ccp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - p->congestion_info = 0; p->control_table_cap = ppd->cc_max_table_entries; @@ -2138,8 +2134,6 @@ static int cc_get_congestion_setting(struct ib_cc_mad *ccp, struct qib_pportdata *ppd = ppd_from_ibp(ibp); struct ib_cc_congestion_entry_shadow *entries; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - spin_lock(&ppd->cc_shadow_lock); entries = ppd->congestion_entries_shadow->entries; @@ -2176,8 +2170,6 @@ static int cc_get_congestion_control_table(struct ib_cc_mad *ccp, if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) goto bail; - memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); - spin_lock(&ppd->cc_shadow_lock); max_cct_block = @@ -2296,18 +2288,11 @@ bail: return reply_failure((struct ib_smp *) ccp); } -static int check_cc_key(struct qib_ibport *ibp, - struct ib_cc_mad *ccp, int mad_flags) -{ - return 0; -} - static int process_cc(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; - struct qib_ibport *ibp = to_iport(ibdev, port); int ret; *out_mad = *in_mad; @@ -2318,10 +2303,6 @@ static int process_cc(struct ib_device 
*ibdev, int mad_flags, goto bail; } - ret = check_cc_key(ibp, ccp, mad_flags); - if (ret) - goto bail; - switch (ccp->method) { case IB_MGMT_METHOD_GET: switch (ccp->attr_id) { @@ -2405,28 +2386,21 @@ bail: */ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index) { int ret; struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - switch (in_mad->mad_hdr.mgmt_class) { + switch (in->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: - ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_subn(ibdev, mad_flags, port, in, out); goto bail; case IB_MGMT_CLASS_PERF_MGMT: - ret = process_perf(ibdev, port, in_mad, out_mad); + ret = process_perf(ibdev, port, in, out); goto bail; case IB_MGMT_CLASS_CONG_MGMT: @@ -2435,7 +2409,7 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, ret = IB_MAD_RESULT_SUCCESS; goto bail; } - ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + ret = process_cc(ibdev, mad_flags, port, in, out); goto bail; default: diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 3926be78036e..568b21eb6ea1 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if 
(!pattr->show) + return -EIO; + return pattr->show(ppd, buf); } @@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->store) + return -EIO; + return pattr->store(ppd, buf, len); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 17bdf8acee2f..8bf414b47b96 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -245,9 +245,8 @@ void qib_sys_guid_chg(struct qib_ibport *ibp); void qib_node_desc_chg(struct qib_ibport *ibp); int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); + const struct ib_mad *in, struct ib_mad *out, + size_t *out_mad_size, u16 *out_mad_pkey_index); void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx); void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 7800e6930502..a26a4fd86bf4 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -136,7 +136,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, } cq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, - IB_ACCESS_LOCAL_WRITE, 1); + IB_ACCESS_LOCAL_WRITE); if (IS_ERR(cq->umem)) { ret = PTR_ERR(cq->umem); goto err_cq; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h index 8f9749d54688..86a6c054ea26 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h @@ -58,7 +58,8 @@ #define PVRDMA_ROCEV1_VERSION 17 #define 
PVRDMA_ROCEV2_VERSION 18 #define PVRDMA_PPN64_VERSION 19 -#define PVRDMA_VERSION PVRDMA_PPN64_VERSION +#define PVRDMA_QPHANDLE_VERSION 20 +#define PVRDMA_VERSION PVRDMA_QPHANDLE_VERSION #define PVRDMA_BOARD_ID 1 #define PVRDMA_REV_ID 1 @@ -581,6 +582,17 @@ struct pvrdma_cmd_create_qp_resp { u32 max_inline_data; }; +struct pvrdma_cmd_create_qp_resp_v2 { + struct pvrdma_cmd_resp_hdr hdr; + u32 qpn; + u32 qp_handle; + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + struct pvrdma_cmd_modify_qp { struct pvrdma_cmd_hdr hdr; u32 qp_handle; @@ -663,6 +675,7 @@ union pvrdma_cmd_resp { struct pvrdma_cmd_create_cq_resp create_cq_resp; struct pvrdma_cmd_resize_cq_resp resize_cq_resp; struct pvrdma_cmd_create_qp_resp create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 create_qp_resp_v2; struct pvrdma_cmd_query_qp_resp query_qp_resp; struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp; struct pvrdma_cmd_create_srq_resp create_srq_resp; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index f3a3d22ee8d7..c61e665ff261 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -126,7 +126,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-EINVAL); } - umem = ib_umem_get(udata, start, length, access_flags, 0); + umem = ib_umem_get(udata, start, length, access_flags); if (IS_ERR(umem)) { dev_warn(&dev->pdev->dev, "could not get umem for mem region\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index bca6a58a442e..f15809c28f67 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -52,6 +52,9 @@ #include "pvrdma.h" +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp); + static inline void get_cqs(struct pvrdma_qp *qp, struct 
pvrdma_cq **send_cq, struct pvrdma_cq **recv_cq) { @@ -195,7 +198,9 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, union pvrdma_cmd_resp rsp; struct pvrdma_cmd_create_qp *cmd = &req.create_qp; struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp; + struct pvrdma_cmd_create_qp_resp_v2 *resp_v2 = &rsp.create_qp_resp_v2; struct pvrdma_create_qp ucmd; + struct pvrdma_create_qp_resp qp_resp = {}; unsigned long flags; int ret; bool is_srq = !!init_attr->srq; @@ -260,10 +265,19 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } + /* Userspace supports qpn and qp handles? */ + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION && + udata->outlen < sizeof(qp_resp)) { + dev_warn(&dev->pdev->dev, + "create queuepair not supported\n"); + ret = -EOPNOTSUPP; + goto err_qp; + } + if (!is_srq) { /* set qp->sq.wqe_cnt, shift, buf_size.. */ qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr, - ucmd.rbuf_size, 0, 0); + ucmd.rbuf_size, 0); if (IS_ERR(qp->rumem)) { ret = PTR_ERR(qp->rumem); goto err_qp; @@ -275,7 +289,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr, - ucmd.sbuf_size, 0, 0); + ucmd.sbuf_size, 0); if (IS_ERR(qp->sumem)) { if (!is_srq) ib_umem_release(qp->rumem); @@ -379,13 +393,33 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, } /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */ - qp->qp_handle = resp->qpn; qp->port = init_attr->port_num; - qp->ibqp.qp_num = resp->qpn; + + if (dev->dsr_version >= PVRDMA_QPHANDLE_VERSION) { + qp->ibqp.qp_num = resp_v2->qpn; + qp->qp_handle = resp_v2->qp_handle; + } else { + qp->ibqp.qp_num = resp->qpn; + qp->qp_handle = resp->qpn; + } + spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); + if (udata) { + qp_resp.qpn = qp->ibqp.qp_num; + qp_resp.qp_handle = qp->qp_handle; + + if (ib_copy_to_udata(udata, &qp_resp, + min(udata->outlen, sizeof(qp_resp)))) { 
+ dev_warn(&dev->pdev->dev, + "failed to copy back udata\n"); + __pvrdma_destroy_qp(dev, qp); + return ERR_PTR(-EINVAL); + } + } + return &qp->ibqp; err_pdir: @@ -400,27 +434,15 @@ err_qp: return ERR_PTR(ret); } -static void pvrdma_free_qp(struct pvrdma_qp *qp) +static void _pvrdma_free_qp(struct pvrdma_qp *qp) { + unsigned long flags; struct pvrdma_dev *dev = to_vdev(qp->ibqp.device); - struct pvrdma_cq *scq; - struct pvrdma_cq *rcq; - unsigned long flags, scq_flags, rcq_flags; - - /* In case cq is polling */ - get_cqs(qp, &scq, &rcq); - pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); - - _pvrdma_flush_cqe(qp, scq); - if (scq != rcq) - _pvrdma_flush_cqe(qp, rcq); spin_lock_irqsave(&dev->qp_tbl_lock, flags); dev->qp_tbl[qp->qp_handle] = NULL; spin_unlock_irqrestore(&dev->qp_tbl_lock, flags); - pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); - if (refcount_dec_and_test(&qp->refcnt)) complete(&qp->free); wait_for_completion(&qp->free); @@ -435,34 +457,71 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp) atomic_dec(&dev->num_qps); } -/** - * pvrdma_destroy_qp - destroy a queue pair - * @qp: the queue pair to destroy - * @udata: user data or null for kernel object - * - * @return: 0 on success. - */ -int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +static void pvrdma_free_qp(struct pvrdma_qp *qp) +{ + struct pvrdma_cq *scq; + struct pvrdma_cq *rcq; + unsigned long scq_flags, rcq_flags; + + /* In case cq is polling */ + get_cqs(qp, &scq, &rcq); + pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_flush_cqe(qp, scq); + if (scq != rcq) + _pvrdma_flush_cqe(qp, rcq); + + /* + * We're now unlocking the CQs before clearing out the qp handle this + * should still be safe. We have destroyed the backend QP and flushed + * the CQEs so there should be no other completions for this QP. 
+ */ + pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags); + + _pvrdma_free_qp(qp); +} + +static inline void _pvrdma_destroy_qp_work(struct pvrdma_dev *dev, + u32 qp_handle) { - struct pvrdma_qp *vqp = to_vqp(qp); union pvrdma_cmd_req req; struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp; int ret; memset(cmd, 0, sizeof(*cmd)); cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP; - cmd->qp_handle = vqp->qp_handle; + cmd->qp_handle = qp_handle; - ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0); + ret = pvrdma_cmd_post(dev, &req, NULL, 0); if (ret < 0) - dev_warn(&to_vdev(qp->device)->pdev->dev, + dev_warn(&dev->pdev->dev, "destroy queuepair failed, error: %d\n", ret); +} +/** + * pvrdma_destroy_qp - destroy a queue pair + * @qp: the queue pair to destroy + * @udata: user data or null for kernel object + * + * @return: always 0. + */ +int pvrdma_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) +{ + struct pvrdma_qp *vqp = to_vqp(qp); + + _pvrdma_destroy_qp_work(to_vdev(qp->device), vqp->qp_handle); pvrdma_free_qp(vqp); return 0; } +static void __pvrdma_destroy_qp(struct pvrdma_dev *dev, + struct pvrdma_qp *qp) +{ + _pvrdma_destroy_qp_work(dev, qp->qp_handle); + _pvrdma_free_qp(qp); +} + /** * pvrdma_modify_qp - modify queue pair attributes * @ibqp: the queue pair diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 36cdfbdbd325..98c8be71d91d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -146,7 +146,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err_srq; } - srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0, 0); + srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0); if (IS_ERR(srq->umem)) { ret = PTR_ERR(srq->umem); goto err_srq; |