From 421cba7457ee92d47111bc050e71c92dfdfcbdd3 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 6 Jun 2011 12:32:25 -0700 Subject: [PATCH] librdmacm: Abstract ibverbs SRQ creation Support QPs with SRQs. If a user allocates an SRQ on an rdma_cm_id, we post receive messages directly to the SRQ. This also allows us to handle XRC SRQs, which may be associated with an rdma_cm_id, but without a corresponding QP. To handle registering memory, we store the PD associated with an rdma_cm_id directly with the id, rather than finding the PD using a QP pointer. Signed-off-by: Sean Hefty --- Makefile.am | 2 + include/rdma/rdma_cma.h | 2 + include/rdma/rdma_verbs.h | 26 +++++++++---- man/rdma_create_srq.3 | 44 +++++++++++++++++++++ man/rdma_destroy_ep.3 | 5 ++- man/rdma_destroy_srq.3 | 21 ++++++++++ src/cma.c | 81 ++++++++++++++++++++++++++++++++------- src/librdmacm.map | 2 + 8 files changed, 160 insertions(+), 23 deletions(-) create mode 100644 man/rdma_create_srq.3 create mode 100644 man/rdma_destroy_srq.3 diff --git a/Makefile.am b/Makefile.am index 4e92e35c..be35b78b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -50,9 +50,11 @@ man_MANS = \ man/rdma_create_event_channel.3 \ man/rdma_create_id.3 \ man/rdma_create_qp.3 \ + man/rdma_create_srq.3 \ man/rdma_destroy_event_channel.3 \ man/rdma_destroy_id.3 \ man/rdma_destroy_qp.3 \ + man/rdma_destroy_srq.3 \ man/rdma_disconnect.3 \ man/rdma_free_devices.3 \ man/rdma_get_cm_event.3 \ diff --git a/include/rdma/rdma_cma.h b/include/rdma/rdma_cma.h index fa4d1894..da6d1a74 100755 --- a/include/rdma/rdma_cma.h +++ b/include/rdma/rdma_cma.h @@ -131,6 +131,8 @@ struct rdma_cm_id { struct ibv_cq *send_cq; struct ibv_comp_channel *recv_cq_channel; struct ibv_cq *recv_cq; + struct ibv_srq *srq; + struct ibv_pd *pd; }; enum { diff --git a/include/rdma/rdma_verbs.h b/include/rdma/rdma_verbs.h index e6a6b422..eca2c7a0 100644 --- a/include/rdma/rdma_verbs.h +++ b/include/rdma/rdma_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Intel 
Corporation. All rights reserved. + * Copyright (c) 2010-2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -51,27 +51,36 @@ static inline int rdma_seterrno(int ret) return ret; } +/* + * Shared receive queues. + */ +int rdma_create_srq(struct rdma_cm_id *id, struct ibv_pd *pd, + struct ibv_srq_init_attr *attr); + +void rdma_destroy_srq(struct rdma_cm_id *id); + + /* * Memory registration helpers. */ static inline struct ibv_mr * rdma_reg_msgs(struct rdma_cm_id *id, void *addr, size_t length) { - return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE); + return ibv_reg_mr(id->pd, addr, length, IBV_ACCESS_LOCAL_WRITE); } static inline struct ibv_mr * rdma_reg_read(struct rdma_cm_id *id, void *addr, size_t length) { - return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_READ); + return ibv_reg_mr(id->pd, addr, length, IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_READ); } static inline struct ibv_mr * rdma_reg_write(struct rdma_cm_id *id, void *addr, size_t length) { - return ibv_reg_mr(id->qp->pd, addr, length, IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE); + return ibv_reg_mr(id->pd, addr, length, IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_WRITE); } static inline int @@ -96,7 +105,10 @@ rdma_post_recvv(struct rdma_cm_id *id, void *context, struct ibv_sge *sgl, wr.sg_list = sgl; wr.num_sge = nsge; - return rdma_seterrno(ibv_post_recv(id->qp, &wr, &bad)); + if (id->srq) + return rdma_seterrno(ibv_post_srq_recv(id->srq, &wr, &bad)); + else + return rdma_seterrno(ibv_post_recv(id->qp, &wr, &bad)); } static inline int diff --git a/man/rdma_create_srq.3 b/man/rdma_create_srq.3 new file mode 100644 index 00000000..9b917c29 --- /dev/null +++ b/man/rdma_create_srq.3 @@ -0,0 +1,44 @@ +.TH "RDMA_CREATE_SRQ" 3 "2011-06-15" "librdmacm" "Librdmacm Programmer's Manual" librdmacm +.SH NAME 
+rdma_create_srq \- Allocate a shared receive queue. +.SH SYNOPSIS +.B "#include <rdma/rdma_verbs.h>" +.P +.B "int" rdma_create_srq +.BI "(struct rdma_cm_id *" id "," +.BI "struct ibv_pd *" pd "," +.BI "struct ibv_srq_init_attr *" attr ");" +.SH ARGUMENTS +.IP "id" 12 +RDMA identifier. +.IP "pd" 12 +Optional protection domain for the SRQ. +.IP "attr" 12 +Initial SRQ attributes. +.SH "DESCRIPTION" +Allocate an SRQ associated with the specified rdma_cm_id. +.SH "RETURN VALUE" +Returns 0 on success, or -1 on error. If an error occurs, errno will be +set to indicate the failure reason. +.SH "NOTES" +The rdma_cm_id must be bound to a local RDMA device before calling this +function, and the protection domain, if provided, must be for that same device. +After being allocated, the SRQ will be ready to handle posting of receives. +.P +If a protection domain is not given - pd parameter is NULL - then +the SRQ will be created using a default protection domain. One +default protection domain is allocated per RDMA device. +.P +The initial SRQ attributes are specified by the attr parameter. The +ext.xrc.cq field in the ibv_srq_init_attr is optional. If +a completion queue is not specified for an XRC SRQ, then a CQ will be +allocated by the rdma_cm for the SRQ, along with corresponding completion +channels. Completion channels and CQ data created by the rdma_cm are +exposed to the user through the rdma_cm_id structure. +.P +The actual capabilities and properties of the created SRQ will be +returned to the user through the attr parameter. An rdma_cm_id +may only be associated with a single SRQ. 
+.SH "SEE ALSO" +rdma_bind_addr(3), rdma_resolve_addr(3), rdma_create_ep(3), +rdma_destroy_srq(3), ibv_create_srq(3), ibv_create_xsrq(3) diff --git a/man/rdma_destroy_ep.3 b/man/rdma_destroy_ep.3 index 13a87666..b48a1e59 100644 --- a/man/rdma_destroy_ep.3 +++ b/man/rdma_destroy_ep.3 @@ -1,4 +1,5 @@ -.TH "RDMA_DESTROY_EP" 3 "2007-05-15" "librdmacm" "Librdmacm Programmer's Manual" librdmacm + +.TH "RDMA_DESTROY_EP" 3 "2011-06-15" "librdmacm" "Librdmacm Programmer's Manual" librdmacm .SH NAME rdma_destroy_ep \- Release a communication identifier. .SH SYNOPSIS @@ -15,7 +16,7 @@ Destroys the specified rdma_cm_id and all associated resources Returns 0 on success, or -1 on error. If an error occurs, errno will be set to indicate the failure reason. .SH "NOTES" -rdma_destroy_ep will automatically destroy any QP associated with +rdma_destroy_ep will automatically destroy any QP and SRQ associated with the rdma_cm_id. .SH "SEE ALSO" rdma_create_ep(3) diff --git a/man/rdma_destroy_srq.3 b/man/rdma_destroy_srq.3 new file mode 100644 index 00000000..5f0dfe0f --- /dev/null +++ b/man/rdma_destroy_srq.3 @@ -0,0 +1,21 @@ +.TH "RDMA_DESTROY_SRQ" 3 "2011-06-15" "librdmacm" "Librdmacm Programmer's Manual" librdmacm +.SH NAME +rdma_destroy_srq \- Deallocate an SRQ. +.SH SYNOPSIS +.B "#include <rdma/rdma_verbs.h>" +.P +.B "void" rdma_destroy_srq +.BI "(struct rdma_cm_id *" id ");" +.SH ARGUMENTS +.IP "id" 12 +RDMA identifier. +.SH "DESCRIPTION" +Destroy an SRQ allocated on the rdma_cm_id. +.SH "RETURN VALUE" +rdma_destroy_srq does not return a value; the function is declared +void, so a failure to destroy the SRQ is not reported to the caller. +.SH "NOTES" +Users should destroy any SRQ associated with an rdma_cm_id before +destroying the ID. 
+.SH "SEE ALSO" +rdma_create_srq(3), rdma_destroy_id(3), ibv_destroy_srq(3) diff --git a/src/cma.c b/src/cma.c index b7cf2954..7cfb936b 100755 --- a/src/cma.c +++ b/src/cma.c @@ -116,7 +116,6 @@ struct cma_id_private { pthread_mutex_t mut; uint32_t handle; struct cma_multicast *mc_list; - struct ibv_pd *pd; struct ibv_qp_init_attr *qp_init_attr; uint8_t initiator_depth; uint8_t responder_resources; @@ -387,6 +386,7 @@ static int ucma_get_device(struct cma_id_private *id_priv, uint64_t guid) if (cma_dev->guid == guid) { id_priv->cma_dev = cma_dev; id_priv->id.verbs = cma_dev->verbs; + id_priv->id.pd = cma_dev->pd; return 0; } } @@ -1141,32 +1141,28 @@ static void ucma_destroy_cqs(struct rdma_cm_id *id) ibv_destroy_comp_channel(id->send_cq_channel); } -static int ucma_create_cqs(struct rdma_cm_id *id, struct ibv_qp_init_attr *attr) +static int ucma_create_cqs(struct rdma_cm_id *id, uint32_t send_size, uint32_t recv_size) { - if (!attr->recv_cq) { + if (recv_size) { id->recv_cq_channel = ibv_create_comp_channel(id->verbs); if (!id->recv_cq_channel) goto err; - id->recv_cq = ibv_create_cq(id->verbs, attr->cap.max_recv_wr, + id->recv_cq = ibv_create_cq(id->verbs, recv_size, id, id->recv_cq_channel, 0); if (!id->recv_cq) goto err; - - attr->recv_cq = id->recv_cq; } - if (!attr->send_cq) { + if (send_size) { id->send_cq_channel = ibv_create_comp_channel(id->verbs); if (!id->send_cq_channel) goto err; - id->send_cq = ibv_create_cq(id->verbs, attr->cap.max_send_wr, + id->send_cq = ibv_create_cq(id->verbs, send_size, id, id->send_cq_channel, 0); if (!id->send_cq) goto err; - - attr->send_cq = id->send_cq; } return 0; @@ -1175,6 +1171,53 @@ err: return ERR(ENOMEM); } +int rdma_create_srq(struct rdma_cm_id *id, struct ibv_pd *pd, + struct ibv_srq_init_attr *attr) +{ + struct cma_id_private *id_priv; + struct ibv_srq *srq; + int ret; + + id_priv = container_of(id, struct cma_id_private, id); + if (!pd) + pd = id->pd; + +#ifdef IBV_XRC_OPS + if (attr->srq_type == IBV_SRQT_XRC) { 
+ if (!attr->ext.xrc.cq) { + ret = ucma_create_cqs(id, 0, attr->attr.max_wr); + if (ret) + return ret; + + attr->ext.xrc.cq = id->recv_cq; + } + } + + srq = ibv_create_xsrq(pd, attr); +#else + srq = ibv_create_srq(pd, attr); +#endif + if (!srq) { + ret = -1; + goto err; + } + + id->pd = pd; + id->srq = srq; + return 0; +err: + ucma_destroy_cqs(id); + return ret; +} + +void rdma_destroy_srq(struct rdma_cm_id *id) +{ + ibv_destroy_srq(id->srq); + if (!id->qp) + ucma_destroy_cqs(id); + id->srq = NULL; +} + int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) { @@ -1187,14 +1230,19 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, id_priv = container_of(id, struct cma_id_private, id); if (!pd) - pd = id_priv->cma_dev->pd; + pd = id->pd; else if (id->verbs != pd->context) return ERR(EINVAL); - ret = ucma_create_cqs(id, qp_init_attr); + ret = ucma_create_cqs(id, qp_init_attr->send_cq ? 0 : qp_init_attr->cap.max_send_wr, + qp_init_attr->recv_cq ? 
0 : qp_init_attr->cap.max_recv_wr); if (ret) return ret; + if (!qp_init_attr->send_cq) + qp_init_attr->send_cq = id->send_cq; + if (!qp_init_attr->recv_cq) + qp_init_attr->recv_cq = id->recv_cq; qp = ibv_create_qp(pd, qp_init_attr); if (!qp) { ret = ERR(ENOMEM); @@ -1208,6 +1256,7 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd, if (ret) goto err2; + id->pd = pd; id->qp = qp; return 0; err2: @@ -1384,7 +1433,7 @@ int rdma_get_request(struct rdma_cm_id *listen, struct rdma_cm_id **id) struct ibv_qp_init_attr attr; attr = *id_priv->qp_init_attr; - ret = rdma_create_qp(event->id, id_priv->pd, &attr); + ret = rdma_create_qp(event->id, listen->pd, &attr); if (ret) goto err; } @@ -2114,7 +2163,8 @@ static int ucma_passive_ep(struct rdma_cm_id *id, struct rdma_addrinfo *res, return ret; id_priv = container_of(id, struct cma_id_private, id); - id_priv->pd = pd; + if (pd) + id->pd = pd; if (qp_init_attr) { id_priv->qp_init_attr = malloc(sizeof *qp_init_attr); @@ -2199,6 +2249,9 @@ void rdma_destroy_ep(struct rdma_cm_id *id) if (id->qp) rdma_destroy_qp(id); + if (id->srq) + rdma_destroy_srq(id); + id_priv = container_of(id, struct cma_id_private, id); if (id_priv->qp_init_attr) free(id_priv->qp_init_attr); diff --git a/src/librdmacm.map b/src/librdmacm.map index 19b193aa..b3235ff7 100644 --- a/src/librdmacm.map +++ b/src/librdmacm.map @@ -33,5 +33,7 @@ RDMACM_1.0 { rdma_get_request; rdma_create_ep; rdma_destroy_ep; + rdma_create_srq; + rdma_destroy_srq; local: *; }; -- 2.46.0