]> git.openfabrics.org - ~ardavis/dapl.git/commitdiff
ucm: fix issues with UD QP's.
authorArlin Davis <arlin.r.davis@intel.com>
Tue, 8 Sep 2009 16:11:37 +0000 (09:11 -0700)
committerArlin Davis <arlin.r.davis@intel.com>
Tue, 8 Sep 2009 16:11:37 +0000 (09:11 -0700)
private data size not in host order when processing
connection events.

ud extentions event should include original ia_addr
and qpn used during connection and not the IB qpn.

ucm QP service resource cleanup in wrong order.

cleanup extra cr/lf device.c

Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
dapl/openib_common/qp.c
dapl/openib_ucm/cm.c
dapl/openib_ucm/device.c

index 581fc83882d3321b99bf66661890b3fa980a40cd..09a61b155f651c9407910211fd280248b3c9979d 100644 (file)
@@ -557,6 +557,7 @@ dapls_create_ah(IN DAPL_HCA         *hca,
        /* address handle. RC and UD */
        qp_attr.ah_attr.dlid = ntohs(lid);
        if (gid != NULL) {
+               dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n");
                qp_attr.ah_attr.is_global = 1;
                qp_attr.ah_attr.grh.dgid.global.subnet_prefix = 
                                ntohll(gid->global.subnet_prefix);
@@ -569,6 +570,10 @@ dapls_create_ah(IN DAPL_HCA                *hca,
        qp_attr.ah_attr.src_path_bits = 0;
        qp_attr.ah_attr.port_num = hca->port_num;
 
+       dapl_log(DAPL_DBG_TYPE_CM, 
+                       " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n", 
+                       hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle);
+
        /* UD: create AH for remote side */
        ah = ibv_create_ah(pd, &qp_attr.ah_attr);
        if (!ah) {
index a2db64e4e01f377acc483f2cfeedc3cb54b28a84..72de5d58ed16524370f0350e8c83c41cc43ad50a 100644 (file)
@@ -381,7 +381,7 @@ dp_ib_cm_handle_t ucm_cm_find(ib_hca_transport_t *tp, ib_cm_msg_t *msg)
                        continue;
                
                dapl_dbg_log(DAPL_DBG_TYPE_CM, 
-                            " MATCH? cm %p st %s sport %x sqpn %x lid %x\n", 
+                            " MATCH? cm %p st %s sport %d sqpn %x lid %x\n", 
                             cm, dapl_cm_state_str(cm->state),
                             ntohs(cm->msg.sport), ntohl(cm->msg.sqpn),
                             ntohs(cm->msg.saddr.ib.lid));
@@ -755,7 +755,7 @@ DAT_RETURN
 dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
 {
        dapl_log(DAPL_DBG_TYPE_EP, 
-                " connect: lid %x qpn %x lport %d p_sz=%d -> "
+                " connect: lid %x i_qpn %x lport %d p_sz=%d -> "
                 " lid %x c_qpn %x rport %d\n",
                 htons(cm->msg.saddr.ib.lid), htonl(cm->msg.saddr.ib.qpn),
                 htons(cm->msg.sport), htons(cm->msg.p_size),
@@ -934,6 +934,30 @@ ud_bail:
                dapl_os_memcpy(&xevent.remote_ah.ia_addr,
                               &cm->msg.daddr,
                               sizeof(union dcm_addr));
+               /* remote ia_addr reference includes ucm qpn, not IB qpn */
+               ((union dcm_addr*)
+                       &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
+
+               dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                            " ACTIVE: UD xevent ah %p qpn 0x%x lid 0x%x\n",
+                            xevent.remote_ah.ah, xevent.remote_ah.qpn, lid);
+               dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                            " ACTIVE: UD xevent ia_addr qp_type %d, port %d"
+                            " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n",
+                            ((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->ib.qp_type,
+                            ((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->ib.port_num,
+                            ntohs(((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->ib.lid),
+                            ntohl(((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->ib.qpn),
+                            ntohll(((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->
+                                       ib.gid.global.subnet_prefix),
+                            ntohll(((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->
+                                       ib.gid.global.interface_id));
 
                if (event == IB_CME_CONNECTED)
                        event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED;
@@ -944,7 +968,7 @@ ud_bail:
                                (DAPL_EVD *)cm->ep->param.connect_evd_handle,
                                event,
                                (DAT_EP_HANDLE)ep,
-                               (DAT_COUNT)cm->msg.p_size,
+                               (DAT_COUNT)ntohs(cm->msg.p_size),
                                (DAT_PVOID *)cm->msg.p_data,
                                (DAT_PVOID *)&xevent);
 
@@ -1026,7 +1050,7 @@ static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg)
                dapls_evd_post_cr_event_ext(acm->sp,
                                            DAT_IB_UD_CONNECTION_REQUEST_EVENT,
                                            acm,
-                                           (DAT_COUNT)acm->msg.p_size,
+                                           (DAT_COUNT)ntohs(acm->msg.p_size),
                                            (DAT_PVOID *)acm->msg.p_data,
                                            (DAT_PVOID *)&xevent);
        } else
@@ -1070,7 +1094,7 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
        dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: connected!\n");
 
 #ifdef DAT_EXTENSIONS
-       if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD) {
+       if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) {
                DAT_IB_EXTENSION_EVENT_DATA xevent;
                uint16_t lid = ntohs(cm->msg.daddr.ib.lid);
                
@@ -1081,13 +1105,37 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
                xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn);
                dapl_os_memcpy(&xevent.remote_ah.ia_addr,
                               &cm->msg.daddr,
-                              sizeof(cm->msg.daddr));
+                               sizeof(union dcm_addr));
+               /* remote ia_addr reference includes ucm qpn, not IB qpn */
+               ((union dcm_addr*)
+                       &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn;
+
+               dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                            " PASSIVE: UD xevent ah %p qpn 0x%x lid 0x%x\n",
+                            xevent.remote_ah.ah, xevent.remote_ah.qpn, lid);
+               dapl_dbg_log(DAPL_DBG_TYPE_EP,
+                            " PASSIVE: UD xevent ia_addr qp_type %d, port %d"
+                            " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n",
+                            ((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->ib.qp_type,
+                            ((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->ib.port_num,
+                            ntohs(((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->ib.lid),
+                            ntohl(((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->ib.qpn),
+                            ntohll(((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->
+                                       ib.gid.global.subnet_prefix),
+                            ntohll(((union dcm_addr*)
+                               &xevent.remote_ah.ia_addr)->
+                                       ib.gid.global.interface_id));
 
                dapls_evd_post_connection_event_ext(
                                (DAPL_EVD *)cm->ep->param.connect_evd_handle,
                                DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED,
                                (DAT_EP_HANDLE)cm->ep,
-                               (DAT_COUNT)cm->msg.p_size,
+                               (DAT_COUNT)ntohs(cm->msg.p_size),
                                (DAT_PVOID *)cm->msg.p_data,
                                (DAT_PVOID *)&xevent);
 
@@ -1130,9 +1178,9 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
                     " ACCEPT_USR: remote port_num=%d lid=%x"
                     " iqp=%x qp_type %d, psize=%d\n",
-                    cm->msg.daddr.ib.port_num, cm->msg.daddr.ib.lid,
-                    cm->msg.daddr.ib.qpn, cm->msg.daddr.ib.qp_type, 
-                    cm->msg.p_size);
+                    cm->msg.daddr.ib.port_num, ntohs(cm->msg.daddr.ib.lid),
+                    ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type, 
+                    ntohs(cm->msg.p_size));
 
        dapl_dbg_log(DAPL_DBG_TYPE_CM,
                     " ACCEPT_USR: remote GID subnet %016llx id %016llx\n",
@@ -1186,7 +1234,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
        /* setup local QP info and type from EP, copy pdata, for reply */
        cm->msg.op = htons(DCM_REP);
        cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num);
-       cm->msg.saddr.ib.qp_type = htons(ep->qp_handle->qp_type);
+       cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
        cm->msg.saddr.ib.port_num = cm->hca->port_num;
        cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid; 
        cm->msg.saddr.ib.gid = cm->hca->ib_trans.addr.ib.gid; 
@@ -1254,6 +1302,7 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
        /* remote uCM information, comes from consumer provider r_addr */
        cm->msg.dport = htons((uint16_t)r_psp);
        cm->msg.dqpn = cm->msg.daddr.ib.qpn;
+       cm->msg.daddr.ib.qpn = 0; /* don't have a remote qpn until reply */
        
        if (p_size) {
                cm->msg.p_size = htons(p_size);
index 329b050978b6f9693a2c46ed453d195f48fd5a35..243044afa58f5fc327d8ebe096f6d8b8275d8e66 100644 (file)
@@ -281,8 +281,9 @@ found:
        }
 
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
-                    " open_hca: devname %s, port %d, hostname_IP %s\n",
+                    " open_hca: devname %s, ctx %p port %d, hostname_IP %s\n",
                     ibv_get_device_name(hca_ptr->ib_trans.ib_dev),
+                    hca_ptr->ib_hca_handle,
                     hca_ptr->ib_trans.addr.ib.port_num, 
                     inet_ntoa(((struct sockaddr_in *)
                               &hca_ptr->hca_address)->sin_addr));
@@ -371,72 +372,79 @@ static void ucm_service_destroy(IN DAPL_HCA *hca)
        ib_hca_transport_t *tp = &hca->ib_trans;
        int msg_size = sizeof(ib_cm_msg_t);
 
-       if (tp->pd)\r
-               ibv_dealloc_pd(tp->pd);\r
-\r
-       if (tp->rch)\r
-               ibv_destroy_comp_channel(tp->rch);\r
-\r
-       if (tp->scq)\r
-               ibv_destroy_cq(tp->scq);\r
-\r
-       if (tp->rcq)\r
-               ibv_destroy_cq(tp->rcq);\r
-\r
-       if (tp->qp)\r
-               ibv_destroy_qp(tp->qp);\r
-
        if (tp->mr_sbuf)
                ibv_dereg_mr(tp->mr_sbuf);
 
        if (tp->mr_sbuf)
-               ibv_dereg_mr(tp->mr_sbuf);\r
-\r
-       if (tp->ah)\r
-               dapl_os_free(tp->ah, (sizeof(*tp->ah) * 0xffff));\r
-\r
-       if (tp->sid)\r
-               dapl_os_free(tp->sid, (sizeof(*tp->sid) * 0xffff));\r
-\r
-       if (tp->rbuf)\r
-               dapl_os_free(tp->rbuf, (msg_size * tp->qpe));\r
-\r
-       if (tp->sbuf)\r
-               dapl_os_free(tp->sbuf, (msg_size * tp->qpe));\r
+               ibv_dereg_mr(tp->mr_sbuf);
+
+       if (tp->qp)
+               ibv_destroy_qp(tp->qp);
+
+       if (tp->scq)
+               ibv_destroy_cq(tp->scq);
+
+       if (tp->rcq)
+               ibv_destroy_cq(tp->rcq);
+
+       if (tp->rch)
+               ibv_destroy_comp_channel(tp->rch);
+
+        dapl_log(DAPL_DBG_TYPE_UTIL,
+                        " destroy_service: pd %p ctx %p handle 0x%x\n",
+                         tp->pd, tp->pd->context, tp->pd->handle);
+       if (tp->pd)
+               ibv_dealloc_pd(tp->pd);
+
+       if (tp->ah)
+               dapl_os_free(tp->ah, (sizeof(*tp->ah) * 0xffff));
+
+       if (tp->sid)
+               dapl_os_free(tp->sid, (sizeof(*tp->sid) * 0xffff));
+
+       if (tp->rbuf)
+               dapl_os_free(tp->rbuf, (msg_size * tp->qpe));
+
+       if (tp->sbuf)
+               dapl_os_free(tp->sbuf, (msg_size * tp->qpe));
 }
 
 static int ucm_service_create(IN DAPL_HCA *hca)
 {
-        struct ibv_qp_init_attr qp_create;\r
-       ib_hca_transport_t *tp = &hca->ib_trans;\r
-       struct ibv_recv_wr recv_wr, *recv_err;\r
-        struct ibv_sge sge;\r
-       int i, mlen = sizeof(ib_cm_msg_t);\r
-       int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */\r
+        struct ibv_qp_init_attr qp_create;
+       ib_hca_transport_t *tp = &hca->ib_trans;
+       struct ibv_recv_wr recv_wr, *recv_err;
+        struct ibv_sge sge;
+       int i, mlen = sizeof(ib_cm_msg_t);
+       int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */
 
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ucm_create: \n");
 
        /* get queue sizes */
        tp->qpe = dapl_os_get_env_val("DAPL_UCM_QPE", UCM_DEFAULT_QPE);
-       tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQE", UCM_DEFAULT_CQE);\r
-       tp->pd = ibv_alloc_pd(hca->ib_hca_handle);\r
-        if (!tp->pd) \r
-                goto bail;\r
-        \r
-       tp->rch = ibv_create_comp_channel(hca->ib_hca_handle);\r
-       if (!tp->rch) \r
-               goto bail;\r
-\r
-       tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0);\r
-       if (!tp->scq) \r
-               goto bail;\r
+       tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQE", UCM_DEFAULT_CQE);
+       tp->pd = ibv_alloc_pd(hca->ib_hca_handle);
+        if (!tp->pd) 
+                goto bail;
+        
+        dapl_log(DAPL_DBG_TYPE_UTIL,
+                        " create_service: pd %p ctx %p handle 0x%x\n",
+                         tp->pd, tp->pd->context, tp->pd->handle);
+
+       tp->rch = ibv_create_comp_channel(hca->ib_hca_handle);
+       if (!tp->rch) 
+               goto bail;
+
+       tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0);
+       if (!tp->scq) 
+               goto bail;
         
-       tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0);\r
-       if (!tp->rcq) \r
-               goto bail;\r
-\r
-       if(ibv_req_notify_cq(tp->rcq, 0))\r
-               goto bail; \r
+       tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0);
+       if (!tp->rcq) 
+               goto bail;
+
+       if(ibv_req_notify_cq(tp->rcq, 0))
+               goto bail; 
  
        dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
        qp_create.qp_type = IBV_QPT_UD;
@@ -446,59 +454,59 @@ static int ucm_service_create(IN DAPL_HCA *hca)
        qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1;
        qp_create.cap.max_inline_data = tp->max_inline_send;
        qp_create.qp_context = (void *)hca;
-\r
-       tp->qp = ibv_create_qp(tp->pd, &qp_create);\r
-       if (!tp->qp) \r
-                goto bail;\r
-\r
+
+       tp->qp = ibv_create_qp(tp->pd, &qp_create);
+       if (!tp->qp) 
+                goto bail;
+
        tp->ah = (ib_ah_handle_t*) dapl_os_alloc(sizeof(ib_ah_handle_t) * 0xffff);
        tp->sid = (uint8_t*) dapl_os_alloc(sizeof(uint8_t) * 0xffff);
        tp->rbuf = (void*) dapl_os_alloc((mlen + hlen) * tp->qpe);
        tp->sbuf = (void*) dapl_os_alloc(mlen * tp->qpe);
-\r
-       if (!tp->ah || !tp->rbuf || !tp->sbuf || !tp->sid)\r
-               goto bail;\r
-\r
+
+       if (!tp->ah || !tp->rbuf || !tp->sbuf || !tp->sid)
+               goto bail;
+
        (void)dapl_os_memzero(tp->ah, (sizeof(ib_ah_handle_t) * 0xffff));
        (void)dapl_os_memzero(tp->sid, (sizeof(uint8_t) * 0xffff));
        tp->sid[0] = 1; /* resv slot 0, 0 == no ports available */
-       (void)dapl_os_memzero(tp->rbuf, ((mlen + hlen) * tp->qpe));\r
-       (void)dapl_os_memzero(tp->sbuf, (mlen * tp->qpe));\r
-
-       tp->mr_sbuf = ibv_reg_mr(tp->pd, tp->sbuf, \r
-                                (mlen * tp->qpe),\r
-                                IBV_ACCESS_LOCAL_WRITE);\r
-       if (!tp->mr_sbuf)\r
-               goto bail;\r
-\r
-       tp->mr_rbuf = ibv_reg_mr(tp->pd, tp->rbuf, \r
-                                ((mlen + hlen) * tp->qpe),\r
-                                IBV_ACCESS_LOCAL_WRITE);\r
-       if (!tp->mr_rbuf)\r
-               goto bail;\r
-       \r
-       /* modify UD QP: init, rtr, rts */\r
-       if ((dapls_modify_qp_ud(hca, tp->qp)) != DAT_SUCCESS)\r
-               goto bail;\r
-\r
-       /* post receive buffers, setup head, tail pointers */\r
-       recv_wr.next = NULL;\r
-       recv_wr.sg_list = &sge;\r
-       recv_wr.num_sge = 1;\r
-       sge.length = mlen + hlen;\r
-       sge.lkey = tp->mr_rbuf->lkey;\r
-\r
-       for (i = 0; i < tp->qpe; i++) {\r
-               recv_wr.wr_id = \r
-                       (uintptr_t)((char *)&tp->rbuf[i] + \r
-                                   sizeof(struct ibv_grh));\r
-               sge.addr = (uintptr_t) &tp->rbuf[i];\r
-               if (ibv_post_recv(tp->qp, &recv_wr, &recv_err))\r
-                       goto bail;\r
-       }\r
-\r
-       /* save qp_num as part of ia_address, network order */\r
-       tp->addr.ib.qpn = htonl(tp->qp->qp_num);\r
+       (void)dapl_os_memzero(tp->rbuf, ((mlen + hlen) * tp->qpe));
+       (void)dapl_os_memzero(tp->sbuf, (mlen * tp->qpe));
+
+       tp->mr_sbuf = ibv_reg_mr(tp->pd, tp->sbuf, 
+                                (mlen * tp->qpe),
+                                IBV_ACCESS_LOCAL_WRITE);
+       if (!tp->mr_sbuf)
+               goto bail;
+
+       tp->mr_rbuf = ibv_reg_mr(tp->pd, tp->rbuf, 
+                                ((mlen + hlen) * tp->qpe),
+                                IBV_ACCESS_LOCAL_WRITE);
+       if (!tp->mr_rbuf)
+               goto bail;
+       
+       /* modify UD QP: init, rtr, rts */
+       if ((dapls_modify_qp_ud(hca, tp->qp)) != DAT_SUCCESS)
+               goto bail;
+
+       /* post receive buffers, setup head, tail pointers */
+       recv_wr.next = NULL;
+       recv_wr.sg_list = &sge;
+       recv_wr.num_sge = 1;
+       sge.length = mlen + hlen;
+       sge.lkey = tp->mr_rbuf->lkey;
+
+       for (i = 0; i < tp->qpe; i++) {
+               recv_wr.wr_id = 
+                       (uintptr_t)((char *)&tp->rbuf[i] + 
+                                   sizeof(struct ibv_grh));
+               sge.addr = (uintptr_t) &tp->rbuf[i];
+               if (ibv_post_recv(tp->qp, &recv_wr, &recv_err))
+                       goto bail;
+       }
+
+       /* save qp_num as part of ia_address, network order */
+       tp->addr.ib.qpn = htonl(tp->qp->qp_num);
         return 0;
 bail:
        dapl_log(DAPL_DBG_TYPE_ERR,