]> git.openfabrics.org - ~shefty/rdma-win.git/commitdiff
[ipoib] cm
author aestrin <aestrin@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Thu, 2 Oct 2008 23:57:06 +0000 (23:57 +0000)
committer aestrin <aestrin@ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86>
Thu, 2 Oct 2008 23:57:06 +0000 (23:57 +0000)
- Fixes, mostly in CM: move the connection state change to the RTU callback, and resume sends after the state has changed.
- some attempts to fix LSO and optimize send path to coexist with LSO.
- Disable send checksum offload and set receive checksum to bypass if CM is enabled (no support in HW yet).

git-svn-id: svn://openib.tc.cornell.edu/gen1@1628 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86

branches/ipoib_cm/kernel/ipoib_cm.c
branches/ipoib_cm/kernel/ipoib_driver.c
branches/ipoib_cm/kernel/ipoib_port.c

index 5355eadcd3929e7bfb35c402d3a9e1a9a6f505e6..970b256fc0662500b20e4f4b17866d84dc4c243d 100644 (file)
@@ -456,27 +456,28 @@ IN                        ib_cm_req_rec_t                         *p_cm_req )
                goto conn_exit;\r
        }\r
 \r
-       cm_state = endpt_cm_set_state( p_endpt, IPOIB_CM_CONNECT );\r
-       if( cm_state == IPOIB_CM_CONNECTED )\r
+       if( ( cm_state = endpt_cm_get_state( p_endpt ) ) != IPOIB_CM_DISCONNECTED )\r
        {\r
-               /* if we've got that far most likely we have stale connection. */\r
-               endpt_cm_set_state( p_endpt, IPOIB_CM_STALE_CONN );\r
-               IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
-                       ("Endpoint [%p] IN STATE %d. DESTROY STALE CONNECTION\n",\r
-                       p_endpt, cm_state ) );\r
-               ipoib_port_remove_endpt( p_port, p_endpt->mac );\r
-               return;\r
-       }\r
-       if( cm_state != IPOIB_CM_DISCONNECTED )\r
-       {\r
-               IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
-                       ("Endpoint [%p] IN STATE %d. CAN't ACCEPT CONNECTION REQ\n",\r
-                       p_endpt, cm_state ) );\r
-               __conn_reject( p_port, p_cm_req->h_cm_req, rej_status );\r
-               return;\r
+               if( cm_state == IPOIB_CM_CONNECTED )\r
+               {\r
+                       /* if we've got that far most likely we have stale connection. */\r
+                       endpt_cm_set_state( p_endpt, IPOIB_CM_STALE_CONN );\r
+                       __conn_reject( p_port, p_cm_req->h_cm_req, IB_REJ_STALE_CONN );\r
+                       IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
+                               ("Endpoint [%p] IN STATE %d. DESTROY STALE CONNECTION\n",\r
+                               p_endpt, cm_state ) );\r
+                       ipoib_port_remove_endpt( p_port, p_endpt->mac );\r
+                       return;\r
+               }\r
 \r
+               IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
+               ("Endpoint [%p] IN STATE %d. REJECT CONNECTION REQ\n", p_endpt, cm_state ) );\r
+               \r
+               rej_status = IB_REJ_INVALID_COMM_INSTANCE;\r
+               goto conn_exit;\r
        }\r
 \r
+       endpt_cm_set_state( p_endpt, IPOIB_CM_CONNECT );\r
        if( !p_endpt->conn.h_qp )\r
        {\r
                ib_status = endpt_cm_create_qp( p_endpt );\r
@@ -509,18 +510,7 @@ IN                 ib_cm_req_rec_t                         *p_cm_req )
        }\r
        \r
        ib_status = __conn_accept( p_port, p_endpt, p_cm_req );\r
-       if( ib_status == IB_SUCCESS )\r
-       {\r
-               IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
-                       ("Endpoint [ %p ] CONNECT ACCEPTED\n", p_endpt ) );\r
 \r
-               cl_fmap_insert( &p_port->endpt_mgr.conn_endpts, \r
-                                       &p_endpt->dgid, \r
-                                       &p_endpt->conn_item );\r
-\r
-               endpt_cm_set_state( p_endpt, IPOIB_CM_CONNECTED );\r
-       }\r
-       \r
 conn_exit:\r
 \r
        if( ib_status != IB_SUCCESS )\r
@@ -531,19 +521,11 @@ conn_exit:
                        p_port->p_adapter->p_ifc->destroy_qp( p_endpt->conn.h_qp, NULL );\r
                        p_endpt->conn.h_qp = NULL;\r
                }\r
-       }\r
-       cl_spinlock_acquire( &p_port->send_lock );\r
-       if( cl_qlist_count( &p_port->send_mgr.pending_list ) )\r
-       {\r
-               cl_spinlock_release( &p_port->send_lock );\r
+       \r
                ipoib_port_resume( p_port );\r
        }\r
-       else\r
-       {\r
-               cl_spinlock_release( &p_port->send_lock );\r
-       }\r
-       IPOIB_EXIT( IPOIB_DBG_ENDPT );\r
 \r
+       IPOIB_EXIT( IPOIB_DBG_ENDPT );\r
        return;\r
 }\r
 \r
@@ -674,9 +656,40 @@ static void
 __conn_rtu_cb(\r
 IN                             ib_cm_rtu_rec_t                         *p_rtu_rec )\r
 {\r
+       ipoib_endpt_t*  p_endpt;\r
+       ipoib_port_t*   p_port;\r
+\r
        IPOIB_ENTER( IPOIB_DBG_ENDPT );\r
-       UNUSED_PARAM( p_rtu_rec );\r
+       \r
+       CL_ASSERT( p_rtu_rec );\r
+       p_endpt = (ipoib_endpt_t *)p_rtu_rec->qp_context;\r
+       CL_ASSERT( p_endpt );\r
+       p_port = ipoib_endpt_parent( p_endpt );\r
+       CL_ASSERT( p_port );\r
+\r
+       IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
+               ("Endpoint [ %p ] CONNECT ACCEPTED\n", p_endpt ) );\r
+\r
+       cl_obj_lock( &p_port->obj );\r
+\r
+       cl_fmap_insert( &p_port->endpt_mgr.conn_endpts, \r
+                                       &p_endpt->dgid, \r
+                                       &p_endpt->conn_item );\r
 \r
+       endpt_cm_set_state( p_endpt, IPOIB_CM_CONNECTED );\r
+\r
+       cl_obj_unlock( &p_port->obj );\r
+\r
+       cl_spinlock_acquire( &p_port->send_lock );\r
+       if( cl_qlist_count( &p_port->send_mgr.pending_list ) )\r
+       {\r
+               cl_spinlock_release( &p_port->send_lock );\r
+               ipoib_port_resume( p_port );\r
+       }\r
+       else\r
+       {\r
+               cl_spinlock_release( &p_port->send_lock );\r
+       }\r
        IPOIB_EXIT( IPOIB_DBG_ENDPT );\r
 }\r
 \r
@@ -685,7 +698,7 @@ static void
 __conn_rej_cb(\r
        IN              ib_cm_rej_rec_t                 *p_rej_rec )\r
 {\r
-       ipoib_endpt_t*  p_endpt ;\r
+       ipoib_endpt_t*  p_endpt;\r
        ipoib_port_t*   p_port;\r
 \r
        IPOIB_ENTER( IPOIB_DBG_ENDPT );\r
@@ -973,6 +986,10 @@ __conn_reject(
 \r
        IPOIB_ENTER( IPOIB_DBG_ENDPT );\r
 \r
+       IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
+                       ("CM REJECT SEND with reason %#x\n", \r
+                                                               cl_ntoh16( rej_status ) ) );\r
+\r
        cl_memclr( &cm_rej, sizeof( ib_cm_rej_t ) );\r
        cm_rej.rej_status = IB_REJ_USER_DEFINED;\r
        cm_rej.ari_length = sizeof( uint16_t );\r
index 26c282ca718d978526a91c2d8ac499c50d884619..2d20df73634be6f8d5175d65997b1dea5abe7c97 100644 (file)
@@ -1444,7 +1444,8 @@ __ipoib_get_tcp_task_offload(
        NDIS_TASK_OFFLOAD_HEADER        *p_offload_hdr;\r
        NDIS_TASK_OFFLOAD                       *p_offload_task;\r
        NDIS_TASK_TCP_IP_CHECKSUM       *p_offload_chksum;\r
-\r
+       csum_flag_t                                     send_csum;\r
+       csum_flag_t                                     recv_csum;\r
        NDIS_TASK_TCP_LARGE_SEND        *p_offload_lso;\r
        ULONG                                           buf_len;\r
 \r
@@ -1473,6 +1474,16 @@ __ipoib_get_tcp_task_offload(
        {\r
                return NDIS_STATUS_INVALID_DATA;\r
        }\r
+       if( p_adapter->params.cm_enabled )\r
+       {\r
+               send_csum = CSUM_DISABLED;\r
+               recv_csum = CSUM_BYPASS;\r
+       }\r
+       else\r
+       {\r
+               send_csum = p_adapter->params.send_chksum_offload;\r
+               recv_csum = p_adapter->params.recv_chksum_offload;\r
+       }\r
 \r
        p_offload_hdr->OffsetFirstTask = sizeof(NDIS_TASK_OFFLOAD_HEADER);\r
        p_offload_task = (NDIS_TASK_OFFLOAD*)(p_offload_hdr + 1);\r
@@ -1483,20 +1494,18 @@ __ipoib_get_tcp_task_offload(
        p_offload_task->TaskBufferLength = sizeof(NDIS_TASK_TCP_IP_CHECKSUM);\r
        p_offload_chksum =\r
                (NDIS_TASK_TCP_IP_CHECKSUM*)p_offload_task->TaskBuffer;\r
-\r
+       \r
        p_offload_chksum->V4Transmit.IpOptionsSupported =\r
        p_offload_chksum->V4Transmit.TcpOptionsSupported =\r
        p_offload_chksum->V4Transmit.TcpChecksum =\r
        p_offload_chksum->V4Transmit.UdpChecksum =\r
-       p_offload_chksum->V4Transmit.IpChecksum =\r
-               !!p_adapter->params.send_chksum_offload;\r
+       p_offload_chksum->V4Transmit.IpChecksum = !!send_csum;\r
 \r
        p_offload_chksum->V4Receive.IpOptionsSupported =\r
        p_offload_chksum->V4Receive.TcpOptionsSupported =\r
        p_offload_chksum->V4Receive.TcpChecksum =\r
        p_offload_chksum->V4Receive.UdpChecksum =\r
-       p_offload_chksum->V4Receive.IpChecksum =\r
-               !!p_adapter->params.recv_chksum_offload;\r
+       p_offload_chksum->V4Receive.IpChecksum = !!recv_csum;\r
 \r
        p_offload_chksum->V6Transmit.IpOptionsSupported = FALSE;\r
        p_offload_chksum->V6Transmit.TcpOptionsSupported = FALSE;\r
index 5564c3c7b60d107fdf6c529dd2855b314c4c6b5e..bca2dcd19a2af5526afcf2476dcaf3042425898f 100644 (file)
@@ -2640,10 +2640,6 @@ __recv_arp(
 \r
        (*pp_src)->cm_flag = cm_capable;\r
 \r
-       IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
-                               (" ARP RECV'd from ENDPT[%p] CM %#x\n",\r
-                               (*pp_src), ipoib_addr_get_flags( &p_ib_arp->src_hw ) ) );\r
-\r
        CL_ASSERT( !cl_memcmp(\r
                &(*pp_src)->dgid, &p_ib_arp->src_hw.gid, sizeof(ib_gid_t) ) );\r
        CL_ASSERT( ipoib_is_voltaire_router_gid( &(*pp_src)->dgid ) ||\r
@@ -2662,13 +2658,21 @@ __recv_arp(
                        ipoib_addr_set_sid( \r
                                &(*pp_src)->conn.service_id,\r
                                ipoib_addr_get_qpn( &p_ib_arp->src_hw ) );\r
-\r
-                       IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
-                               (" ARP REQUEST RECEIVED, ENDPT[%p] state %d CM CAPABLE: %d\n",\r
-                               *pp_src, endpt_cm_get_state( *pp_src ), cm_capable ) );\r
                }\r
        }\r
 \r
+#if DBG\r
+       \r
+       IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
+               ("ARP REQUEST from ENDPT[%p] state %d CM cap: %d QPN: %#x MAC: %02x:%02x:%02x:%02x:%02x:%02x\n",\r
+                       *pp_src, endpt_cm_get_state( *pp_src ), \r
+                       ((cm_capable == IPOIB_CM_FLAG_RC)? 1: 0),\r
+                       cl_ntoh32( ipoib_addr_get_qpn( &p_ib_arp->src_hw ) ),\r
+                       (*pp_src)->mac.addr[0], (*pp_src)->mac.addr[1],\r
+                       (*pp_src)->mac.addr[2], (*pp_src)->mac.addr[3],\r
+                       (*pp_src)->mac.addr[4], (*pp_src)->mac.addr[5] ));\r
+#endif\r
+\r
        /* Now swizzle the data. */\r
        p_arp->hw_type = ARP_HW_TYPE_ETH;\r
        p_arp->hw_size = sizeof(mac_addr_t);\r
@@ -3963,6 +3967,7 @@ __send_mgr_filter_arp(
                                                                                                &p_port->endpt_mgr.conn_lock );\r
                                        cl_event_signal( &p_port->endpt_mgr.event );\r
                                        return NDIS_STATUS_PENDING;\r
+                       \r
                        case IPOIB_CM_CONNECT:\r
                                /* queue ARP REP packet until connected */\r
                                        ExFreeToNPagedLookasideList(\r
@@ -3980,24 +3985,21 @@ __send_mgr_filter_arp(
                cl_memclr( &p_ib_arp->dst_hw, sizeof(ipoib_hw_addr_t) );\r
        }\r
 \r
+#if DBG\r
        if( p_port->p_adapter->params.cm_enabled )\r
        {\r
-               if( p_arp->op == ARP_OP_REQ )\r
-               {\r
-                       IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
-                               (" ARP REQUEST SEND to ENDPT[%p] CM flag %#x\n",\r
-                               p_desc->p_endpt, ipoib_addr_get_flags( &p_ib_arp->src_hw ) ));\r
-               }\r
-               else if( p_arp->op == ARP_OP_REP )\r
-               {\r
-                       IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
-                               ("ARP REPLY to ENDPT[%p] state %d CM flag: %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n",\r
-                               p_desc->p_endpt, endpt_cm_get_state( p_desc->p_endpt ), p_desc->p_endpt->cm_flag,\r
-                               p_desc->p_endpt->mac.addr[0], p_desc->p_endpt->mac.addr[1],\r
-                               p_desc->p_endpt->mac.addr[2], p_desc->p_endpt->mac.addr[3],\r
-                               p_desc->p_endpt->mac.addr[4], p_desc->p_endpt->mac.addr[5] ) );\r
-               }\r
+               IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT,\r
+               (" %s SEND to ENDPT[%p] State: %d flag: %#x, QPN: %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n",\r
+                       (( p_arp->op == ARP_OP_REQ )? "ARP REQUEST" : "ARP REPLY" ),\r
+                       p_desc->p_endpt, \r
+                       endpt_cm_get_state( p_desc->p_endpt ),\r
+                       p_desc->p_endpt->cm_flag, \r
+                       cl_ntoh32( ipoib_addr_get_qpn( &p_ib_arp->dst_hw )),\r
+                       p_desc->p_endpt->mac.addr[0], p_desc->p_endpt->mac.addr[1],\r
+                       p_desc->p_endpt->mac.addr[2], p_desc->p_endpt->mac.addr[3],\r
+                       p_desc->p_endpt->mac.addr[4], p_desc->p_endpt->mac.addr[5] ));\r
        }\r
+#endif\r
 \r
        p_ib_arp->dst_ip = p_arp->dst_ip;\r
 \r
@@ -4146,8 +4148,6 @@ __build_send_desc(
        PktExt = NDIS_PACKET_EXTENSION_FROM_PACKET(p_desc->p_pkt);\r
        pChecksumPktInfo = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&PktExt->NdisPacketInfo[TcpIpChecksumPacketInfo];\r
        mss = PtrToUlong(PktExt->NdisPacketInfo[TcpLargeSendPacketInfo]);\r
-       //TODO: optimization: we already got total length from NdisGetFirstBufferFromPacketSafe before\r
-       NdisQueryPacket(p_desc->p_pkt, (PUINT)&PhysBufCount, NULL, &FirstBuffer,(PUINT)&PacketLength);\r
 \r
        /* Format the send descriptor. */\r
        hdr_idx = cl_atomic_inc( &p_port->hdr_idx );\r
@@ -4159,6 +4159,9 @@ __build_send_desc(
        if( p_port->p_adapter->params.lso && mss )\r
        {\r
                memset(&TheLsoData, 0, sizeof TheLsoData );\r
+               //TODO: optimization: we already got total length from NdisGetFirstBufferFromPacketSafe before\r
+               NdisQueryPacket(p_desc->p_pkt, (PUINT)&PhysBufCount, NULL, &FirstBuffer,(PUINT)&PacketLength);\r
+\r
                status = GetLsoHeaderSize(\r
                        p_port,\r
                        FirstBuffer, \r
@@ -4213,30 +4216,31 @@ __build_send_desc(
        /* Setup the work request. */\r
        p_desc->wr.p_next = NULL;\r
        p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt;\r
-\r
-       if(p_port->p_adapter->params.send_chksum_offload && \r
-               (pChecksumPktInfo->Transmit.NdisPacketChecksumV4 || pChecksumPktInfo->Transmit.NdisPacketChecksumV6))\r
-       {\r
-               // Set transimition checksum offloading \r
-               if (pChecksumPktInfo->Transmit.NdisPacketIpChecksum) \r
-               {\r
-                       p_desc->wr.send_opt |= IB_SEND_OPT_TX_IP_CSUM;\r
-               }\r
-               if(pChecksumPktInfo->Transmit.NdisPacketTcpChecksum  ) \r
-               {\r
-                       p_desc->wr.send_opt |= IB_SEND_OPT_TX_TCP_UDP_CSUM;\r
-               }\r
-       }\r
-       \r
        p_desc->wr.ds_array = p_desc->local_ds;\r
 \r
+       /* check if we can send data through RC QP */\r
        if( ETH_IS_UNICAST( p_eth_hdr->dst.addr ) &&  \r
                endpt_cm_get_state( p_desc->p_endpt) == IPOIB_CM_CONNECTED )\r
        {\r
                p_desc->send_qp = p_desc->p_endpt->conn.h_qp;\r
        }\r
-       else\r
+       else // UD QP\r
        {\r
+               if( p_port->p_adapter->params.send_chksum_offload && \r
+                       ( pChecksumPktInfo->Transmit.NdisPacketChecksumV4 || \r
+                         pChecksumPktInfo->Transmit.NdisPacketChecksumV6 ))\r
+               {\r
+                       // Set transimition checksum offloading \r
+                       if( pChecksumPktInfo->Transmit.NdisPacketIpChecksum )\r
+                       {\r
+                               p_desc->wr.send_opt |= IB_SEND_OPT_TX_IP_CSUM;\r
+                       }\r
+                       if( pChecksumPktInfo->Transmit.NdisPacketTcpChecksum )\r
+                       {\r
+                               p_desc->wr.send_opt |= IB_SEND_OPT_TX_TCP_UDP_CSUM;\r
+                       }\r
+               }\r
+\r
                p_desc->send_qp = p_port->ib_mgr.h_qp;\r
                p_desc->wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn;\r
                p_desc->wr.dgrm.ud.remote_qkey = p_port->ib_mgr.bcast_rec.qkey;\r
@@ -6698,7 +6702,7 @@ NDIS_STATUS GetLsoHeaderSize(
        TcpHdr = (tcp_hdr_t UNALIGNED *)pSrc;\r
        TcpHeaderLen = TCP_HEADER_LENGTH(TcpHdr);\r
 \r
-       ASSERT(TcpHeaderLen == 20);\r
+       //ASSERT(TcpHeaderLen == 20);\r
        \r
        if (CurrLength < TcpHeaderLen) {\r
                //IPOIB_PRINT(TRACE_LEVEL_VERBOSE, ETH, ("Error porcessing packets\n"));\r