From d798951391a89e4e6cff99edc821b5d04d49d743 Mon Sep 17 00:00:00 2001 From: aestrin Date: Thu, 2 Oct 2008 23:57:06 +0000 Subject: [PATCH] [ipoib] cm - fixes in CM mostly: move conn state change to RTU callback, and resume send after state changed. - some attempts to fix LSO and optimize send path to coexist with LSO. - disable send chksum and set recv checksum -bypass if cm is enabled (no support in HW yet). git-svn-id: svn://openib.tc.cornell.edu/gen1@1628 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- branches/ipoib_cm/kernel/ipoib_cm.c | 97 +++++++++++++++---------- branches/ipoib_cm/kernel/ipoib_driver.c | 21 ++++-- branches/ipoib_cm/kernel/ipoib_port.c | 88 +++++++++++----------- 3 files changed, 118 insertions(+), 88 deletions(-) diff --git a/branches/ipoib_cm/kernel/ipoib_cm.c b/branches/ipoib_cm/kernel/ipoib_cm.c index 5355eadc..970b256f 100644 --- a/branches/ipoib_cm/kernel/ipoib_cm.c +++ b/branches/ipoib_cm/kernel/ipoib_cm.c @@ -456,27 +456,28 @@ IN ib_cm_req_rec_t *p_cm_req ) goto conn_exit; } - cm_state = endpt_cm_set_state( p_endpt, IPOIB_CM_CONNECT ); - if( cm_state == IPOIB_CM_CONNECTED ) + if( ( cm_state = endpt_cm_get_state( p_endpt ) ) != IPOIB_CM_DISCONNECTED ) { - /* if we've got that far most likely we have stale connection. */ - endpt_cm_set_state( p_endpt, IPOIB_CM_STALE_CONN ); - IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - ("Endpoint [%p] IN STATE %d. DESTROY STALE CONNECTION\n", - p_endpt, cm_state ) ); - ipoib_port_remove_endpt( p_port, p_endpt->mac ); - return; - } - if( cm_state != IPOIB_CM_DISCONNECTED ) - { - IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - ("Endpoint [%p] IN STATE %d. CAN't ACCEPT CONNECTION REQ\n", - p_endpt, cm_state ) ); - __conn_reject( p_port, p_cm_req->h_cm_req, rej_status ); - return; + if( cm_state == IPOIB_CM_CONNECTED ) + { + /* if we've got that far most likely we have stale connection. 
*/ + endpt_cm_set_state( p_endpt, IPOIB_CM_STALE_CONN ); + __conn_reject( p_port, p_cm_req->h_cm_req, IB_REJ_STALE_CONN ); + IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, + ("Endpoint [%p] IN STATE %d. DESTROY STALE CONNECTION\n", + p_endpt, cm_state ) ); + ipoib_port_remove_endpt( p_port, p_endpt->mac ); + return; + } + IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, + ("Endpoint [%p] IN STATE %d. REJECT CONNECTION REQ\n", p_endpt, cm_state ) ); + + rej_status = IB_REJ_INVALID_COMM_INSTANCE; + goto conn_exit; } + endpt_cm_set_state( p_endpt, IPOIB_CM_CONNECT ); if( !p_endpt->conn.h_qp ) { ib_status = endpt_cm_create_qp( p_endpt ); @@ -509,18 +510,7 @@ IN ib_cm_req_rec_t *p_cm_req ) } ib_status = __conn_accept( p_port, p_endpt, p_cm_req ); - if( ib_status == IB_SUCCESS ) - { - IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - ("Endpoint [ %p ] CONNECT ACCEPTED\n", p_endpt ) ); - cl_fmap_insert( &p_port->endpt_mgr.conn_endpts, - &p_endpt->dgid, - &p_endpt->conn_item ); - - endpt_cm_set_state( p_endpt, IPOIB_CM_CONNECTED ); - } - conn_exit: if( ib_status != IB_SUCCESS ) @@ -531,19 +521,11 @@ conn_exit: p_port->p_adapter->p_ifc->destroy_qp( p_endpt->conn.h_qp, NULL ); p_endpt->conn.h_qp = NULL; } - } - cl_spinlock_acquire( &p_port->send_lock ); - if( cl_qlist_count( &p_port->send_mgr.pending_list ) ) - { - cl_spinlock_release( &p_port->send_lock ); + ipoib_port_resume( p_port ); } - else - { - cl_spinlock_release( &p_port->send_lock ); - } - IPOIB_EXIT( IPOIB_DBG_ENDPT ); + IPOIB_EXIT( IPOIB_DBG_ENDPT ); return; } @@ -674,9 +656,40 @@ static void __conn_rtu_cb( IN ib_cm_rtu_rec_t *p_rtu_rec ) { + ipoib_endpt_t* p_endpt; + ipoib_port_t* p_port; + IPOIB_ENTER( IPOIB_DBG_ENDPT ); - UNUSED_PARAM( p_rtu_rec ); + + CL_ASSERT( p_rtu_rec ); + p_endpt = (ipoib_endpt_t *)p_rtu_rec->qp_context; + CL_ASSERT( p_endpt ); + p_port = ipoib_endpt_parent( p_endpt ); + CL_ASSERT( p_port ); + + IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, + ("Endpoint [ %p ] 
CONNECT ACCEPTED\n", p_endpt ) ); + + cl_obj_lock( &p_port->obj ); + + cl_fmap_insert( &p_port->endpt_mgr.conn_endpts, + &p_endpt->dgid, + &p_endpt->conn_item ); + endpt_cm_set_state( p_endpt, IPOIB_CM_CONNECTED ); + + cl_obj_unlock( &p_port->obj ); + + cl_spinlock_acquire( &p_port->send_lock ); + if( cl_qlist_count( &p_port->send_mgr.pending_list ) ) + { + cl_spinlock_release( &p_port->send_lock ); + ipoib_port_resume( p_port ); + } + else + { + cl_spinlock_release( &p_port->send_lock ); + } IPOIB_EXIT( IPOIB_DBG_ENDPT ); } @@ -685,7 +698,7 @@ static void __conn_rej_cb( IN ib_cm_rej_rec_t *p_rej_rec ) { - ipoib_endpt_t* p_endpt ; + ipoib_endpt_t* p_endpt; ipoib_port_t* p_port; IPOIB_ENTER( IPOIB_DBG_ENDPT ); @@ -973,6 +986,10 @@ __conn_reject( IPOIB_ENTER( IPOIB_DBG_ENDPT ); + IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, + ("CM REJECT SEND with reason %#x\n", + cl_ntoh16( rej_status ) ) ); + cl_memclr( &cm_rej, sizeof( ib_cm_rej_t ) ); cm_rej.rej_status = IB_REJ_USER_DEFINED; cm_rej.ari_length = sizeof( uint16_t ); diff --git a/branches/ipoib_cm/kernel/ipoib_driver.c b/branches/ipoib_cm/kernel/ipoib_driver.c index 26c282ca..2d20df73 100644 --- a/branches/ipoib_cm/kernel/ipoib_driver.c +++ b/branches/ipoib_cm/kernel/ipoib_driver.c @@ -1444,7 +1444,8 @@ __ipoib_get_tcp_task_offload( NDIS_TASK_OFFLOAD_HEADER *p_offload_hdr; NDIS_TASK_OFFLOAD *p_offload_task; NDIS_TASK_TCP_IP_CHECKSUM *p_offload_chksum; - + csum_flag_t send_csum; + csum_flag_t recv_csum; NDIS_TASK_TCP_LARGE_SEND *p_offload_lso; ULONG buf_len; @@ -1473,6 +1474,16 @@ __ipoib_get_tcp_task_offload( { return NDIS_STATUS_INVALID_DATA; } + if( p_adapter->params.cm_enabled ) + { + send_csum = CSUM_DISABLED; + recv_csum = CSUM_BYPASS; + } + else + { + send_csum = p_adapter->params.send_chksum_offload; + recv_csum = p_adapter->params.recv_chksum_offload; + } p_offload_hdr->OffsetFirstTask = sizeof(NDIS_TASK_OFFLOAD_HEADER); p_offload_task = (NDIS_TASK_OFFLOAD*)(p_offload_hdr + 1); @@ -1483,20 +1494,18 
@@ __ipoib_get_tcp_task_offload( p_offload_task->TaskBufferLength = sizeof(NDIS_TASK_TCP_IP_CHECKSUM); p_offload_chksum = (NDIS_TASK_TCP_IP_CHECKSUM*)p_offload_task->TaskBuffer; - + p_offload_chksum->V4Transmit.IpOptionsSupported = p_offload_chksum->V4Transmit.TcpOptionsSupported = p_offload_chksum->V4Transmit.TcpChecksum = p_offload_chksum->V4Transmit.UdpChecksum = - p_offload_chksum->V4Transmit.IpChecksum = - !!p_adapter->params.send_chksum_offload; + p_offload_chksum->V4Transmit.IpChecksum = !!send_csum; p_offload_chksum->V4Receive.IpOptionsSupported = p_offload_chksum->V4Receive.TcpOptionsSupported = p_offload_chksum->V4Receive.TcpChecksum = p_offload_chksum->V4Receive.UdpChecksum = - p_offload_chksum->V4Receive.IpChecksum = - !!p_adapter->params.recv_chksum_offload; + p_offload_chksum->V4Receive.IpChecksum = !!recv_csum; p_offload_chksum->V6Transmit.IpOptionsSupported = FALSE; p_offload_chksum->V6Transmit.TcpOptionsSupported = FALSE; diff --git a/branches/ipoib_cm/kernel/ipoib_port.c b/branches/ipoib_cm/kernel/ipoib_port.c index 5564c3c7..bca2dcd1 100644 --- a/branches/ipoib_cm/kernel/ipoib_port.c +++ b/branches/ipoib_cm/kernel/ipoib_port.c @@ -2640,10 +2640,6 @@ __recv_arp( (*pp_src)->cm_flag = cm_capable; - IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - (" ARP RECV'd from ENDPT[%p] CM %#x\n", - (*pp_src), ipoib_addr_get_flags( &p_ib_arp->src_hw ) ) ); - CL_ASSERT( !cl_memcmp( &(*pp_src)->dgid, &p_ib_arp->src_hw.gid, sizeof(ib_gid_t) ) ); CL_ASSERT( ipoib_is_voltaire_router_gid( &(*pp_src)->dgid ) || @@ -2662,13 +2658,21 @@ __recv_arp( ipoib_addr_set_sid( &(*pp_src)->conn.service_id, ipoib_addr_get_qpn( &p_ib_arp->src_hw ) ); - - IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - (" ARP REQUEST RECEIVED, ENDPT[%p] state %d CM CAPABLE: %d\n", - *pp_src, endpt_cm_get_state( *pp_src ), cm_capable ) ); } } +#if DBG + + IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, + ("ARP REQUEST from ENDPT[%p] state %d CM cap: %d QPN: %#x 
MAC: %02x:%02x:%02x:%02x:%02x:%02x\n", + *pp_src, endpt_cm_get_state( *pp_src ), + ((cm_capable == IPOIB_CM_FLAG_RC)? 1: 0), + cl_ntoh32( ipoib_addr_get_qpn( &p_ib_arp->src_hw ) ), + (*pp_src)->mac.addr[0], (*pp_src)->mac.addr[1], + (*pp_src)->mac.addr[2], (*pp_src)->mac.addr[3], + (*pp_src)->mac.addr[4], (*pp_src)->mac.addr[5] )); +#endif + /* Now swizzle the data. */ p_arp->hw_type = ARP_HW_TYPE_ETH; p_arp->hw_size = sizeof(mac_addr_t); @@ -3963,6 +3967,7 @@ __send_mgr_filter_arp( &p_port->endpt_mgr.conn_lock ); cl_event_signal( &p_port->endpt_mgr.event ); return NDIS_STATUS_PENDING; + case IPOIB_CM_CONNECT: /* queue ARP REP packet until connected */ ExFreeToNPagedLookasideList( @@ -3980,24 +3985,21 @@ __send_mgr_filter_arp( cl_memclr( &p_ib_arp->dst_hw, sizeof(ipoib_hw_addr_t) ); } +#if DBG if( p_port->p_adapter->params.cm_enabled ) { - if( p_arp->op == ARP_OP_REQ ) - { - IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - (" ARP REQUEST SEND to ENDPT[%p] CM flag %#x\n", - p_desc->p_endpt, ipoib_addr_get_flags( &p_ib_arp->src_hw ) )); - } - else if( p_arp->op == ARP_OP_REP ) - { - IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - ("ARP REPLY to ENDPT[%p] state %d CM flag: %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n", - p_desc->p_endpt, endpt_cm_get_state( p_desc->p_endpt ), p_desc->p_endpt->cm_flag, - p_desc->p_endpt->mac.addr[0], p_desc->p_endpt->mac.addr[1], - p_desc->p_endpt->mac.addr[2], p_desc->p_endpt->mac.addr[3], - p_desc->p_endpt->mac.addr[4], p_desc->p_endpt->mac.addr[5] ) ); - } + IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, + (" %s SEND to ENDPT[%p] State: %d flag: %#x, QPN: %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n", + (( p_arp->op == ARP_OP_REQ )? 
"ARP REQUEST" : "ARP REPLY" ), + p_desc->p_endpt, + endpt_cm_get_state( p_desc->p_endpt ), + p_desc->p_endpt->cm_flag, + cl_ntoh32( ipoib_addr_get_qpn( &p_ib_arp->dst_hw )), + p_desc->p_endpt->mac.addr[0], p_desc->p_endpt->mac.addr[1], + p_desc->p_endpt->mac.addr[2], p_desc->p_endpt->mac.addr[3], + p_desc->p_endpt->mac.addr[4], p_desc->p_endpt->mac.addr[5] )); } +#endif p_ib_arp->dst_ip = p_arp->dst_ip; @@ -4146,8 +4148,6 @@ __build_send_desc( PktExt = NDIS_PACKET_EXTENSION_FROM_PACKET(p_desc->p_pkt); pChecksumPktInfo = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&PktExt->NdisPacketInfo[TcpIpChecksumPacketInfo]; mss = PtrToUlong(PktExt->NdisPacketInfo[TcpLargeSendPacketInfo]); - //TODO: optimization: we already got total length from NdisGetFirstBufferFromPacketSafe before - NdisQueryPacket(p_desc->p_pkt, (PUINT)&PhysBufCount, NULL, &FirstBuffer,(PUINT)&PacketLength); /* Format the send descriptor. */ hdr_idx = cl_atomic_inc( &p_port->hdr_idx ); @@ -4159,6 +4159,9 @@ __build_send_desc( if( p_port->p_adapter->params.lso && mss ) { memset(&TheLsoData, 0, sizeof TheLsoData ); + //TODO: optimization: we already got total length from NdisGetFirstBufferFromPacketSafe before + NdisQueryPacket(p_desc->p_pkt, (PUINT)&PhysBufCount, NULL, &FirstBuffer,(PUINT)&PacketLength); + status = GetLsoHeaderSize( p_port, FirstBuffer, @@ -4213,30 +4216,31 @@ __build_send_desc( /* Setup the work request. 
*/ p_desc->wr.p_next = NULL; p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt; - - if(p_port->p_adapter->params.send_chksum_offload && - (pChecksumPktInfo->Transmit.NdisPacketChecksumV4 || pChecksumPktInfo->Transmit.NdisPacketChecksumV6)) - { - // Set transimition checksum offloading - if (pChecksumPktInfo->Transmit.NdisPacketIpChecksum) - { - p_desc->wr.send_opt |= IB_SEND_OPT_TX_IP_CSUM; - } - if(pChecksumPktInfo->Transmit.NdisPacketTcpChecksum ) - { - p_desc->wr.send_opt |= IB_SEND_OPT_TX_TCP_UDP_CSUM; - } - } - p_desc->wr.ds_array = p_desc->local_ds; + /* check if we can send data through RC QP */ if( ETH_IS_UNICAST( p_eth_hdr->dst.addr ) && endpt_cm_get_state( p_desc->p_endpt) == IPOIB_CM_CONNECTED ) { p_desc->send_qp = p_desc->p_endpt->conn.h_qp; } - else + else // UD QP { + if( p_port->p_adapter->params.send_chksum_offload && + ( pChecksumPktInfo->Transmit.NdisPacketChecksumV4 || + pChecksumPktInfo->Transmit.NdisPacketChecksumV6 )) + { + // Set transmission checksum offloading + if( pChecksumPktInfo->Transmit.NdisPacketIpChecksum ) + { + p_desc->wr.send_opt |= IB_SEND_OPT_TX_IP_CSUM; + } + if( pChecksumPktInfo->Transmit.NdisPacketTcpChecksum ) + { + p_desc->wr.send_opt |= IB_SEND_OPT_TX_TCP_UDP_CSUM; + } + } + p_desc->send_qp = p_port->ib_mgr.h_qp; p_desc->wr.dgrm.ud.remote_qp = p_desc->p_endpt->qpn; p_desc->wr.dgrm.ud.remote_qkey = p_port->ib_mgr.bcast_rec.qkey; @@ -6698,7 +6702,7 @@ NDIS_STATUS GetLsoHeaderSize( TcpHdr = (tcp_hdr_t UNALIGNED *)pSrc; TcpHeaderLen = TCP_HEADER_LENGTH(TcpHdr); - ASSERT(TcpHeaderLen == 20); + //ASSERT(TcpHeaderLen == 20); if (CurrLength < TcpHeaderLen) { //IPOIB_PRINT(TRACE_LEVEL_VERBOSE, ETH, ("Error porcessing packets\n")); -- 2.46.0