From fb3cb9544160fa8c09b5bb300a21eeb7f7cf1f02 Mon Sep 17 00:00:00 2001 From: aestrin Date: Wed, 22 Oct 2008 20:38:50 +0000 Subject: [PATCH] [ipoib] cm major merge with trunk. -also included "local_mad" and "avoid SM" patches. - conn request changed to utilize path record locally generated. git-svn-id: svn://openib.tc.cornell.edu/gen1@1685 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- branches/ipoib_cm/kernel/ipoib_adapter.c | 61 ++- branches/ipoib_cm/kernel/ipoib_adapter.h | 13 +- branches/ipoib_cm/kernel/ipoib_cm.c | 18 +- branches/ipoib_cm/kernel/ipoib_debug.h | 44 +- branches/ipoib_cm/kernel/ipoib_driver.c | 98 +--- branches/ipoib_cm/kernel/ipoib_driver.h | 12 +- branches/ipoib_cm/kernel/ipoib_endpoint.c | 172 ++----- branches/ipoib_cm/kernel/ipoib_endpoint.h | 3 - branches/ipoib_cm/kernel/ipoib_ibat.c | 80 ++++ branches/ipoib_cm/kernel/ipoib_port.c | 556 +++++++++++++--------- branches/ipoib_cm/kernel/ipoib_port.h | 18 +- branches/ipoib_cm/kernel/ipoib_xfr_mgr.h | 229 +++++---- 12 files changed, 712 insertions(+), 592 deletions(-) diff --git a/branches/ipoib_cm/kernel/ipoib_adapter.c b/branches/ipoib_cm/kernel/ipoib_adapter.c index a70f0274..c51d9474 100644 --- a/branches/ipoib_cm/kernel/ipoib_adapter.c +++ b/branches/ipoib_cm/kernel/ipoib_adapter.c @@ -121,7 +121,9 @@ ipoib_get_adapter_guids( NDIS_STATUS ipoib_get_adapter_params( IN NDIS_HANDLE* const wrapper_config_context, - IN OUT ipoib_adapter_t *p_adapter ); + IN OUT ipoib_adapter_t *p_adapter, + OUT PUCHAR *p_mac, + OUT UINT *p_len ); /* Implementation */ @@ -134,6 +136,8 @@ ipoib_create_adapter( ipoib_adapter_t *p_adapter; ib_api_status_t status; cl_status_t cl_status; + PUCHAR mac; + UINT len; IPOIB_ENTER( IPOIB_DBG_INIT ); @@ -185,6 +189,17 @@ ipoib_create_adapter( return IB_ERROR; } + /* Read configuration parameters. 
*/ + status = ipoib_get_adapter_params( wrapper_config_context, + p_adapter , &mac, &len); + if( status != NDIS_STATUS_SUCCESS ) + { + cl_obj_destroy( &p_adapter->obj ); + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("ipoib_get_adapter_params returned 0x%.8x.\n", status) ); + return status; + } + status = adapter_init( p_adapter ); if( status != IB_SUCCESS ) { @@ -195,15 +210,23 @@ ipoib_create_adapter( return status; } - /* Read configuration parameters. */ - status = ipoib_get_adapter_params( wrapper_config_context, - p_adapter ); - if( status != NDIS_STATUS_SUCCESS ) + ETH_COPY_NETWORK_ADDRESS( p_adapter->params.conf_mac.addr, p_adapter->mac.addr ); + /* If there is a NetworkAddress override in registry, use it */ + if( (status == NDIS_STATUS_SUCCESS) && (len == HW_ADDR_LEN) ) { - cl_obj_destroy( &p_adapter->obj ); - IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, - ("ipoib_get_adapter_params returned 0x%.8x.\n", status) ); - return status; + if( ETH_IS_MULTICAST(mac) || ETH_IS_BROADCAST(mac) || + !ETH_IS_LOCALLY_ADMINISTERED(mac) ) + { + IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_INIT, + ("Overriding NetworkAddress is invalid - " + "%02x-%02x-%02x-%02x-%02x-%02x\n", + mac[0], mac[1], mac[2], + mac[3], mac[4], mac[5]) ); + } + else + { + ETH_COPY_NETWORK_ADDRESS( p_adapter->params.conf_mac.addr, mac ); + } } *pp_adapter = p_adapter; @@ -283,7 +306,7 @@ adapter_construct( cl_perf_construct( &p_adapter->perf ); p_adapter->state = IB_PNP_PORT_ADD; - p_adapter->rate = FOUR_X_IN_100BPS; + p_adapter->port_rate = FOUR_X_IN_100BPS; } @@ -341,20 +364,24 @@ adapter_init( return IB_ERROR; } - /* Validate the port GUID and generate the MAC address. 
*/ status = - ipoib_mac_from_guid( p_adapter->guids.port_guid.guid, &p_adapter->mac ); + ipoib_mac_from_guid( p_adapter->guids.port_guid.guid, p_adapter->params.guid_mask, &p_adapter->mac); if( status != IB_SUCCESS ) { + if( status == IB_INVALID_GUID_MASK ) + { + IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_ERROR, + ("Invalid GUID mask received, rejecting it") ); + ipoib_create_log(p_adapter->h_adapter, GUID_MASK_LOG_INDEX, EVENT_IPOIB_WRONG_PARAMETER_WRN); + } + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("ipoib_mac_from_guid returned %s\n", p_adapter->p_ifc->get_err_str( status )) ); return status; } - - /* Open AL. */ status = p_adapter->p_ifc->open_al( &p_adapter->h_al ); if( status != IB_SUCCESS ) @@ -1038,7 +1065,7 @@ ipoib_set_rate( rate = 0; } - p_adapter->rate = rate; + p_adapter->port_rate = rate; IPOIB_EXIT( IPOIB_DBG_INIT ); } @@ -1097,8 +1124,8 @@ ipoib_set_active( */ IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, ("Link UP!\n") ); NdisWriteErrorLogEntry( p_adapter->h_adapter, - EVENT_IPOIB_PORT_UP + (p_adapter->rate/ONE_X_IN_100BPS), - 1, p_adapter->rate ); + EVENT_IPOIB_PORT_UP + (p_adapter->port_rate/ONE_X_IN_100BPS), + 1, p_adapter->port_rate ); if( !p_adapter->reset ) { diff --git a/branches/ipoib_cm/kernel/ipoib_adapter.h b/branches/ipoib_cm/kernel/ipoib_adapter.h index 70b5af37..34e1d598 100644 --- a/branches/ipoib_cm/kernel/ipoib_adapter.h +++ b/branches/ipoib_cm/kernel/ipoib_adapter.h @@ -82,6 +82,8 @@ typedef struct _ipoib_params uint32_t xfer_block_size; mac_addr_t conf_mac; uint32_t mc_leave_rescan; + uint32_t guid_mask; + uint32_t bc_join_retry; boolean_t cm_enabled; uint32_t cm_payload_mtu; uint32_t cm_xfer_block_size; @@ -99,12 +101,11 @@ typedef struct _ipoib_params * Number of send WQEs to allocate. * * send_chksum_offload -* Flag to indicate whether to offload send checksums. This will make it -* so that IPoIB packets should never be forwarded out of the IB subnet -* without recalculating the checksum. 
-* * recv_chksum_offload -* Flag to indicate whether to offload recv checksums. +* Flags to indicate whether to offload send/recv checksums. +* 0 - No hardware checksum +* 1 - Try to offload if the device supports it +* 2 - Always report success (checksum bypass) * * wsdp_enabled * Flag to indicate whether WSDP is enabled for an adapter adapter. @@ -175,7 +176,7 @@ typedef struct _ipoib_adapter struct _ipoib_port *p_port; - uint32_t rate; + uint32_t port_rate; ipoib_params_t params; cl_spinlock_t recv_stat_lock; diff --git a/branches/ipoib_cm/kernel/ipoib_cm.c b/branches/ipoib_cm/kernel/ipoib_cm.c index 3922b6e3..3cdadca3 100644 --- a/branches/ipoib_cm/kernel/ipoib_cm.c +++ b/branches/ipoib_cm/kernel/ipoib_cm.c @@ -229,6 +229,7 @@ ipoib_endpt_connect( ib_api_status_t ib_status = IB_SUCCESS; ib_cm_req_t conn_req; ipoib_port_t* p_port; + ib_path_rec_t path_rec; IPOIB_ENTER( IPOIB_DBG_ENDPT ); @@ -252,6 +253,7 @@ ipoib_endpt_connect( cl_ntoh32( p_endpt->qpn ), cl_ntoh64( p_endpt->conn.service_id ) ) ); cl_memclr( &conn_req, sizeof(ib_cm_req_t) ); + cl_memclr( &path_rec, sizeof(ib_path_rec_t) ); p_endpt->conn.private_data.ud_qpn = p_port->ib_mgr.qpn; p_endpt->conn.private_data.recv_mtu = @@ -264,16 +266,26 @@ ipoib_endpt_connect( conn_req.p_req_pdata = (uint8_t *)&p_endpt->conn.private_data; conn_req.req_length = sizeof( cm_private_data_t ); conn_req.svc_id = p_endpt->conn.service_id; - conn_req.p_primary_path = (ib_path_rec_t*)&p_endpt->conn.path_rec; + + if( ipoib_endpt_get_path( p_port, + p_endpt, + &path_rec ) != STATUS_SUCCESS ) + { + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("ipoib_endpt_get_path failed\n" ) ); + return IB_INVALID_PARAMETER; + } + + conn_req.p_primary_path = (ib_path_rec_t*)&path_rec; conn_req.retry_cnt = 3; conn_req.rnr_nak_timeout = 22; /* 20 ms */ conn_req.rnr_retry_cnt = 3; conn_req.max_cm_retries = 5; conn_req.remote_resp_timeout = - (ib_path_rec_pkt_life( &p_endpt->conn.path_rec ) + 1) & 0x1f; + (ib_path_rec_pkt_life( &path_rec 
) + 1) & 0x1f; conn_req.local_resp_timeout = - (ib_path_rec_pkt_life( &p_endpt->conn.path_rec ) + 1 ) & 0x1f; + (ib_path_rec_pkt_life( &path_rec ) + 1 ) & 0x1f; conn_req.flow_ctrl = FALSE; // srq attached qp does not support FC diff --git a/branches/ipoib_cm/kernel/ipoib_debug.h b/branches/ipoib_cm/kernel/ipoib_debug.h index 6dfa54b6..417bb686 100644 --- a/branches/ipoib_cm/kernel/ipoib_debug.h +++ b/branches/ipoib_cm/kernel/ipoib_debug.h @@ -274,28 +274,28 @@ enum ref_cnt_buckets ref_array_size, /* Used to size the array of ref buckets. */ ref_mask = 100, /* Used to differentiate derefs. */ - ref_failed_recv_wc = (100 + ref_get_recv), - ref_recv_inv_len = (200 + ref_get_recv), - ref_recv_loopback = (300 + ref_get_recv), - ref_recv_filter = (400 + ref_get_recv), - - ref_bcast_get_cb = (100 + ref_get_bcast), - - ref_join_bcast = (100 + ref_bcast), - ref_create_bcast = (200 + ref_bcast), - ref_bcast_inv_state = (300 + ref_bcast), - ref_bcast_req_failed = (400 + ref_bcast), - ref_bcast_error = (500 + ref_bcast), - ref_bcast_join_failed = (600 + ref_bcast), - ref_bcast_create_failed = (700 + ref_bcast), - - ref_mcast_inv_state = (100 + ref_join_mcast), - ref_mcast_req_failed = (200 + ref_join_mcast), - ref_mcast_no_endpt = (300 + ref_join_mcast), - ref_mcast_av_failed = (400 + ref_join_mcast), - ref_mcast_join_failed = (500 + ref_join_mcast), - - ref_port_info_cb = (100 + ref_port_up) + ref_failed_recv_wc = 100 + ref_get_recv, + ref_recv_inv_len = 200 + ref_get_recv, + ref_recv_loopback = 300 + ref_get_recv, + ref_recv_filter = 400 + ref_get_recv, + + ref_bcast_get_cb = 100 + ref_get_bcast, + + ref_join_bcast = 100 + ref_bcast, + ref_create_bcast = 200 + ref_bcast, + ref_bcast_inv_state = 300 + ref_bcast, + ref_bcast_req_failed = 400 + ref_bcast, + ref_bcast_error = 500 + ref_bcast, + ref_bcast_join_failed = 600 + ref_bcast, + ref_bcast_create_failed = 700 + ref_bcast, + + ref_mcast_inv_state = 100 + ref_join_mcast, + ref_mcast_req_failed = 200 + ref_join_mcast, + 
ref_mcast_no_endpt = 300 + ref_join_mcast, + ref_mcast_av_failed = 400 + ref_join_mcast, + ref_mcast_join_failed = 500 + ref_join_mcast, + + ref_port_info_cb = 100 + ref_port_up }; diff --git a/branches/ipoib_cm/kernel/ipoib_driver.c b/branches/ipoib_cm/kernel/ipoib_driver.c index 1861ed33..056d86ed 100644 --- a/branches/ipoib_cm/kernel/ipoib_driver.c +++ b/branches/ipoib_cm/kernel/ipoib_driver.c @@ -128,6 +128,9 @@ static const unsigned char VENDOR_ID[] = {0x00, 0x06, 0x6A, 0x00}; #define IB_INFINITE_SERVICE_LEASE 0xFFFFFFFF +//The mask is 8 bit and can't contain more than 6 non-zero bits +#define MAX_GUID_MAX 0xFC + /* Global driver debug level */ uint32_t g_ipoib_dbg_level = TRACE_LEVEL_ERROR; @@ -147,24 +150,28 @@ typedef struct _IPOIB_REG_ENTRY IPOIB_REG_ENTRY HCARegTable[] = { // reg value name If Required Offset in parentr struct Field size Default Min Max + {NDIS_STRING_CONST("GUIDMask"), 1, IPOIB_OFFSET(guid_mask), IPOIB_SIZE(guid_mask), 0, 0, MAX_GUID_MAX}, + /* GUIDMask should be the first element */ {NDIS_STRING_CONST("RqDepth"), 1, IPOIB_OFFSET(rq_depth), IPOIB_SIZE(rq_depth), 512, 128, 1024}, {NDIS_STRING_CONST("RqLowWatermark"), 0, IPOIB_OFFSET(rq_low_watermark), IPOIB_SIZE(rq_low_watermark), 4, 2, 8}, {NDIS_STRING_CONST("SqDepth"), 1, IPOIB_OFFSET(sq_depth), IPOIB_SIZE(sq_depth), 512, 128, 1024}, - {NDIS_STRING_CONST("SendChksum"), 1, IPOIB_OFFSET(send_chksum_offload), IPOIB_SIZE(send_chksum_offload),CSUM_ENABLED, CSUM_DISABLED, CSUM_BYPASS}, - {NDIS_STRING_CONST("RecvChksum"), 1, IPOIB_OFFSET(recv_chksum_offload), IPOIB_SIZE(recv_chksum_offload),CSUM_ENABLED, CSUM_DISABLED, CSUM_BYPASS}, + {NDIS_STRING_CONST("SendChksum"), 1, IPOIB_OFFSET(send_chksum_offload), IPOIB_SIZE(send_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSUM_BYPASS}, + {NDIS_STRING_CONST("RecvChksum"), 1, IPOIB_OFFSET(recv_chksum_offload), IPOIB_SIZE(recv_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSUM_BYPASS}, {NDIS_STRING_CONST("SaTimeout"), 1, IPOIB_OFFSET(sa_timeout), 
IPOIB_SIZE(sa_timeout), 1000, 250, UINT_MAX}, {NDIS_STRING_CONST("SaRetries"), 1, IPOIB_OFFSET(sa_retry_cnt), IPOIB_SIZE(sa_retry_cnt), 10, 1, UINT_MAX}, {NDIS_STRING_CONST("RecvRatio"), 1, IPOIB_OFFSET(recv_pool_ratio), IPOIB_SIZE(recv_pool_ratio), 1, 1, 10}, {NDIS_STRING_CONST("PayloadMtu"), 1, IPOIB_OFFSET(payload_mtu), IPOIB_SIZE(payload_mtu), 2044, 60, MAX_CM_PAYLOAD_MTU}, {NDIS_STRING_CONST("lso"), 0, IPOIB_OFFSET(lso), IPOIB_SIZE(lso), 0, 0, 1}, {NDIS_STRING_CONST("MCLeaveRescan"), 1, IPOIB_OFFSET(mc_leave_rescan), IPOIB_SIZE(mc_leave_rescan), 260, 1, 3600}, + {NDIS_STRING_CONST("BCJoinRetry"), 1, IPOIB_OFFSET(bc_join_retry), IPOIB_SIZE(bc_join_retry), 50, 0, 1000}, {NDIS_STRING_CONST("CmEnabled"), 0, IPOIB_OFFSET(cm_enabled), IPOIB_SIZE(cm_enabled), FALSE, FALSE, TRUE} -}; + +}; #define IPOIB_NUM_REG_PARAMS (sizeof (HCARegTable) / sizeof(IPOIB_REG_ENTRY)) -static void +void ipoib_create_log( NDIS_HANDLE h_adapter, UINT ind, @@ -484,13 +491,13 @@ ipoib_unload( NDIS_STATUS ipoib_get_adapter_params( IN NDIS_HANDLE* const wrapper_config_context, - IN OUT ipoib_adapter_t *p_adapter ) + IN OUT ipoib_adapter_t *p_adapter, + OUT PUCHAR *p_mac, + OUT UINT *p_len ) { NDIS_STATUS status; NDIS_HANDLE h_config; NDIS_CONFIGURATION_PARAMETER *p_param; - PUCHAR mac; - UINT len; UINT value; PIPOIB_REG_ENTRY pRegEntry; UINT i; @@ -614,26 +621,8 @@ ipoib_get_adapter_params( p_adapter->params.payload_mtu = min( MAX_UD_PAYLOAD_MTU, p_adapter->params.payload_mtu); p_adapter->params.xfer_block_size = (sizeof(eth_hdr_t) + p_adapter->params.payload_mtu); - NdisReadNetworkAddress( &status, &mac, &len, h_config ); + NdisReadNetworkAddress( &status, p_mac, p_len, h_config ); - ETH_COPY_NETWORK_ADDRESS( p_adapter->params.conf_mac.addr, p_adapter->mac.addr ); - /* If there is a NetworkAddress override in registry, use it */ - if( (status == NDIS_STATUS_SUCCESS) && (len == HW_ADDR_LEN) ) - { - if( ETH_IS_MULTICAST(mac) || ETH_IS_BROADCAST(mac) || - !ETH_IS_LOCALLY_ADMINISTERED(mac) ) - { 
- IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_INIT, - ("Overriding NetworkAddress is invalid - " - "%02x-%02x-%02x-%02x-%02x-%02x\n", - mac[0], mac[1], mac[2], - mac[3], mac[4], mac[5]) ); - } - else - { - ETH_COPY_NETWORK_ADDRESS( p_adapter->params.conf_mac.addr, mac ); - } - } NdisCloseConfiguration( h_config ); @@ -989,40 +978,20 @@ ipoib_query_info( IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID, ("Port %d received query for OID_GEN_MAXIMUM_FRAME_SIZE\n", port_num) ); if( p_adapter->params.cm_enabled ) + { info = p_adapter->params.cm_payload_mtu; + } else + { info = p_adapter->params.payload_mtu; + } break; case OID_GEN_LINK_SPEED: IPOIB_PRINT( TRACE_LEVEL_VERBOSE,IPOIB_DBG_OID, ("Port %d received query for OID_GEN_LINK_SPEED\n", port_num) ); cl_obj_lock( &p_adapter->obj ); - switch( p_adapter->state ) - { - case IB_PNP_PORT_ADD: - /* Mark the adapter as pending an OID */ - p_adapter->pending_query = TRUE; - - /* Save the request parameters. */ - p_adapter->query_oid = oid_info; - - IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID, - ("Port %d returning NDIS_STATUS_PENDING\n", port_num) ); - status = NDIS_STATUS_PENDING; - break; - - case IB_PNP_PORT_REMOVE: - IPOIB_PRINT( TRACE_LEVEL_INFORMATION,IPOIB_DBG_OID, - ("Port %d returning NDIS_STATUS_NOT_ACCEPTED\n", port_num) ); - status = NDIS_STATUS_NOT_ACCEPTED; - break; - - default: - CL_ASSERT( p_adapter->p_port ); - info = p_adapter->rate; - break; - } + info = p_adapter->port_rate; cl_obj_unlock( &p_adapter->obj ); break; @@ -1452,8 +1421,7 @@ __ipoib_get_tcp_task_offload( NDIS_TASK_OFFLOAD_HEADER *p_offload_hdr; NDIS_TASK_OFFLOAD *p_offload_task; NDIS_TASK_TCP_IP_CHECKSUM *p_offload_chksum; - csum_flag_t send_csum; - csum_flag_t recv_csum; + NDIS_TASK_TCP_LARGE_SEND *p_offload_lso; ULONG buf_len; @@ -1482,16 +1450,6 @@ __ipoib_get_tcp_task_offload( { return NDIS_STATUS_INVALID_DATA; } - if( p_adapter->params.cm_enabled ) - { - send_csum = CSUM_DISABLED; - recv_csum = CSUM_BYPASS; - } - else - { - 
send_csum = p_adapter->params.send_chksum_offload; - recv_csum = p_adapter->params.recv_chksum_offload; - } p_offload_hdr->OffsetFirstTask = sizeof(NDIS_TASK_OFFLOAD_HEADER); p_offload_task = (NDIS_TASK_OFFLOAD*)(p_offload_hdr + 1); @@ -1507,13 +1465,15 @@ __ipoib_get_tcp_task_offload( p_offload_chksum->V4Transmit.TcpOptionsSupported = p_offload_chksum->V4Transmit.TcpChecksum = p_offload_chksum->V4Transmit.UdpChecksum = - p_offload_chksum->V4Transmit.IpChecksum = !!send_csum; + p_offload_chksum->V4Transmit.IpChecksum = + !!(p_adapter->params.send_chksum_offload); p_offload_chksum->V4Receive.IpOptionsSupported = p_offload_chksum->V4Receive.TcpOptionsSupported = p_offload_chksum->V4Receive.TcpChecksum = p_offload_chksum->V4Receive.UdpChecksum = - p_offload_chksum->V4Receive.IpChecksum = !!recv_csum; + p_offload_chksum->V4Receive.IpChecksum = + !!(p_adapter->params.recv_chksum_offload); p_offload_chksum->V6Transmit.IpOptionsSupported = FALSE; p_offload_chksum->V6Transmit.TcpOptionsSupported = FALSE; @@ -2088,11 +2048,6 @@ ipoib_resume_oids( { switch( query_oid.oid ) { - case OID_GEN_LINK_SPEED: - ipoib_complete_query( p_adapter, &query_oid, - status, &p_adapter->rate, sizeof(p_adapter->rate) ); - break; - case OID_GEN_MEDIA_CONNECT_STATUS: info = NdisMediaStateConnected; ipoib_complete_query( p_adapter, &query_oid, @@ -2100,8 +2055,7 @@ ipoib_resume_oids( break; default: - CL_ASSERT( query_oid.oid == OID_GEN_LINK_SPEED || - query_oid.oid == OID_GEN_MEDIA_CONNECT_STATUS ); + CL_ASSERT( query_oid.oid == OID_GEN_MEDIA_CONNECT_STATUS ); break; } } diff --git a/branches/ipoib_cm/kernel/ipoib_driver.h b/branches/ipoib_cm/kernel/ipoib_driver.h index aa611f95..db2ea863 100644 --- a/branches/ipoib_cm/kernel/ipoib_driver.h +++ b/branches/ipoib_cm/kernel/ipoib_driver.h @@ -65,9 +65,8 @@ #define MAX_XFER_BLOCK_SIZE (sizeof(eth_hdr_t) + MAX_UD_PAYLOAD_MTU) #define DATA_OFFSET (sizeof(eth_hdr_t) - sizeof(ipoib_hdr_t)) -//#define IPOIB_CM_FLAG_RC (0x01) // RC flag -//#define 
IPOIB_CM_FLAG_SVCID (0x01) // IETF bit -#define IPOIB_CM_FLAG_RC (0x80) //OFED set RC flag this way +#define IPOIB_CM_FLAG_RC (0x80) +#define IPOIB_CM_FLAG_UC (0x40) #define IPOIB_CM_FLAG_SVCID (0x10) // OFED set IETF bit this way ( open OFED PR 1121 ) #define MAX_SEND_SGE (30) @@ -133,8 +132,13 @@ typedef struct _ipoib_bundle * List of adapters in the bundle. The adapter at the head is the * primary adapter of the bundle. *********/ +void +ipoib_create_log( + NDIS_HANDLE h_adapter, + UINT ind, + ULONG eventLogMsgId); - +#define GUID_MASK_LOG_INDEX 0 void ipoib_resume_oids( diff --git a/branches/ipoib_cm/kernel/ipoib_endpoint.c b/branches/ipoib_cm/kernel/ipoib_endpoint.c index f572f660..af0f3832 100644 --- a/branches/ipoib_cm/kernel/ipoib_endpoint.c +++ b/branches/ipoib_cm/kernel/ipoib_endpoint.c @@ -254,6 +254,7 @@ ipoib_endpt_set_mcast( return status; } p_endpt->h_mcast = p_mcast_rec->h_mcast; + p_endpt->dlid = p_mcast_rec->p_member_rec->mlid; IPOIB_EXIT( IPOIB_DBG_ENDPT ); return IB_SUCCESS; @@ -361,9 +362,8 @@ ipoib_endpt_queue( { ib_api_status_t status; ipoib_port_t *p_port; - ib_query_req_t query; - ib_user_query_t info; - ib_path_rec_t path; + ib_av_attr_t av_attr; + net32_t flow_lbl; IPOIB_ENTER( IPOIB_DBG_ENDPT ); @@ -373,108 +373,34 @@ ipoib_endpt_queue( return NDIS_STATUS_SUCCESS; } - if( p_endpt->h_query || - p_endpt->qpn == CL_HTON32(0x00FFFFFF) ) + if( p_endpt->qpn == CL_HTON32(0x00FFFFFF) ) { + /* + * Handle a race between the mcast callback and a receive/send. The QP + * is joined to the MC group before the MC callback is received, so it + * can receive packets, and NDIS can try to respond. We need to delay + * a response until the MC callback runs and sets the AV. + */ ipoib_endpt_deref( p_endpt ); IPOIB_EXIT( IPOIB_DBG_ENDPT ); return NDIS_STATUS_PENDING; } - /* This is the first packet for this endpoint. Query the SA. 
*/ - p_port = __endpt_parent( p_endpt ); - - IPOIB_ENTER( IPOIB_DBG_ENDPT ); - - info.method = IB_MAD_METHOD_GETTABLE; - info.attr_id = IB_MAD_ATTR_PATH_RECORD; - info.attr_size = sizeof(ib_path_rec_t); - info.comp_mask = IB_PR_COMPMASK_DGID | IB_PR_COMPMASK_SGID | - IB_PR_COMPMASK_REVERSIBLE | IB_PR_COMPMASK_NUM_PATH; - info.p_attr = &path; - - cl_memclr( &path, sizeof(ib_path_rec_t) ); - path.dgid = p_endpt->dgid; - ib_gid_set_default( &path.sgid, p_port->p_adapter->guids.port_guid.guid ); - path.num_path = 0x1; - - cl_memclr( &query, sizeof(ib_query_req_t) ); - query.query_type = IB_QUERY_USER_DEFINED; - query.p_query_input = &info; - query.port_guid = p_port->p_adapter->guids.port_guid.guid; - query.timeout_ms = p_port->p_adapter->params.sa_timeout; - query.retry_cnt = p_port->p_adapter->params.sa_retry_cnt; - - query.query_context = p_endpt; - query.pfn_query_cb = __path_query_cb; - - status = p_port->p_adapter->p_ifc->query( - p_port->p_adapter->h_al, &query, &p_endpt->h_query ); - if( status != IB_SUCCESS ) - { - IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ENDPT, - ("ib_query for path returned %s\n", - p_port->p_adapter->p_ifc->get_err_str( status )) ); - ipoib_endpt_deref( p_endpt ); - /* Flag the adapter as hung. */ - p_port->p_adapter->hung = TRUE; - } - - IPOIB_EXIT( IPOIB_DBG_ENDPT ); - return NDIS_STATUS_PENDING; -} - - -static void -__path_query_cb( - IN ib_query_rec_t *p_query_rec ) -{ - ib_api_status_t status; - ipoib_endpt_t *p_endpt; - ipoib_port_t *p_port; - ib_av_attr_t av_attr; - ib_path_rec_t *p_path; - net32_t flow_lbl; - - IPOIB_ENTER( IPOIB_DBG_ENDPT ); - - p_endpt = (ipoib_endpt_t*)p_query_rec->query_context; + /* This is the first packet for this endpoint. Create the AV. 
*/ p_port = __endpt_parent( p_endpt ); - cl_obj_lock( &p_endpt->obj ); - p_endpt->h_query = NULL; - if( p_endpt->obj.state == CL_DESTROYING ) - { - cl_obj_unlock( &p_endpt->obj ); - ipoib_endpt_deref( p_endpt ); - if( p_query_rec->p_result_mad ) - p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad ); - IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, - ("Endpoint destroying, aborting.\n") ); - return; - } - cl_obj_unlock( &p_endpt->obj ); - - if( p_query_rec->status != IB_SUCCESS || !p_query_rec->result_cnt ) - { - p_port->p_adapter->hung = TRUE; - ipoib_endpt_deref( p_endpt ); - if( p_query_rec->p_result_mad ) - p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad ); - IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, - ("Path query failed with %s\n", - p_port->p_adapter->p_ifc->get_err_str( p_query_rec->status )) ); - return; - } - - p_path = ib_get_query_path_rec( p_query_rec->p_result_mad, 0 ); - cl_memclr( &av_attr, sizeof(ib_av_attr_t) ); av_attr.port_num = p_port->port_num; - av_attr.sl = ib_path_rec_sl( p_path ); - av_attr.dlid = p_path->dlid; + ib_member_get_sl_flow_hop( + p_port->ib_mgr.bcast_rec.sl_flow_hop, + &av_attr.sl, + &flow_lbl, + &av_attr.grh.hop_limit + ); + + av_attr.dlid = p_endpt->dlid; /* * We always send the GRH so that we preferably lookup endpoints @@ -484,44 +410,17 @@ __path_query_cb( * for which there is no match, something that doesn't work when * using LIDs only. 
*/ - flow_lbl = ib_path_rec_flow_lbl( p_path ); av_attr.grh_valid = TRUE; av_attr.grh.ver_class_flow = ib_grh_set_ver_class_flow( - 6, p_path->tclass, flow_lbl ); + 6, p_port->ib_mgr.bcast_rec.tclass, flow_lbl ); av_attr.grh.resv1 = 0; av_attr.grh.resv2 = 0; - av_attr.grh.hop_limit = ib_path_rec_hop_limit( p_path ); - av_attr.grh.src_gid = p_path->sgid; - av_attr.grh.dest_gid = p_path->dgid; - - cl_obj_lock( &p_port->obj ); - if( !p_endpt->dlid ) - { - cl_map_item_t *p_qitem; + ib_gid_set_default( &av_attr.grh.src_gid, p_port->p_adapter->guids.port_guid.guid ); + av_attr.grh.dest_gid = p_endpt->dgid; - /* This is a subnet local endpoint that does not have its LID set. */ - p_endpt->dlid = p_path->dlid; - /* - * Insert the item in the LID map so that locally routed unicast - * traffic will resolve it properly. - */ - p_qitem = cl_qmap_insert( &p_port->endpt_mgr.lid_endpts, - p_endpt->dlid, &p_endpt->lid_item ); - CL_ASSERT( p_qitem == &p_endpt->lid_item ); - } - cl_obj_unlock( &p_port->obj ); - av_attr.static_rate = ib_path_rec_rate( p_path ); + av_attr.static_rate = p_port->ib_mgr.bcast_rec.rate; av_attr.path_bits = 0; - if( p_port->p_adapter->params.cm_enabled ) - { - /* save path rec for conn req */ - p_endpt->conn.path_rec = *p_path; - } - - /* Done with the path record. Release the MAD. */ - p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad ); - /* Create the AV. */ status = p_port->p_adapter->p_ifc->create_av( p_port->ib_mgr.h_pd, &av_attr, &p_endpt->h_av ); @@ -533,15 +432,11 @@ __path_query_cb( IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("ib_create_av failed with %s\n", p_port->p_adapter->p_ifc->get_err_str( status )) ); - return; + return NDIS_STATUS_FAILURE; } - /* Try to send all pending sends. */ - ipoib_port_resume( p_port ); - - /* Release the reference taken for the SA query. 
*/ - ipoib_endpt_deref( p_endpt ); IPOIB_EXIT( IPOIB_DBG_ENDPT ); + return NDIS_STATUS_SUCCESS; } @@ -734,8 +629,13 @@ __cm_recv_desc_ctor( p_desc->alloc_buf_size = ROUNDUP( p_port->p_adapter->params.cm_xfer_block_size, BUF_ALIGN ); - p_desc->p_alloc_buf = (uint8_t *)ExAllocatePoolWithTag( - NonPagedPool, p_desc->alloc_buf_size, 'DOMC' ); + NdisMAllocateSharedMemory( + p_port->p_adapter->h_adapter, + p_desc->alloc_buf_size, + FALSE, + &p_desc->p_alloc_buf, + &p_desc->phys_alloc_buf ); + if( p_desc->p_alloc_buf == NULL ) { IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, @@ -791,7 +691,7 @@ __cm_recv_desc_dtor( if( p_pool_item == NULL || context == NULL ) return; - + p_port = (ipoib_port_t*)context; p_desc = PARENT_STRUCT( p_pool_item, ipoib_cm_desc_t, item ); @@ -799,7 +699,11 @@ __cm_recv_desc_dtor( p_port->p_adapter->p_ifc->dereg_mr( p_desc->h_mr ); if( p_desc->p_alloc_buf ) - ExFreePoolWithTag( p_desc->p_alloc_buf, 'DOMC' ); + NdisMFreeSharedMemory( p_port->p_adapter->h_adapter, + p_desc->alloc_buf_size, + FALSE, + p_desc->p_alloc_buf, + p_desc->phys_alloc_buf ); } static NDIS_PACKET* diff --git a/branches/ipoib_cm/kernel/ipoib_endpoint.h b/branches/ipoib_cm/kernel/ipoib_endpoint.h index 45f0c69f..0115b2ed 100644 --- a/branches/ipoib_cm/kernel/ipoib_endpoint.h +++ b/branches/ipoib_cm/kernel/ipoib_endpoint.h @@ -83,7 +83,6 @@ typedef struct _cm_private_data ib_net32_t recv_mtu; } cm_private_data_t; -#pragma warning ( disable : 4324 ) typedef struct _endpt_conn { ib_net64_t service_id; @@ -94,11 +93,9 @@ typedef struct _endpt_conn ib_cq_handle_t h_send_cq; ib_cq_handle_t h_recv_cq; ib_listen_handle_t h_cm_listen; - ib_path_rec_t path_rec; cm_state_t state; } endpt_conn_t; -#pragma warning( default : 4324 ) typedef struct _ipoib_endpt { diff --git a/branches/ipoib_cm/kernel/ipoib_ibat.c b/branches/ipoib_cm/kernel/ipoib_ibat.c index 55ef84c1..01fc02a8 100644 --- a/branches/ipoib_cm/kernel/ipoib_ibat.c +++ b/branches/ipoib_cm/kernel/ipoib_ibat.c @@ -328,6 
+328,80 @@ __ibat_mac_to_gid( } +static NTSTATUS +__ibat_mac_to_path( + IN IRP *pIrp, + IN IO_STACK_LOCATION *pIoStack ) +{ + NTSTATUS status = STATUS_INVALID_PARAMETER; + IOCTL_IBAT_MAC_TO_PATH_IN *pIn; + IOCTL_IBAT_MAC_TO_PATH_OUT *pOut; + KLOCK_QUEUE_HANDLE hdl; + cl_list_item_t *pItem; + ipoib_adapter_t *pAdapter; + + IPOIB_ENTER(IPOIB_DBG_IOCTL); + + if( pIoStack->Parameters.DeviceIoControl.InputBufferLength != + sizeof(IOCTL_IBAT_MAC_TO_PATH_IN) ) + { + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("Invalid input buffer size.\n") ); + return STATUS_INVALID_PARAMETER; + } + + if( pIoStack->Parameters.DeviceIoControl.OutputBufferLength != + sizeof(IOCTL_IBAT_MAC_TO_PATH_OUT) ) + { + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("Invalid output buffer size.\n") ); + return STATUS_INVALID_PARAMETER; + } + + pIn = pIrp->AssociatedIrp.SystemBuffer; + pOut = pIrp->AssociatedIrp.SystemBuffer; + + if( pIn->Version != IBAT_IOCTL_VERSION ) + { + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("Invalid version.\n") ); + return STATUS_INVALID_PARAMETER; + } + + KeAcquireInStackQueuedSpinLock( &g_ipoib.lock, &hdl ); + + for( pItem = cl_qlist_head( &g_ipoib.adapter_list ); + pItem != cl_qlist_end( &g_ipoib.adapter_list ); + pItem = cl_qlist_next( pItem ) ) + { + pAdapter = CONTAINING_RECORD( pItem, ipoib_adapter_t, entry ); + if( pIn->PortGuid != pAdapter->guids.port_guid.guid ) + continue; + + /* Found the port - lookup the MAC. 
*/ + cl_obj_lock( &pAdapter->obj ); + if( pAdapter->p_port ) + { + status = ipoib_mac_to_path( + pAdapter->p_port, *(mac_addr_t*)pIn->DestMac, &pOut->Path ); + + if( NT_SUCCESS( status ) ) + { + pIrp->IoStatus.Information = + sizeof(IOCTL_IBAT_MAC_TO_PATH_OUT); + } + } + cl_obj_unlock( &pAdapter->obj ); + break; + } + + KeReleaseInStackQueuedSpinLock( &hdl ); + + IPOIB_EXIT( IPOIB_DBG_IOCTL ); + return status; +} + + static NTSTATUS __ibat_ip_to_port( IN IRP *pIrp, @@ -573,6 +647,12 @@ __ipoib_dispatch( status = __ibat_ip_to_port( pIrp, pIoStack ); break; + case IOCTL_IBAT_MAC_TO_PATH: + IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_IOCTL, + ("IOCTL_IBAT_MAC_TO_PATH received\n" )); + status = __ibat_mac_to_path( pIrp, pIoStack ); + break; + default: IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_IOCTL, ("unknow IOCTL code = 0x%x\n", diff --git a/branches/ipoib_cm/kernel/ipoib_port.c b/branches/ipoib_cm/kernel/ipoib_port.c index f639f4c6..0483f0c5 100644 --- a/branches/ipoib_cm/kernel/ipoib_port.c +++ b/branches/ipoib_cm/kernel/ipoib_port.c @@ -451,9 +451,6 @@ static ib_api_status_t __port_create_bcast( IN ipoib_port_t* const p_port ); -static void -__port_info_cb( - IN ib_query_rec_t *p_query_rec ); static void @@ -908,26 +905,28 @@ __ib_mgr_init( p_port->p_adapter->p_ifc->get_err_str( status )) ); return status; } - if (!p_port->p_ca_attrs->ipoib_csum ) - { - //checksum is not supported by device - //user must specify BYPASS to explicitly cancel checksum calculation - IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_INIT, - ("IPOIB CHKSUM IS NOT SUPPORTED BY HCA\n" ) ); - if (p_port->p_adapter->params.send_chksum_offload == CSUM_ENABLED) - p_port->p_adapter->params.send_chksum_offload = CSUM_DISABLED; - if (p_port->p_adapter->params.recv_chksum_offload == CSUM_ENABLED) - p_port->p_adapter->params.recv_chksum_offload = CSUM_DISABLED; - } - /* Allocate the UD QP. */ + + /* Allocate the QP. 
*/ cl_memclr( &qp_create, sizeof(qp_create) ); qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM; qp_create.rq_depth = p_port->p_adapter->params.rq_depth; qp_create.rq_sge = 2; /* To support buffers spanning pages. */ qp_create.h_rq_cq = p_port->ib_mgr.h_recv_cq; qp_create.sq_depth = p_port->p_adapter->params.sq_depth; - //TODO: Figure out the right number of SGE entries for sends. - qp_create.sq_sge = MAX_SEND_SGE; + +#define UD_QP_USED_SGE 3 + qp_create.sq_sge = MAX_SEND_SGE < p_port->p_ca_attrs->max_sges ? + MAX_SEND_SGE : ( p_port->p_ca_attrs->max_sges - UD_QP_USED_SGE ); + if ( !p_port->p_ca_attrs->ipoib_csum ) + { + /* checksum is not supported by device + user must specify BYPASS to explicitly cancel checksum calculation */ + if (p_port->p_adapter->params.send_chksum_offload == CSUM_ENABLED) + p_port->p_adapter->params.send_chksum_offload = CSUM_DISABLED; + if (p_port->p_adapter->params.recv_chksum_offload == CSUM_ENABLED) + p_port->p_adapter->params.recv_chksum_offload = CSUM_DISABLED; + } + qp_create.h_sq_cq = p_port->ib_mgr.h_send_cq; qp_create.sq_signaled = TRUE; status = p_port->p_adapter->p_ifc->create_qp( @@ -1011,6 +1010,7 @@ __ib_mgr_init( p_port->p_adapter->params.send_chksum_offload = CSUM_DISABLED; p_port->p_adapter->params.recv_chksum_offload = CSUM_BYPASS; } + } IPOIB_EXIT( IPOIB_DBG_INIT ); return IB_SUCCESS; @@ -1093,7 +1093,7 @@ ipoib_port_srq_init( } /* __port_query_ca_attrs() - * returns pointer to allocated memory. + * returns a pointer to allocated memory. * must be released by caller. 
*/ static ib_api_status_t @@ -2006,9 +2006,9 @@ __recv_get_endpts( { status = ipoib_mac_from_guid( #if IPOIB_INLINE_RECV - p_desc->buf.ib.grh.src_gid.unicast.interface_id, &mac ); + p_desc->buf.ib.grh.src_gid.unicast.interface_id, p_port->p_adapter->params.guid_mask, &mac ); #else /* IPOIB_INLINE_RECV */ - p_desc->p_buf->ib.grh.src_gid.unicast.interface_id, &mac ); + p_desc->p_buf->ib.grh.src_gid.unicast.interface_id, p_port->p_adapter->params.guid_mask, &mac ); #endif /* IPOIB_INLINE_RECV */ if( status != IB_SUCCESS ) { @@ -2024,7 +2024,7 @@ __recv_get_endpts( #else /* IPOIB_INLINE_RECV */ *pp_src = ipoib_endpt_create( &p_desc->p_buf->ib.grh.src_gid, #endif /* IPOIB_INLINE_RECV */ - 0, p_wc->recv.ud.remote_qp ); + p_wc->recv.ud.remote_lid, p_wc->recv.ud.remote_qp ); if( !*pp_src ) { IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, @@ -2166,7 +2166,7 @@ __recv_mgr_filter( } /* Successful completion. Get the receive information. */ - p_desc->ndis_csum.Value = ( ( p_wc->recv.ud.recv_opt & IB_RECV_OPT_CSUM_MASK) >> 8 ); + p_desc->ndis_csum.Value = ( ( p_wc->recv.ud.recv_opt & IB_RECV_OPT_CSUM_MASK ) >> 8 ); cl_perf_start( GetRecvEndpts ); __recv_get_endpts( p_port, p_desc, p_wc, &p_src, &p_dst ); cl_perf_stop( &p_port->p_adapter->perf, GetRecvEndpts ); @@ -2502,7 +2502,14 @@ __recv_dhcp( cl_memcpy( &gid, &p_cid[7], sizeof(ib_gid_t) ); p_cid[1] = HW_ADDR_LEN +1;// CID length p_cid[2] = DHCP_HW_TYPE_ETH;// CID type - status = ipoib_mac_from_guid( gid.unicast.interface_id, (mac_addr_t*)&p_cid[3] ); + status = ipoib_mac_from_guid( gid.unicast.interface_id, p_port->p_adapter->params.guid_mask, (mac_addr_t*)&p_cid[3] ); + if (status == IB_INVALID_GUID_MASK) + { + IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_ERROR, + ("Invalid GUID mask received, rejecting it") ); + ipoib_create_log(p_port->p_adapter->h_adapter, GUID_MASK_LOG_INDEX, EVENT_IPOIB_WRONG_PARAMETER_WRN); + status = IB_SUCCESS; + } p_cid[HW_ADDR_LEN + 3] = DHCP_OPT_END; //terminate tag } IPOIB_EXIT( 
IPOIB_DBG_RECV ); @@ -2590,6 +2597,11 @@ __recv_arp( __endpt_mgr_remove( p_port, *pp_src ); *pp_src = NULL; } + else if ( ! ((*pp_src)->dlid)) { + /* Out of date! Destroy the endpoint and replace it. */ + __endpt_mgr_remove( p_port, *pp_src ); + *pp_src = NULL; + } else if( ipoib_is_voltaire_router_gid( &(*pp_src)->dgid ) ) { if( (*pp_src)->qpn != ipoib_addr_get_qpn( &p_ib_arp->src_hw ) && @@ -2613,8 +2625,14 @@ __recv_arp( { /* Copy the src GID to allow aligned access */ cl_memcpy( &gid, &p_ib_arp->src_hw.gid, sizeof(ib_gid_t) ); - status = ipoib_mac_from_guid( gid.unicast.interface_id, &mac ); - if( status != IB_SUCCESS ) + status = ipoib_mac_from_guid( gid.unicast.interface_id, p_port->p_adapter->params.guid_mask, &mac ); + if (status == IB_INVALID_GUID_MASK) + { + IPOIB_PRINT( TRACE_LEVEL_WARNING, IPOIB_DBG_ERROR, + ("Invalid GUID mask received, rejecting it") ); + ipoib_create_log(p_port->p_adapter->h_adapter, GUID_MASK_LOG_INDEX, EVENT_IPOIB_WRONG_PARAMETER_WRN); + } + else if( status != IB_SUCCESS ) { IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, ("ipoib_mac_from_guid returned %s\n", @@ -2622,13 +2640,10 @@ __recv_arp( return status; } /* - * Create the endpoint. Note that the LID is left blank and will be - * resolved by a path query as needed. This is done because the - * remote LID/GID from the work completion may not be the original - * initiator. + * Create the endpoint. 
*/ *pp_src = ipoib_endpt_create( &p_ib_arp->src_hw.gid, - 0, ipoib_addr_get_qpn( &p_ib_arp->src_hw ) ); + p_wc->recv.ud.remote_lid, ipoib_addr_get_qpn( &p_ib_arp->src_hw ) ); if( !*pp_src ) { @@ -2764,6 +2779,7 @@ __recv_mgr_prepare_pkt( uint32_t pkt_filter; ip_stat_sel_t type; NDIS_TCP_IP_CHECKSUM_PACKET_INFO chksum; + PERF_DECLARE( GetNdisPkt ); IPOIB_ENTER( IPOIB_DBG_RECV ); @@ -3309,7 +3325,7 @@ __send_gen( */ i = 0; if( lso_data_index ) - { //we have an LSO packet + { /* we have an LSO packet */ i = lso_data_index; j = 0; } @@ -3334,17 +3350,6 @@ __send_gen( /* Now fill in the rest of the local data segments. */ while( i < p_sgl->NumberOfElements ) { - /* try to coalesce adjacent data segments */ - if( j > 1 ) - { - if( ( p_desc->local_ds[j-1].vaddr + p_desc->local_ds[j-1].length ) == - (uint64_t)p_sgl->Elements[i].Address.QuadPart ) - { - p_desc->local_ds[j-1].length += p_sgl->Elements[i].Length; - i++; - continue; - } - } p_desc->local_ds[j].vaddr = p_sgl->Elements[i].Address.QuadPart; p_desc->local_ds[j].length = p_sgl->Elements[i].Length; p_desc->local_ds[j].lkey = p_port->ib_mgr.lkey; @@ -3828,11 +3833,8 @@ __send_mgr_filter_dhcp( p_desc->p_buf->ip.prot.udp.hdr.length = cl_ntoh16( sizeof(udp_hdr_t) + sizeof(dhcp_pkt_t) ); /* update crc in ip header */ - if( !p_port->p_adapter->params.send_chksum_offload ) - { - p_desc->p_buf->ip.hdr.chksum = 0; - p_desc->p_buf->ip.hdr.chksum = ipchksum((unsigned short*) &p_desc->p_buf->ip.hdr, sizeof(ip_hdr_t)); - } + p_desc->p_buf->ip.hdr.chksum = 0; + p_desc->p_buf->ip.hdr.chksum = ipchksum((unsigned short*) &p_desc->p_buf->ip.hdr, sizeof(ip_hdr_t)); break; /* Server messages. */ @@ -4004,8 +4006,8 @@ __send_mgr_filter_arp( if( p_port->p_adapter->params.cm_enabled ) { IPOIB_PRINT_EXIT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - (" ARP %s SEND to ENDPT[%p] State: %d flag: %#x, QPN: %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n", - (( p_arp->op == ARP_OP_REQ )? 
"REQUEST" : "REPLY" ), + (" %s SEND to ENDPT[%p] State: %d flag: %#x, QPN: %#x MAC %02x:%02x:%02x:%02x:%02x:%02x\n", + (( p_arp->op == ARP_OP_REQ )? "ARP REQUEST" : "ARP REPLY" ), p_desc->p_endpt, endpt_cm_get_state( p_desc->p_endpt ), p_desc->p_endpt->cm_flag, @@ -4033,14 +4035,14 @@ __send_mgr_get_eth_hdr( IN NDIS_PACKET* const p_packet, OUT NDIS_BUFFER** const pp_buf, OUT eth_hdr_t** const pp_eth_hdr, - OUT UINT* p_buf_len ) + OUT UINT* p_buf_len, + OUT UINT* p_tot_len ) { - UINT tot_len; IPOIB_ENTER( IPOIB_DBG_SEND ); NdisGetFirstBufferFromPacketSafe( - p_packet, pp_buf, pp_eth_hdr, p_buf_len, &tot_len, NormalPagePriority ); + p_packet, pp_buf, pp_eth_hdr, p_buf_len, p_tot_len, NormalPagePriority ); if( !*pp_eth_hdr ) { @@ -4163,6 +4165,8 @@ __build_send_desc( PktExt = NDIS_PACKET_EXTENSION_FROM_PACKET(p_desc->p_pkt); pChecksumPktInfo = (PNDIS_TCP_IP_CHECKSUM_PACKET_INFO)&PktExt->NdisPacketInfo[TcpIpChecksumPacketInfo]; mss = PtrToUlong(PktExt->NdisPacketInfo[TcpLargeSendPacketInfo]); + //TODO: optimization: we already got total length from NdisGetFirstBufferFromPacketSafe before + NdisQueryPacket(p_desc->p_pkt, (PUINT)&PhysBufCount, NULL, &FirstBuffer,(PUINT)&PacketLength); /* Format the send descriptor. */ hdr_idx = cl_atomic_inc( &p_port->hdr_idx ); @@ -4174,9 +4178,6 @@ __build_send_desc( if( p_port->p_adapter->params.lso && mss ) { memset(&TheLsoData, 0, sizeof TheLsoData ); - //TODO: optimization: we already got total length from NdisGetFirstBufferFromPacketSafe before - NdisQueryPacket(p_desc->p_pkt, (PUINT)&PhysBufCount, NULL, &FirstBuffer,(PUINT)&PacketLength); - status = GetLsoHeaderSize( p_port, FirstBuffer, @@ -4207,9 +4208,8 @@ __build_send_desc( p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM | IB_SEND_OPT_TX_TCP_UDP_CSUM) | IB_SEND_OPT_SIGNALED; __send_gen(p_port, p_desc, IndexOfData); p_desc->wr.wr_type = WR_LSO; - } - else - { + } else { + /* Setup the first local data segment (used for the IPoIB header). 
*/ p_desc->local_ds[0].vaddr = cl_get_physaddr( &p_port->hdr[hdr_idx] ); p_desc->local_ds[0].length = sizeof(ipoib_hdr_t); @@ -4228,6 +4228,8 @@ __build_send_desc( p_desc->wr.send_opt = IB_SEND_OPT_SIGNALED; } + + /* Setup the work request. */ p_desc->wr.p_next = NULL; p_desc->wr.wr_id = (uintn_t)p_desc->p_pkt; @@ -4239,7 +4241,7 @@ __build_send_desc( { p_desc->send_qp = p_desc->p_endpt->conn.h_work_qp; } - else // UD QP + else /* UD QP */ { if( p_port->p_adapter->params.send_chksum_offload && ( pChecksumPktInfo->Transmit.NdisPacketChecksumV4 || @@ -4313,6 +4315,7 @@ ipoib_port_send( eth_hdr_t *p_eth_hdr; NDIS_BUFFER *p_buf; UINT buf_len; + UINT tot_len; PERF_DECLARE( GetEthHdr ); PERF_DECLARE( BuildSendDesc ); @@ -4355,7 +4358,7 @@ ipoib_port_send( /* Get the ethernet header so we can find the endpoint. */ cl_perf_start( GetEthHdr ); status = __send_mgr_get_eth_hdr( - p_packet_array[i], &p_buf, &p_eth_hdr, &buf_len ); + p_packet_array[i], &p_buf, &p_eth_hdr, &buf_len, &tot_len ); cl_perf_stop( &p_port->p_adapter->perf, GetEthHdr ); if( status != NDIS_STATUS_SUCCESS ) { @@ -4374,22 +4377,39 @@ ipoib_port_send( ip_hdr_t *p_ip_hdr; NDIS_BUFFER *p_ip_hdr_buf; UINT ip_hdr_buf_len; - + + /* drop multicast packets larger than UD MTU */ + if( p_port->p_adapter->params.cm_enabled && + tot_len > p_port->p_adapter->params.payload_mtu ) + { + cl_perf_start( ProcessFailedSends ); + __process_failed_send( p_port, &desc, NDIS_STATUS_SUCCESS ); + cl_perf_stop( &p_port->p_adapter->perf, ProcessFailedSends ); + continue; + } // Extract the ip hdr - NdisGetNextBuffer( p_buf, &p_ip_hdr_buf ); - if( !p_ip_hdr_buf ) + if(buf_len >= sizeof(ip_hdr_t)+ sizeof(eth_hdr_t)) { - IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, - ("Failed to get IP header buffer.\n") ); - goto h_end; + p_ip_hdr = (ip_hdr_t*)(p_eth_hdr + 1); + ip_hdr_buf_len = sizeof(ip_hdr_t); } - - NdisQueryBufferSafe( p_ip_hdr_buf, &p_ip_hdr, &ip_hdr_buf_len, NormalPagePriority ); - if( !p_ip_hdr ) + else { - 
IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, - ("Failed to query IP header buffer.\n") ); - goto h_end; + NdisGetNextBuffer( p_buf, &p_ip_hdr_buf ); + if( !p_ip_hdr_buf ) + { + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("Failed to get IP header buffer.\n") ); + goto h_end; + } + + NdisQueryBufferSafe( p_ip_hdr_buf, &p_ip_hdr, &ip_hdr_buf_len, NormalPagePriority ); + if( !p_ip_hdr ) + { + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("Failed to query IP header buffer.\n") ); + goto h_end; + } } if( ip_hdr_buf_len < sizeof(ip_hdr_t) ) @@ -4483,6 +4503,7 @@ ipoib_port_resume( eth_hdr_t *p_eth_hdr; NDIS_BUFFER *p_buf; UINT buf_len; + UINT tot_len; PERF_DECLARE( GetEndpt ); PERF_DECLARE( BuildSendDesc ); @@ -4524,7 +4545,7 @@ ipoib_port_resume( /* Get the ethernet header so we can find the endpoint. */ status = __send_mgr_get_eth_hdr( - desc.p_pkt, &p_buf, &p_eth_hdr, &buf_len ); + desc.p_pkt, &p_buf, &p_eth_hdr, &buf_len, &tot_len ); if( status != NDIS_STATUS_SUCCESS ) { cl_perf_start( ProcessFailedSends ); @@ -4558,6 +4579,7 @@ ipoib_port_resume( break; } } + /* * Complete the send as if we sent it - WHQL tests don't like the * sends to fail. 
@@ -4953,6 +4975,21 @@ __endpt_mgr_reset_all( while( p_port->endpt_rdr ) ; +#if 0 + __endpt_mgr_remove_all(p_port); +#else + + NdisMIndicateStatus( p_port->p_adapter->h_adapter, + NDIS_STATUS_MEDIA_DISCONNECT, NULL, 0 ); + NdisMIndicateStatusComplete( p_port->p_adapter->h_adapter ); + + NdisMIndicateStatus( p_port->p_adapter->h_adapter, + NDIS_STATUS_MEDIA_CONNECT, NULL, 0 ); + NdisMIndicateStatusComplete( p_port->p_adapter->h_adapter ); + + // IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, + // ("Link DOWN!\n") ); + if( p_port->p_local_endpt ) { ipoib_port_cancel_listen( p_port, p_port->p_local_endpt ); @@ -5019,11 +5056,13 @@ __endpt_mgr_reset_all( { cl_qmap_remove_item( &p_port->endpt_mgr.lid_endpts, &p_endpt->lid_item ); + IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT, + ("<__endptr_mgr_reset_all: setting p_endpt->dlid to 0\n")); p_endpt->dlid = 0; } } - +#endif cl_obj_unlock( &p_port->obj ); while( cl_qlist_count( &conn_list ) ) @@ -5142,6 +5181,86 @@ ipoib_mac_to_gid( } +NTSTATUS +ipoib_mac_to_path( + IN ipoib_port_t* const p_port, + IN const mac_addr_t mac, + OUT ib_path_rec_t* p_path ) +{ + ipoib_endpt_t* p_endpt; + cl_map_item_t *p_item; + uint64_t key = 0; + uint8_t sl; + net32_t flow_lbl; + uint8_t hop_limit; + + IPOIB_ENTER( IPOIB_DBG_ENDPT ); + + cl_memcpy( &key, &mac, sizeof(mac_addr_t) ); + + cl_obj_lock( &p_port->obj ); + + if( p_port->p_local_endpt == NULL ) + { + cl_obj_unlock( &p_port->obj ); + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("No local endpoint.\n") ); + return STATUS_INVALID_PARAMETER; + } + + if( mac.addr[0] == 0 && mac.addr[1] == 0 && mac.addr[2] == 0 && + mac.addr[3] == 0 && mac.addr[4] == 0 && mac.addr[5] == 0 ) + { + p_endpt = p_port->p_local_endpt; + } + else + { + p_item = cl_qmap_get( &p_port->endpt_mgr.mac_endpts, key ); + if( p_item == cl_qmap_end( &p_port->endpt_mgr.mac_endpts ) ) + { + cl_obj_unlock( &p_port->obj ); + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("Failed endpoint 
lookup.\n") ); + return STATUS_INVALID_PARAMETER; + } + + p_endpt = PARENT_STRUCT( p_item, ipoib_endpt_t, mac_item ); + } + + p_path->resv0 = 0; + p_path->dgid = p_endpt->dgid; + p_path->sgid = p_port->p_local_endpt->dgid; + p_path->dlid = p_endpt->dlid; + p_path->slid = p_port->p_local_endpt->dlid; + + ib_member_get_sl_flow_hop( + p_port->ib_mgr.bcast_rec.sl_flow_hop, + &sl, + &flow_lbl, + &hop_limit + ); + ib_path_rec_set_hop_flow_raw( p_path, hop_limit, flow_lbl, FALSE ); + + p_path->tclass = p_port->ib_mgr.bcast_rec.tclass; + p_path->num_path = 1; + p_path->pkey = p_port->ib_mgr.bcast_rec.pkey; + p_path->mtu = p_port->ib_mgr.bcast_rec.mtu; + p_path->rate = p_port->ib_mgr.bcast_rec.rate; + if( p_path->slid == p_path->dlid ) + p_path->pkt_life = 0; + else + p_path->pkt_life = p_port->ib_mgr.bcast_rec.pkt_life; + p_path->preference = 0; + p_path->resv1 = 0; + p_path->resv2 = 0; + + cl_obj_unlock( &p_port->obj ); + + IPOIB_EXIT( IPOIB_DBG_ENDPT ); + return STATUS_SUCCESS; +} + + static inline NDIS_STATUS __endpt_mgr_ref( IN ipoib_port_t* const p_port, @@ -5425,7 +5544,7 @@ ipoib_port_remove_endpt( uint64_t key; IPOIB_ENTER( IPOIB_DBG_ENDPT ); - + key = 0; cl_memcpy( &key, &mac, sizeof(mac_addr_t) ); @@ -5472,7 +5591,7 @@ ipoib_port_remove_endpt( #if DBG cl_atomic_dec( &p_port->ref[ref_endpt_track] ); - IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_OBJ, + IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_ENDPT, ("ref type %d ref_cnt %d\n", ref_endpt_track, p_port->obj.ref_cnt) ); #endif @@ -5549,10 +5668,10 @@ ipoib_port_up( IN ipoib_port_t* const p_port, IN const ib_pnp_port_rec_t* const p_pnp_rec ) { - ib_api_status_t status; - ib_query_req_t query; - ib_user_query_t info; - ib_portinfo_record_t port_rec; + ib_port_info_t *p_port_info; + ib_mad_t *mad_in = NULL; + ib_mad_t *mad_out = NULL; + ib_api_status_t status = IB_INSUFFICIENT_MEMORY; IPOIB_ENTER( IPOIB_DBG_INIT ); @@ -5565,41 +5684,72 @@ ipoib_port_up( KeResetEvent( &p_port->sa_event ); cl_obj_unlock( 
&p_port->obj ); - info.method = IB_MAD_METHOD_GET; - info.attr_id = IB_MAD_ATTR_PORTINFO_RECORD; - info.attr_size = sizeof(ib_portinfo_record_t); - info.comp_mask = IB_PIR_COMPMASK_BASELID; - info.p_attr = &port_rec; - - /* Query requires only the base LID. */ - cl_memclr( &port_rec, sizeof(ib_portinfo_record_t) ); - port_rec.port_info.base_lid = p_pnp_rec->p_port_attr->lid; - - cl_memclr( &query, sizeof(ib_query_req_t) ); - query.query_type = IB_QUERY_USER_DEFINED; - query.p_query_input = &info; - query.port_guid = p_port->p_adapter->guids.port_guid.guid; - query.timeout_ms = p_port->p_adapter->params.sa_timeout; - query.retry_cnt = p_port->p_adapter->params.sa_retry_cnt; - query.query_context = p_port; - query.pfn_query_cb = __port_info_cb; + mad_out = (ib_mad_t*)cl_zalloc(256); + if(! mad_out) + { + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("failed to allocate mad mad_out\n")); + goto up_done; + } + mad_in = (ib_mad_t*)cl_zalloc(256); + if(! mad_in) + { + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("failed to allocate mad mad_in\n")); + goto up_done; + } - /* reference the object for the multicast query. 
*/ - ipoib_port_ref( p_port, ref_port_up ); + mad_in->attr_id = IB_MAD_ATTR_PORT_INFO; + mad_in->method = IB_MAD_METHOD_GET; + mad_in->base_ver = 1; + mad_in->class_ver =1; + mad_in->mgmt_class = IB_MCLASS_SUBN_LID; + + status = p_port->p_adapter->p_ifc->local_mad( + p_port->ib_mgr.h_ca ,p_port->port_num ,mad_in ,mad_out); - status = p_port->p_adapter->p_ifc->query( - p_port->p_adapter->h_al, &query, &p_port->ib_mgr.h_query ); if( status != IB_SUCCESS ) { - KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE ); ipoib_set_inactive( p_port->p_adapter ); - ipoib_port_deref( p_port, ref_port_up ); IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, - ("ib_query returned %s\n", + ("ib_local_mad returned %s\n", p_port->p_adapter->p_ifc->get_err_str( status )) ); - return; + goto up_done; } + p_port_info = (ib_port_info_t*)(((ib_smp_t*)mad_out)->data); + p_port->base_lid = p_pnp_rec->p_port_attr->lid; + IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, + ("Received port info: link width = %d.\n", + p_port_info->link_width_active) ); + p_port->ib_mgr.rate = + ib_port_info_compute_rate( p_port_info ); + + ipoib_set_rate( p_port->p_adapter, + p_port_info->link_width_active, + ib_port_info_get_link_speed_active( p_port_info ) ); + + status = __port_get_bcast( p_port ); + if (status != IB_SUCCESS) + IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + (" __port_get_bcast returned %s\n",p_port->p_adapter->p_ifc->get_err_str( status ))); + +up_done: + if( status != IB_SUCCESS ) + { + if( status != IB_CANCELED ) + { + ipoib_set_inactive( p_port->p_adapter ); + __endpt_mgr_reset_all( p_port ); + } + KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE ); + } + + if(mad_out) + cl_free(mad_out); + if(mad_in) + cl_free(mad_in); + IPOIB_EXIT( IPOIB_DBG_INIT ); } @@ -5630,8 +5780,10 @@ __endpt_mgr_add_local( cl_memclr( &av_attr, sizeof(ib_av_attr_t) ); av_attr.port_num = p_port->port_num; av_attr.sl = 0; + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ENDPT, + 
("<__endpt_mgr_add_local>: av_attr.dlid = p_port_info->base_lid = %d\n",p_port_info->base_lid)); av_attr.dlid = p_port_info->base_lid; - av_attr.static_rate = ib_port_info_compute_rate( p_port_info ); + av_attr.static_rate = p_port->ib_mgr.rate; av_attr.path_bits = 0; status = p_port->p_adapter->p_ifc->create_av( p_port->ib_mgr.h_pd, &av_attr, &p_endpt->h_av ); @@ -5655,6 +5807,7 @@ __endpt_mgr_add_local( p_port->p_adapter->p_ifc->get_err_str( status )) ); return status; } + p_port->p_local_endpt = p_endpt; IPOIB_EXIT( IPOIB_DBG_INIT ); @@ -5662,109 +5815,6 @@ __endpt_mgr_add_local( } -static void -__port_info_cb( - IN ib_query_rec_t *p_query_rec ) -{ - ib_api_status_t status; - ipoib_port_t *p_port; - ib_portinfo_record_t *p_port_rec; - - IPOIB_ENTER( IPOIB_DBG_INIT ); - - p_port = (ipoib_port_t*)p_query_rec->query_context; - - cl_obj_lock( &p_port->obj ); - p_port->ib_mgr.h_query = NULL; - - if( p_port->state != IB_QPS_INIT ) - { - status = IB_CANCELED; - goto done; - } - - status = p_query_rec->status; - - switch( status ) - { - case IB_SUCCESS: - /* Note that the we report the rate from the port info. 
*/ - p_port_rec = (ib_portinfo_record_t*) - ib_get_query_result( p_query_rec->p_result_mad, 0 ); - - status = __endpt_mgr_add_local( p_port, &p_port_rec->port_info ); - if( status == IB_SUCCESS ) - { - IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - ("Received port info: link width = %d.\n", - p_port_rec->port_info.link_width_active) ); - - p_port->ib_mgr.rate = - ib_port_info_compute_rate( &p_port_rec->port_info ); - - ipoib_set_rate( p_port->p_adapter, - p_port_rec->port_info.link_width_active, - ib_port_info_get_link_speed_active( &p_port_rec->port_info ) ); - - status = __port_get_bcast( p_port ); - } - else - { - IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, - ("__endpt_mgr_add_local returned %s\n", - p_port->p_adapter->p_ifc->get_err_str( status )) ); - } - break; - - case IB_CANCELED: - IPOIB_PRINT(TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - ("Instance destroying - Aborting.\n") ); - break; - - case IB_TIMEOUT: - NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter, - EVENT_IPOIB_PORT_INFO_TIMEOUT, 0 ); - IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - ("Port info query timed out.\n") ); - break; - - case IB_REMOTE_ERROR: - NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter, - EVENT_IPOIB_PORT_INFO_REJECT, 0 ); - IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - ("Port info query rejected by SA.\n") ); - break; - - default: - NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter, - EVENT_IPOIB_QUERY_PORT_INFO, 1, p_query_rec->status ); - /* Hopefully we'll get an SM change event that will restart things. */ - IPOIB_PRINT( TRACE_LEVEL_INFORMATION, IPOIB_DBG_INIT, - ("Port info query failed.\n") ); - } - -done: - cl_obj_unlock( &p_port->obj ); - - if( status != IB_SUCCESS ) - { - if( status != IB_CANCELED ) - { - ipoib_set_inactive( p_port->p_adapter ); - __endpt_mgr_reset_all( p_port ); - } - KeSetEvent( &p_port->sa_event, EVENT_INCREMENT, FALSE ); - } - - /* Return the response MAD to AL. 
*/ - if( p_query_rec->p_result_mad ) - p_port->p_adapter->p_ifc->put_mad( p_query_rec->p_result_mad ); - - /* Release the reference taken when issuing the port info query. */ - ipoib_port_deref( p_port, ref_port_info_cb ); - - IPOIB_EXIT( IPOIB_DBG_INIT ); -} static ib_api_status_t @@ -6151,7 +6201,16 @@ __bcast_cb( CL_ASSERT( p_port->p_adapter->state == IB_PNP_PORT_ADD || p_port->p_adapter->state == IB_PNP_PORT_DOWN || p_port->p_adapter->state == IB_PNP_PORT_INIT ); - status = __port_get_bcast( p_port ); + if(++p_port->bc_join_retry_cnt < p_port->p_adapter->params.bc_join_retry) + { + status = __port_get_bcast( p_port ); + } + else + { + NdisWriteErrorLogEntry( p_port->p_adapter->h_adapter, + EVENT_IPOIB_BCAST_JOIN, 1, p_mcast_rec->status ); + p_port->bc_join_retry_cnt = 0; + } } else { @@ -6171,7 +6230,21 @@ __bcast_cb( return; } cl_obj_unlock( &p_port->obj ); - + p_port->bc_join_retry_cnt = 0; + if(! p_port->p_local_endpt) + { + ib_port_info_t port_info; + cl_memclr(&port_info, sizeof(port_info)); + port_info.base_lid = p_port->base_lid; + status = __endpt_mgr_add_local( p_port, &port_info ); + if( status != IB_SUCCESS ) + { + IPOIB_PRINT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("__endpt_mgr_add_local returned %s\n", + p_port->p_adapter->p_ifc->get_err_str( status )) ); + goto err; + } + } status = __endpt_mgr_add_bcast( p_port, p_mcast_rec ); if( status != IB_SUCCESS ) { @@ -6887,3 +6960,62 @@ ipoib_port_cancel_xmit( } IPOIB_EXIT( IPOIB_DBG_SEND ); } + +/* use code from ipoib_mac_to_path without lookup for endpt +* Useful if endpt is already known. 
+*/ +NTSTATUS +ipoib_endpt_get_path( + IN ipoib_port_t* const p_port, + IN ipoib_endpt_t* const p_endpt, + OUT ib_path_rec_t* p_path ) +{ + uint8_t sl; + net32_t flow_lbl; + uint8_t hop_limit; + + IPOIB_ENTER( IPOIB_DBG_ENDPT ); + + cl_obj_lock( &p_port->obj ); + + if( p_port->p_local_endpt == NULL || + p_endpt == NULL ) + { + cl_obj_unlock( &p_port->obj ); + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, + ("No local endpoint.\n") ); + return STATUS_INVALID_PARAMETER; + } + + p_path->resv0 = 0; + p_path->dgid = p_endpt->dgid; + p_path->sgid = p_port->p_local_endpt->dgid; + p_path->dlid = p_endpt->dlid; + p_path->slid = p_port->p_local_endpt->dlid; + + ib_member_get_sl_flow_hop( + p_port->ib_mgr.bcast_rec.sl_flow_hop, + &sl, + &flow_lbl, + &hop_limit + ); + ib_path_rec_set_hop_flow_raw( p_path, hop_limit, flow_lbl, FALSE ); + + p_path->tclass = p_port->ib_mgr.bcast_rec.tclass; + p_path->num_path = 1; + p_path->pkey = p_port->ib_mgr.bcast_rec.pkey; + p_path->mtu = p_port->ib_mgr.bcast_rec.mtu; + p_path->rate = p_port->ib_mgr.bcast_rec.rate; + if( p_path->slid == p_path->dlid ) + p_path->pkt_life = 0; + else + p_path->pkt_life = p_port->ib_mgr.bcast_rec.pkt_life; + p_path->preference = 0; + p_path->resv1 = 0; + p_path->resv2 = 0; + + cl_obj_unlock( &p_port->obj ); + + IPOIB_EXIT( IPOIB_DBG_ENDPT ); + return STATUS_SUCCESS; +} diff --git a/branches/ipoib_cm/kernel/ipoib_port.h b/branches/ipoib_cm/kernel/ipoib_port.h index 93586bcb..cbdbd2d6 100644 --- a/branches/ipoib_cm/kernel/ipoib_port.h +++ b/branches/ipoib_cm/kernel/ipoib_port.h @@ -242,7 +242,7 @@ typedef union _recv_buf } PACK_SUFFIX ib; } PACK_SUFFIX recv_buf_t; -/* +/* * FIELDS * eth.pkt * Ethernet packet, used to indicate the receive to the OS. 
@@ -337,6 +337,7 @@ typedef struct _ipoib_cm_desc cl_list_item_t list_item; uint8_t* p_alloc_buf; uint8_t* p_buf; + NDIS_PHYSICAL_ADDRESS phys_alloc_buf; uint32_t alloc_buf_size; uint32_t buf_size; net32_t lkey; @@ -545,6 +546,8 @@ typedef struct _ipoib_port uint16_t pkey_index; KDPC gc_dpc; KTIMER gc_timer; + uint32_t bc_join_retry_cnt; + ib_net16_t base_lid; ipoib_hdr_t hdr[1]; /* Must be last! */ } ipoib_port_t; @@ -647,6 +650,12 @@ ipoib_mac_to_gid( IN const mac_addr_t mac, OUT ib_gid_t* p_gid ); +NTSTATUS +ipoib_mac_to_path( + IN ipoib_port_t* const p_port, + IN const mac_addr_t mac, + OUT ib_path_rec_t* p_path ); + inline void ipoib_port_ref( IN ipoib_port_t * p_port, IN int type); @@ -655,6 +664,12 @@ inline void ipoib_port_deref( IN ipoib_port_t * p_port, IN int type); +NTSTATUS +ipoib_endpt_get_path( + IN ipoib_port_t* const p_port, + IN ipoib_endpt_t* const p_endpt, + OUT ib_path_rec_t* p_path ); + #if DBG // This function is only used to monitor send failures static inline VOID NdisMSendCompleteX( @@ -671,6 +686,7 @@ static inline VOID NdisMSendCompleteX( #else #define NdisMSendCompleteX NdisMSendComplete #endif + ipoib_endpt_t* ipoib_endpt_get_by_gid( IN ipoib_port_t* const p_port, diff --git a/branches/ipoib_cm/kernel/ipoib_xfr_mgr.h b/branches/ipoib_cm/kernel/ipoib_xfr_mgr.h index 17c61881..d2ee06bf 100644 --- a/branches/ipoib_cm/kernel/ipoib_xfr_mgr.h +++ b/branches/ipoib_cm/kernel/ipoib_xfr_mgr.h @@ -239,140 +239,89 @@ ipoib_mac_from_mlx_guid( return IB_SUCCESS; } -/* -* PARAMETERS -* port_guid -* The port GUID, in network byte order, for which to generate a -* MAC address. -* -* p_mac_addr -* Pointer to a mac address in which to store the results. -* -* RETURN VALUES -* IB_SUCCESS -* The MAC address was successfully converted. -* -* IB_INVALID_GUID -* The port GUID provided was not a known GUID format. 
-* -* SEE ALSO -* IPOIB -*********/ - -/****f* IPOIB/ipoib_mac_from_voltaire_guid +/****f* IPOIB/ipoib_mac_from_dell_guid * NAME -* ipoib_mac_from_voltaire_guid +* ipoib_mac_from_dell_guid * * DESCRIPTION -* Generates an ethernet MAC address given a Voltaire port GUID. +* Generates an ethernet MAC address given a DELL port GUID. * * SYNOPSIS */ static inline ib_api_status_t -ipoib_mac_from_voltaire_guid( +ipoib_mac_from_dell_guid( IN const net64_t port_guid, OUT mac_addr_t* const p_mac_addr ) { const uint8_t *p_guid = (const uint8_t*)&port_guid; /* Port guid is in network byte order. OUI is in lower 3 bytes. */ - ASSERT( p_guid[0] == 0x00 && p_guid[1] == 0x08 && p_guid[2] == 0xf1 ); - + ASSERT( p_guid[0] == 0x00 && p_guid[1] == 0x18 && p_guid[2] == 0x8b ); + p_mac_addr->addr[0] = p_guid[0]; p_mac_addr->addr[1] = p_guid[1]; p_mac_addr->addr[2] = p_guid[2]; - p_mac_addr->addr[3] = p_guid[4] ^ p_guid[6]; - p_mac_addr->addr[4] = p_guid[5] ^ p_guid[7]; - p_mac_addr->addr[5] = p_guid[5] + p_guid[6] + p_guid[7]; - - return IB_SUCCESS; -} - -/****f* IPOIB/ipoib_mac_from_supermicro_guid -* NAME -* ipoib_mac_from_supermicro_guid -* -* DESCRIPTION -* Generates an ethernet MAC address given a supermicro port GUID. -* -* SYNOPSIS -*/ -static inline ib_api_status_t -ipoib_mac_from_supermicro_guid( - IN const net64_t port_guid, - OUT mac_addr_t* const p_mac_addr ) -{ - const uint8_t *p_guid = (const uint8_t*)&port_guid; - - /* Port guid is in network byte order. OUI is in lower 3 bytes. 
*/ - ASSERT( p_guid[0] == 0x00 && p_guid[1] == 0x30 && p_guid[2] == 0x48 - && p_guid[3] == 0xff && p_guid[4] == 0xff); - - p_mac_addr->addr[0] = 0; - p_mac_addr->addr[1] = 0x30; - p_mac_addr->addr[2] = 0x48; p_mac_addr->addr[3] = p_guid[5]; p_mac_addr->addr[4] = p_guid[6]; p_mac_addr->addr[5] = p_guid[7]; - + return IB_SUCCESS; } - -/****f* IPOIB/ipoib_mac_from_cisco_guid -* NAME -* ipoib_mac_from_cisco_guid +/* +* PARAMETERS +* port_guid +* The port GUID, in network byte order, for which to generate a +* MAC address. * -* DESCRIPTION -* Generates an ethernet MAC address given a Cisco port GUID. +* p_mac_addr +* Pointer to a mac address in which to store the results. * -* SYNOPSIS -*/ -static inline ib_api_status_t -ipoib_mac_from_cisco_guid( - IN const net64_t port_guid, - OUT mac_addr_t* const p_mac_addr ) -{ - const uint8_t *p_guid = (const uint8_t*)&port_guid; - - /* Port guid is in network byte order. OUI is in lower 3 bytes. */ - ASSERT( p_guid[0] == 0x00 && p_guid[1] == 0x5 && p_guid[2] == 0xad); - - p_mac_addr->addr[0] = 0; - p_mac_addr->addr[1] = 0x5; - p_mac_addr->addr[2] = 0xad; - p_mac_addr->addr[3] = p_guid[5]; - p_mac_addr->addr[4] = p_guid[6]; - p_mac_addr->addr[5] = p_guid[7]; +* RETURN VALUES +* IB_SUCCESS +* The MAC address was successfully converted. +* +*********/ - return IB_SUCCESS; -} -/****f* IPOIB/ipoib_mac_from_hp_guid +/****f* IPOIB/ipoib_mac_from_guid_mask * NAME -* ipoib_mac_from_hp_guid +* ipoib_mac_from_guid_mask * * DESCRIPTION -* Generates an ethernet MAC address given a HP port GUID. 
+* Generates an ethernet MAC address given general port GUID and a bitwise mask * * SYNOPSIS */ static inline ib_api_status_t -ipoib_mac_from_hp_guid( - IN const net64_t port_guid, +ipoib_mac_from_guid_mask( + IN const uint8_t *p_guid, + IN uint32_t guid_mask, OUT mac_addr_t* const p_mac_addr ) { - const uint8_t *p_guid = (const uint8_t*)&port_guid; + static const mac_addr_size = HW_ADDR_LEN; + uint8_t i; + int digit_counter = 0; - /* Port guid is in network byte order. OUI is in lower 3 bytes. */ - ASSERT( p_guid[0] == 0x00 && p_guid[1] == 0x1a && p_guid[2] == 0x4b); + // All non-zero bits of guid_mask indicates the number of an appropriate + // byte in port_guid, that will be used in MAC address construction + for (i = 7; guid_mask; guid_mask >>= 1, --i ) + { + if( guid_mask & 1 ) + { + ++digit_counter; + if( digit_counter > mac_addr_size ) + { + //to avoid negative index + return IB_INVALID_GUID_MASK; + } + p_mac_addr->addr[mac_addr_size - digit_counter] = p_guid [i]; + } + } - p_mac_addr->addr[0] = 0; - p_mac_addr->addr[1] = 0x1a; - p_mac_addr->addr[2] = 0x4b; - p_mac_addr->addr[3] = p_guid[5]; - p_mac_addr->addr[4] = p_guid[6]; - p_mac_addr->addr[5] = p_guid[7]; + // check for the mask validity: it should have 6 non-zero bits + if( digit_counter != mac_addr_size ) + return IB_INVALID_GUID_MASK; return IB_SUCCESS; } @@ -383,6 +332,11 @@ ipoib_mac_from_hp_guid( * The port GUID, in network byte order, for which to generate a * MAC address. * +* guid_mask +* Each BIT in the mask indicates whether to include the appropriate BYTE +* to the MAC address. Bit 0 corresponds to the less significant BYTE , i.e. +* highest index in the MAC array +* * p_mac_addr * Pointer to a mac address in which to store the results. * @@ -390,11 +344,44 @@ ipoib_mac_from_hp_guid( * IB_SUCCESS * The MAC address was successfully converted. * +* IB_INVALID_GUID +* The port GUID provided was not a known GUID format. 
+* * SEE ALSO * IPOIB *********/ +/****f* IPOIB/ipoib_mac_from_voltaire_guid +* NAME +* ipoib_mac_from_voltaire_guid +* +* DESCRIPTION +* Generates an ethernet MAC address given a Voltaire port GUID. +* +* SYNOPSIS +*/ +static inline ib_api_status_t +ipoib_mac_from_voltaire_guid( + IN const net64_t port_guid, + OUT mac_addr_t* const p_mac_addr ) +{ + const uint8_t *p_guid = (const uint8_t*)&port_guid; + + /* Port guid is in network byte order. OUI is in lower 3 bytes. */ + ASSERT( p_guid[0] == 0x00 && p_guid[1] == 0x08 && p_guid[2] == 0xf1 ); + + p_mac_addr->addr[0] = p_guid[0]; + p_mac_addr->addr[1] = p_guid[1]; + p_mac_addr->addr[2] = p_guid[2]; + p_mac_addr->addr[3] = p_guid[4] ^ p_guid[6]; + p_mac_addr->addr[4] = p_guid[5] ^ p_guid[7]; + p_mac_addr->addr[5] = p_guid[5] + p_guid[6] + p_guid[7]; + + return IB_SUCCESS; +} + + /****f* IPOIB/ipoib_mac_from_guid * NAME * ipoib_mac_from_guid @@ -407,53 +394,59 @@ ipoib_mac_from_hp_guid( static inline ib_api_status_t ipoib_mac_from_guid( IN const net64_t port_guid, - OUT mac_addr_t* const p_mac_addr ) + IN uint32_t guid_mask, + OUT mac_addr_t* const p_mac_addr + ) { - ib_api_status_t status; + static const uint32_t guid_default_mask = 0xE7; //==0b 11100111 + ib_api_status_t status = IB_INVALID_GUID; const uint8_t *p_guid = (const uint8_t*)&port_guid; uint32_t laa; - if( p_guid[0] == 0 ) + /* Port guid is in network byte order. OUI is in lower 3 bytes. 
*/ + if( p_guid[0] == 0 ) { if( p_guid[1] == 0x02 && p_guid[2] == 0xc9 ) { status = ipoib_mac_from_mlx_guid( port_guid, p_mac_addr ); - if( status == IB_SUCCESS ) - return IB_SUCCESS; } else if( p_guid[1] == 0x08 && p_guid[2] == 0xf1 ) { status = ipoib_mac_from_voltaire_guid( port_guid, p_mac_addr ); - if( status == IB_SUCCESS ) - return IB_SUCCESS; } else if( p_guid[1] == 0x30 && p_guid[2] == 0x48 ) { - status = ipoib_mac_from_supermicro_guid( port_guid, p_mac_addr ); - if( status == IB_SUCCESS ) - return IB_SUCCESS; + //Supermicro GUID + status =ipoib_mac_from_guid_mask( p_guid, guid_default_mask, p_mac_addr ); } else if( p_guid[1] == 0x05 && p_guid[2] == 0xad ) { - status = ipoib_mac_from_cisco_guid( port_guid, p_mac_addr ); - if( status == IB_SUCCESS ) - return IB_SUCCESS; - } - /* Port guid is in network byte order. OUI is in lower 3 bytes. */ + //Cisco GUID + status =ipoib_mac_from_guid_mask( p_guid, guid_default_mask, p_mac_addr ); + } else if( p_guid[1] == 0x06 && p_guid[2] == 0x6a ) { status = ipoib_mac_from_sst_guid( port_guid, p_mac_addr ); - if( status == IB_SUCCESS ) - return IB_SUCCESS; } - else if( p_guid[1] == 0x1a && p_guid[2] == 0x4b ) + else if( p_guid[1] == 0x1a && p_guid[2] == 0x4b || + p_guid[1] == 0x17 && p_guid[2] == 0x08 ) + { + //HP GUID + status =ipoib_mac_from_guid_mask( p_guid, guid_default_mask, p_mac_addr ); + } + else if( p_guid[1] == 0x18 && p_guid[2] == 0x8b ) { - status = ipoib_mac_from_hp_guid( port_guid, p_mac_addr ); - if( status == IB_SUCCESS ) - return IB_SUCCESS; + //DELL GUID + status =ipoib_mac_from_guid_mask( p_guid, guid_default_mask, p_mac_addr ); } + + if( status == IB_SUCCESS ) + return status; } + if( guid_mask ) + return ipoib_mac_from_guid_mask( p_guid, guid_mask, p_mac_addr ); + /* Value of zero is reserved. */ laa = cl_atomic_inc( &g_ipoib.laa_idx ); -- 2.46.0