From: tzachid Date: Mon, 4 Jan 2010 17:34:19 +0000 (+0000) Subject: [mlx4_bus] Add support for RoCEE to the low level driver. X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=56bf0ee7312a476f3621ad623dde368027edad54;p=~shefty%2Frdma-win.git [mlx4_bus] Add support for RoCEE to the low level driver. git-svn-id: svn://openib.tc.cornell.edu/gen1@2647 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- diff --git a/trunk/hw/mlx4/kernel/bus/core/ud_header.c b/trunk/hw/mlx4/kernel/bus/core/ud_header.c index 4be128e2..3585fef2 100644 --- a/trunk/hw/mlx4/kernel/bus/core/ud_header.c +++ b/trunk/hw/mlx4/kernel/bus/core/ud_header.c @@ -62,6 +62,15 @@ static const struct ib_field lrh_table[] = { { STRUCT_FIELD_INIT(lrh, source_lid, 1, 16, 16) } }; +static const struct ib_field eth_table[] = { + { STRUCT_FIELD_INIT(eth, dmac_h, 0, 0, 32) }, + { STRUCT_FIELD_INIT(eth, dmac_l, 1, 0, 16) }, + { STRUCT_FIELD_INIT(eth, smac_h, 1, 16,16) }, + { STRUCT_FIELD_INIT(eth, smac_l, 2, 0 ,32) }, + { STRUCT_FIELD_INIT(eth, type, 3, 0, 16)} +}; + + static const struct ib_field grh_table[] = { { STRUCT_FIELD_INIT(grh, ip_version, 0, 0, 4) }, { STRUCT_FIELD_INIT(grh, traffic_class, 0, 4, 8) }, @@ -279,3 +288,93 @@ int ib_ud_header_unpack(u8 *buf, return 0; } EXPORT_SYMBOL(ib_ud_header_unpack); + +/** + * ib_rdmaoe_ud_header_init - Initialize UD header structure + * @payload_bytes:Length of packet payload + * @grh_present:GRH flag (if non-zero, GRH will be included) + * @header:Structure to initialize + * + * ib_rdmaoe_ud_header_init() initializes the grh.ip_version, grh.payload_length, + * grh.next_header, bth.opcode, bth.pad_count and + * bth.transport_header_version fields of a &struct eth_ud_header given + * the payload length and whether a GRH will be included. + */ +void ib_rdmaoe_ud_header_init(int payload_bytes, + int grh_present, + struct eth_ud_header *header) +{ + int header_len; + + memset(header, 0, sizeof *header); + + header_len = + sizeof header->eth + + IB_BTH_BYTES + + IB_DETH_BYTES; + if (grh_present) + header_len += IB_GRH_BYTES; + + header->grh_present = grh_present; + if (grh_present) { + header->grh.ip_version = 6; + header->grh.payload_length = + cpu_to_be16((IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4 + /* ICRC */ + 3) & ~3); /* round up */ + header->grh.next_header = 0x1b; + } + + if (header->immediate_present) + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + else + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; + header->bth.pad_count =(u8) ((4 - payload_bytes) & 3); + header->bth.transport_header_version = 0; +} + + + +/** + * rdmaoe_ud_header_pack - Pack UD header struct into eth wire format + * @header:UD header struct + * @buf:Buffer to pack into + * + * ib_ud_header_pack() packs the UD header structure @header into wire + * format in the buffer @buf. 
+ */ +int rdmaoe_ud_header_pack(struct eth_ud_header *header, + void *buf) +{ + int len = 0; + + ib_pack(eth_table, ARRAY_SIZE(eth_table), + &header->eth, buf); + len += IB_ETH_BYTES; + + if (header->grh_present) { + ib_pack(grh_table, ARRAY_SIZE(grh_table), + &header->grh, (u8*)buf + len); + len += IB_GRH_BYTES; + } + + ib_pack(bth_table, ARRAY_SIZE(bth_table), + &header->bth, (u8*)buf + len); + len += IB_BTH_BYTES; + + ib_pack(deth_table, ARRAY_SIZE(deth_table), + &header->deth, (u8*)buf + len); + len += IB_DETH_BYTES; + + if (header->immediate_present) { + memcpy((u8*)buf + len, &header->immediate_data, + sizeof header->immediate_data); + len += sizeof header->immediate_data; + } + + return len; +} + + diff --git a/trunk/hw/mlx4/kernel/bus/core/verbs.c b/trunk/hw/mlx4/kernel/bus/core/verbs.c index 33e08f51..1f7845bf 100644 --- a/trunk/hw/mlx4/kernel/bus/core/verbs.c +++ b/trunk/hw/mlx4/kernel/bus/core/verbs.c @@ -336,3 +336,28 @@ int ib_destroy_ah(struct ib_ah *ah) } EXPORT_SYMBOL(ib_destroy_ah); +enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type) +{ + switch (node_type) { + case RDMA_NODE_IB_CA: + case RDMA_NODE_IB_SWITCH: + case RDMA_NODE_IB_ROUTER: + return RDMA_TRANSPORT_IB; + case RDMA_NODE_RNIC: + return RDMA_TRANSPORT_IWARP; + default: + ASSERT(FALSE); + return 0; + } +} + +enum rdma_transport_type rdma_port_get_transport(struct ib_device *device, + u8 port_num) +{ + return device->get_port_transport ? + device->get_port_transport(device, port_num) : + rdma_node_get_transport(device->node_type); +} +EXPORT_SYMBOL(rdma_port_get_transport); + diff --git a/trunk/hw/mlx4/kernel/bus/drv/drv.c b/trunk/hw/mlx4/kernel/bus/drv/drv.c index 7a2f6156..eebf3cc4 100644 --- a/trunk/hw/mlx4/kernel/bus/drv/drv.c +++ b/trunk/hw/mlx4/kernel/bus/drv/drv.c @@ -95,7 +95,6 @@ EvtInterruptIsr( #endif -static NTSTATUS __create_child( __in WDFDEVICE Device, @@ -228,44 +227,54 @@ Routine Description: if ( p_fdo->children_created ) goto end; - + // eventually we'll have all information about children in Registry // DriverEntry will read it into a Global storage and // this routine will create all the children on base on this info number_of_ib_ports = mlx4_count_ib_ports(mdev); ASSERT(number_of_ib_ports >=0 && number_of_ib_ports <=2); +#if 0 + //For now we it's either IB or ETH, and we always create LLE if it's ETH + if((number_of_ib_ports > 0) && (mdev->caps.port_type[1] == MLX4_PORT_TYPE_IB) ) { + status = __create_child(Device, BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 ); + if (!NT_SUCCESS(status)) { + MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (ib)failed with 0x%x\n", status)); + } + } +#endif + for (i = 1; i <= mdev->caps.num_ports; i++) { - if (mlx4_is_enabled_port(mdev, i)) { - if(mlx4_is_eth_port(mdev, i)) { - status = __create_child(Device, ETH_HARDWARE_IDS, ETH_HARDWARE_DESCRIPTION, i); - if (!NT_SUCCESS(status)) { - MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (eth) failed with 0x%x\n", status)); - break; - } - eth_created = TRUE; - } else { - if (eth_created){ - // - // Illegal configuration the IB should be the first port - // - status = STATUS_INVALID_PARAMETER; - MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (IB) failed. 
Invalid configuration, IB should be the first port.")); - break; - } - - if (ib_created){ - continue; - } - - status = __create_child(Device, BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 ); - if (!NT_SUCCESS(status)) { - MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (ib)failed with 0x%x\n", status)); - break; - } - ib_created = TRUE; - } - } + if (mlx4_is_enabled_port(mdev, i)) { + if(mlx4_is_eth_port(mdev, i)) { + status = __create_child(Device, ETH_HARDWARE_IDS, ETH_HARDWARE_DESCRIPTION, i); + if (!NT_SUCCESS(status)) { + MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (eth) failed with 0x%x\n", status)); + break; + } + eth_created = TRUE; + } else { + if (eth_created){ + // + // Illegal configuration the IB should be the first port + // + status = STATUS_INVALID_PARAMETER; + MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (IB) failed. Invalid configuration, IB should be the first port.")); + break; + } + + if (ib_created){ + continue; + } + + status = __create_child(Device, BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 ); + if (!NT_SUCCESS(status)) { + MLX4_PRINT_EV(TRACE_LEVEL_ERROR, MLX4_DBG_DRV, ("__create_child (ib)failed with 0x%x\n", status)); + break; + } + ib_created = TRUE; + } + } } p_fdo->children_created = TRUE; @@ -869,6 +878,9 @@ EvtPrepareHardware( goto err; } + pdev->p_wdf_device = Device; + pdev->ib_hca_created = 0; + // start the card status = __start_card(Device, p_fdo ); if( !NT_SUCCESS( status ) ) diff --git a/trunk/hw/mlx4/kernel/bus/drv/stat.c b/trunk/hw/mlx4/kernel/bus/drv/stat.c index 10c3b11e..727bdd23 100644 --- a/trunk/hw/mlx4/kernel/bus/drv/stat.c +++ b/trunk/hw/mlx4/kernel/bus/drv/stat.c @@ -113,7 +113,7 @@ static void __print_mlx( struct mlx4_dev *mdev, struct mlx4_wqe_mlx_seg *p) void st_print_mlx_header( struct mlx4_dev *mdev, struct mlx4_ib_sqp *sqp, struct mlx4_wqe_mlx_seg *mlx ) { if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_UDH ) - __print_ud_header( mdev, &sqp->ud_header ); + __print_ud_header( mdev, &sqp->hdr.ib ); if ( mdev->pdev->p_stat_dev->flags & MLX4_MAD_TRACE_WQE ) __print_mlx( mdev, mlx ); } diff --git a/trunk/hw/mlx4/kernel/bus/ib/ah.c b/trunk/hw/mlx4/kernel/bus/ib/ah.c index 85a0da13..cffd92cf 100644 --- a/trunk/hw/mlx4/kernel/bus/ib/ah.c +++ b/trunk/hw/mlx4/kernel/bus/ib/ah.c @@ -32,68 +32,199 @@ #include "mlx4_ib.h" -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +static inline int rdma_link_local_addr(struct in6_addr *addr) +{ + if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) && + addr->s6_addr32[1] == 0) + return 1; + else + return 0; +} + +inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) +{ + memcpy(mac, &addr->s6_addr[8], 3); + memcpy(mac + 3, &addr->s6_addr[13], 3); + mac[0] ^= 2; +} + +static inline int rdma_is_multicast_addr(struct in6_addr *addr) +{ + return addr->s6_addr[0] == 0xff ? 
1 : 0; +} + +static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) +{ + int i; + + mac[0] = 0x33; + mac[1] = 0x33; + for (i = 2; i < 6; ++i) + mac[i] = addr->s6_addr[i + 10]; + +} + +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast) +{ + int err = 0; + struct sockaddr_in6 dst; + + UNREFERENCED_PARAMETER(dev); + + *is_mcast = 0; + memcpy(dst.sin6_addr.s6_addr, ah_attr->grh.dgid.raw, sizeof(ah_attr->grh.dgid.raw)); + + if (rdma_link_local_addr(&dst.sin6_addr)) + rdma_get_ll_mac(&dst.sin6_addr, mac); + else if (rdma_is_multicast_addr(&dst.sin6_addr)) { + rdma_get_mcast_mac(&dst.sin6_addr, mac); + *is_mcast = 1; + } else { + err = -EINVAL; //jyang:todo + ASSERT(FALSE); + } + return err; +} + +static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) { struct mlx4_dev *dev = to_mdev(pd->device)->dev; - struct mlx4_ib_ah *ah; if (mlx4_is_barred(pd->device->dma_device)) return ERR_PTR(-EFAULT); - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - memset(&ah->av, 0, sizeof ah->av); - - ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); - ah->av.g_slid = ah_attr->src_path_bits; - ah->av.dlid = cpu_to_be16(ah_attr->dlid); - if (ah_attr->static_rate) { - ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; - while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && - !(1 << ah->av.stat_rate & dev->caps.stat_rate_support)) - --ah->av.stat_rate; - } - ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.ib.g_slid = ah_attr->src_path_bits; if (ah_attr->ah_flags & IB_AH_GRH) { - ah->av.g_slid |= 0x80; - ah->av.gid_index = ah_attr->grh.sgid_index; - ah->av.hop_limit = ah_attr->grh.hop_limit; - ah->av.sl_tclass_flowlabel |= + ah->av.ib.g_slid |= 0x80; + ah->av.ib.gid_index = ah_attr->grh.sgid_index; + ah->av.ib.hop_limit = ah_attr->grh.hop_limit; + ah->av.ib.sl_tclass_flowlabel |= cpu_to_be32((ah_attr->grh.traffic_class << 20) | ah_attr->grh.flow_label); - memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); + memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16); } + ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid); + if (ah_attr->static_rate) { + ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) + --ah->av.ib.stat_rate; + } + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + + return &ah->ibah; +} + +struct ib_ah *create_rdmaoe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + struct mlx4_dev *dev = ibdev->dev; + u8 mac[6]; + int err; + int is_mcast; + + if (mlx4_is_barred(pd->device->dma_device)) + return ERR_PTR(-EFAULT); + + err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast); + if (err) + return ERR_PTR(err); + + memcpy(ah->av.eth.mac_0_1, mac, 2); + memcpy(ah->av.eth.mac_2_5, mac + 2, 4); + ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.ib.g_slid = 0x80; + if (ah_attr->static_rate) { + ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) + --ah->av.ib.stat_rate; + } + + /* + * HW 
requires multicast LID so we just choose one. + */ + if (is_mcast) + ah->av.ib.dlid = cpu_to_be16(0xc000); + + memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16); + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + return &ah->ibah; } + +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx4_ib_ah *ah; + enum rdma_transport_type transport; + + struct ib_ah *ret; + + ah = kzalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + transport = rdma_port_get_transport(pd->device, ah_attr->port_num); + if (transport == RDMA_TRANSPORT_RDMAOE) { + if (!(ah_attr->ah_flags & IB_AH_GRH)) { + ret = ERR_PTR(-EINVAL); + goto out; + } else { + /* TBD: need to handle the case when we get called + in an atomic context and there we might sleep. We + don't expect this currently since we're working with + link local addresses which we can translate without + going to sleep */ + ret = create_rdmaoe_ah(pd, ah_attr, ah); + if (IS_ERR(ret)) + goto out; + else + return ret; + } + } else + return create_ib_ah(pd, ah_attr, ah); /* never fails */ + +out: + kfree(ah); + return ret; +} + + int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); + enum rdma_transport_type transport; + + transport = rdma_port_get_transport(ibah->device, ah_attr->port_num); if (mlx4_is_barred(ibah->device->dma_device)) return -EFAULT; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->dlid = be16_to_cpu(ah->av.dlid); - ah_attr->sl = (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28); - ah_attr->port_num = (u8)(be32_to_cpu(ah->av.port_pd) >> 24); - if (ah->av.stat_rate) - ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET; - ah_attr->src_path_bits = ah->av.g_slid & 0x7F; + ah_attr->dlid = transport == RDMA_TRANSPORT_IB ? 
be16_to_cpu(ah->av.ib.dlid) : 0; + ah_attr->sl = (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28); + ah_attr->port_num = (u8)(be32_to_cpu(ah->av.ib.port_pd) >> 24); + if (ah->av.ib.stat_rate) + ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; + ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F; if (mlx4_ib_ah_grh_present(ah)) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.traffic_class = - (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20); + (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20); ah_attr->grh.flow_label = - be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff; - ah_attr->grh.hop_limit = ah->av.hop_limit; - ah_attr->grh.sgid_index = ah->av.gid_index; - memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16); + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff; + ah_attr->grh.hop_limit = ah->av.ib.hop_limit; + ah_attr->grh.sgid_index = ah->av.ib.gid_index; + memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16); } return 0; @@ -108,7 +239,7 @@ int mlx4_ib_destroy_ah(struct ib_ah *ah) // Leo: temporary int mlx4_ib_modify_ah( struct ib_ah *ibah, struct ib_ah_attr *ah_attr ) { - struct mlx4_av *av = &to_mah(ibah)->av; + struct mlx4_av *av = &to_mah(ibah)->av.ib; struct mlx4_dev *dev = to_mdev(ibah->pd->device)->dev; if (mlx4_is_barred(dev)) diff --git a/trunk/hw/mlx4/kernel/bus/ib/main.c b/trunk/hw/mlx4/kernel/bus/ib/main.c index 79375d9e..98d38e46 100644 --- a/trunk/hw/mlx4/kernel/bus/ib/main.c +++ b/trunk/hw/mlx4/kernel/bus/ib/main.c @@ -133,31 +133,21 @@ out: return err; } -static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) -{ - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; - if (mlx4_is_barred(ibdev->dma_device)) - return -EFAULT; - - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; - - memset(props, 0, sizeof *props); +static enum rdma_transport_type +mlx4_ib_port_get_transport(struct ib_device *device, u8 port_num) +{ + struct mlx4_dev *dev = to_mdev(device)->dev; - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); + return dev->caps.port_mask & (1 << (port_num - 1)) ? 
+ RDMA_TRANSPORT_IB : RDMA_TRANSPORT_RDMAOE; +} - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) - goto out; +static void ib_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, + struct ib_smp *out_mad) +{ props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); @@ -177,6 +167,63 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, props->subnet_timeout = out_mad->data[51] & 0x1f; props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + props->transport= RDMA_TRANSPORT_IB; +} + +static void eth_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, + struct ib_smp *out_mad) +{ + + props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; + props->pkey_tbl_len = (u16)to_mdev(ibdev)->dev->caps.pkey_table_len[port]; + props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + //props->active_mtu = rdmaoe->mtu[port - 1]; + props->active_mtu = 1500; //jyang:hardcoded + props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; + props->transport= RDMA_TRANSPORT_RDMAOE; + + //props->state = netif_running(ndev) && netif_oper_up(ndev) ? + // IB_PORT_ACTIVE : IB_PORT_DOWN; + props->state = IB_PORT_ACTIVE; //jyang: just hardcoded it now + props->phys_state = props->state; +} + + + +static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof *props); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + mlx4_ib_port_get_transport(ibdev, port) == RDMA_TRANSPORT_IB ? + ib_link_query_port(ibdev, port, props, out_mad) : + eth_link_query_port(ibdev, port, props, out_mad); out: kfree(in_mad); @@ -522,6 +569,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; + ibdev->ib_dev.get_port_transport = mlx4_ib_port_get_transport; ibdev->ib_dev.query_gid_chunk = mlx4_ib_query_gid_chunk; ibdev->ib_dev.query_pkey_chunk = mlx4_ib_query_pkey_chunk; ibdev->ib_dev.modify_device = mlx4_ib_modify_device; diff --git a/trunk/hw/mlx4/kernel/bus/ib/mlx4_ib.h b/trunk/hw/mlx4/kernel/bus/ib/mlx4_ib.h index 92255af5..c2a2cc80 100644 --- a/trunk/hw/mlx4/kernel/bus/ib/mlx4_ib.h +++ b/trunk/hw/mlx4/kernel/bus/ib/mlx4_ib.h @@ -165,14 +165,15 @@ struct mlx4_ib_srq { struct mlx4_ib_ah { struct ib_ah ibah; - struct mlx4_av av; + union mlx4_ext_av av; }; + enum { /* * Largest possible UD header: send with GRH and immediate data. 
*/ - MLX4_IB_UD_HEADER_SIZE = 72 + MLX4_IB_UD_HEADER_SIZE = 76 }; struct mlx4_ib_sqp { @@ -180,7 +181,10 @@ struct mlx4_ib_sqp { int pkey_index; u32 qkey; u32 send_psn; - struct ib_ud_header ud_header; + union { + struct ib_ud_header ib; + struct eth_ud_header eth; + } hdr; u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; }; @@ -340,9 +344,14 @@ void mlx4_ib_qp_init(); int __init mlx4_ib_init(void); void __exit mlx4_ib_cleanup(void); +int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, + u8 *mac, int *is_mcast); + + static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { - return !!(ah->av.g_slid & 0x80); + return !!(ah->av.ib.g_slid & 0x80); + } #endif /* MLX4_IB_H */ diff --git a/trunk/hw/mlx4/kernel/bus/ib/qp.c b/trunk/hw/mlx4/kernel/bus/ib/qp.c index 8ffca0f5..263a47ae 100644 --- a/trunk/hw/mlx4/kernel/bus/ib/qp.c +++ b/trunk/hw/mlx4/kernel/bus/ib/qp.c @@ -46,7 +46,13 @@ enum { enum { MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, - MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f + MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, + MLX4_IB_LINK_TYPE_IB = 0, + MLX4_IB_LINK_TYPE_ETH = 1 +}; + +enum { + MLX4_RDMAOE_ETHERTYPE = 0x8915 }; enum { @@ -62,9 +68,23 @@ static const __be32 mlx4_ib_opcode[] = { __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), /* [IB_WR_ATOMIC_CMP_AND_SWP] */ __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), /* [IB_WR_ATOMIC_FETCH_AND_ADD]*/ __constant_cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)), /* [IB_WR_LSO] */ + + + __constant_cpu_to_be32(MLX4_OPCODE_SEND_INVAL), /* [IB_WR_SEND_WITH_INV] */ + __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), /* [IB_WR_RDMA_READ_WITH_INV] */ + __constant_cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), /* [IB_WR_LOCAL_INV] */ + __constant_cpu_to_be32(MLX4_OPCODE_FMR), /* [IB_WR_FAST_REG_MR] */ + + + __constant_cpu_to_be32(MLX4_OPCODE_NOP) /* [IB_WR_NOP] */ }; + +//????????????????? IB_WR_RDMA_READ_WITH_INV, //??????????????? + +extern inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac); + static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) { return container_of(mqp, struct mlx4_ib_sqp, qp); @@ -724,6 +744,12 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, struct mlx4_qp_path *path, u8 port) { + int err; + int is_eth = rdma_port_get_transport(&dev->ib_dev, port) == + RDMA_TRANSPORT_RDMAOE ? 
1 : 0; + u8 mac[6]; + int is_mcast; + path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); if (ah->static_rate) { @@ -754,7 +780,21 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6) | ((ah->sl & 0xf) << 2); - return 0; + if (is_eth) { + if (!(ah->ah_flags & IB_AH_GRH)) + return -1; + + err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast); + if (err) + return err; + + memcpy(path->dmac, mac, 6); + path->ackto = MLX4_IB_LINK_TYPE_ETH; + /* use index 0 into MAC table for RDMAoE */ + path->grh_mylmc &= 0x80; + } + + return 0; } static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, @@ -1146,79 +1186,132 @@ static enum ib_wr_opcode to_wr_opcode(struct _ib_send_wr *wr) return opcode; } + + + static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr, - void *wqe) + void *wqe, unsigned *mlx_seg_len) { enum ib_wr_opcode opcode = to_wr_opcode(wr); struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = (void*)((u8*)wqe + sizeof *mlx); struct mlx4_ib_ah *ah = to_mah((struct ib_ah *)wr->dgrm.ud.h_av); - __be16 pkey; + u16 pkey; int send_size; int header_size; int spc; - u32 i; + u16 i; + struct ib_ud_header *ib = NULL; + struct eth_ud_header *eth = NULL; + struct ib_unpacked_grh *grh; + struct ib_unpacked_bth *bth; + struct ib_unpacked_deth *deth; + u8 *tmp; + u8 mac[6]; send_size = 0; for (i = 0; i < wr->num_ds; ++i) send_size += wr->ds_array[i].length; - ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header); + if (rdma_port_get_transport(sqp->qp.ibqp.device, sqp->qp.port) == RDMA_TRANSPORT_IB) { + + ib = &sqp->hdr.ib; + grh = &ib->grh; + bth = &ib->bth; + deth = &ib->deth; + ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), ib); + ib->lrh.service_level = + (u8)(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28); + ib->lrh.destination_lid = ah->av.ib.dlid; + ib->lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); + } else { + eth = &sqp->hdr.eth; + grh = ð->grh; + bth = ð->bth; + deth = ð->deth; + ib_rdmaoe_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), eth); + } - sqp->ud_header.lrh.service_level = - (u8)(be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28); - sqp->ud_header.lrh.destination_lid = ah->av.dlid; - sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f); + if (mlx4_ib_ah_grh_present(ah)) { - sqp->ud_header.grh.traffic_class = - (u8)((be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff); - sqp->ud_header.grh.flow_label = - ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); - sqp->ud_header.grh.hop_limit = ah->av.hop_limit; - ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.port_pd) >> 24), - ah->av.gid_index, &sqp->ud_header.grh.source_gid); - memcpy(sqp->ud_header.grh.destination_gid.raw, - ah->av.dgid, 16); + grh->traffic_class = + (u8)((be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff); + grh->flow_label = + ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); + grh->hop_limit = ah->av.ib.hop_limit; + ib_get_cached_gid(ib_dev, (u8)(be32_to_cpu(ah->av.ib.port_pd) >> 24), + ah->av.ib.gid_index, &grh->source_gid); + memcpy(grh->destination_gid.raw, + ah->av.ib.dgid, 16); } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == - XIB_LID_PERMISSIVE ? 
MLX4_WQE_MLX_SLR : 0) | - (sqp->ud_header.lrh.service_level << 8)); - mlx->rlid = sqp->ud_header.lrh.destination_lid; + + if (ib) { + mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (ib->lrh.destination_lid == + IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | + (ib->lrh.service_level << 8)); + mlx->rlid = ib->lrh.destination_lid; + + } switch (opcode) { case IB_WR_SEND: - sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; - sqp->ud_header.immediate_present = 0; + bth->opcode = IB_OPCODE_UD_SEND_ONLY; + if (ib) + ib->immediate_present = 0; + else + eth->immediate_present = 0; break; case IB_WR_SEND_WITH_IMM: - sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; - sqp->ud_header.immediate_present = 1; - sqp->ud_header.immediate_data = wr->immediate_data; + bth->opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + if (ib) { + ib->immediate_present = 1; + ib->immediate_data = wr->immediate_data; + } else { + eth->immediate_present = 1; + eth->immediate_data = wr->immediate_data; + } break; default: return -EINVAL; } - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; - if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) - sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; - sqp->ud_header.bth.solicited_event = (u8)(!!(wr->send_opt & IB_SEND_OPT_SOLICITED)); + if (ib) { + ib->lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + if (ib->lrh.destination_lid == IB_LID_PERMISSIVE) + ib->lrh.source_lid = IB_LID_PERMISSIVE; + } else { + memcpy(eth->eth.dmac_h, ah->av.eth.mac_0_1, 2); + memcpy(eth->eth.dmac_h + 2, ah->av.eth.mac_2_5, 2); + memcpy(eth->eth.dmac_l, ah->av.eth.mac_2_5 + 2, 2); + rdma_get_ll_mac((struct in6_addr *)&grh->source_gid, mac); + + tmp = mac; + memcpy(eth->eth.smac_h, tmp, 2); + memcpy(eth->eth.smac_l, tmp + 2, 4); + eth->eth.type = cpu_to_be16(MLX4_RDMAOE_ETHERTYPE); + } + + bth->solicited_event = (u8)(!!(wr->send_opt & IB_SEND_SOLICITED)); + if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); else ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->dgrm.ud.pkey_index, &pkey); - sqp->ud_header.bth.pkey = pkey; - sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp; - sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ? - cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey; - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); - - header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); + bth->pkey = pkey; + bth->destination_qpn = wr->dgrm.ud.remote_qp; + bth->psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); + deth->qkey = wr->dgrm.ud.remote_qkey & 0x80000000 ? 
+ cpu_to_be32(sqp->qkey) : wr->dgrm.ud.remote_qkey; + deth->source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + + if (ib) + header_size = ib_ud_header_pack(ib, sqp->header_buf); + else + header_size = rdmaoe_ud_header_pack(eth, sqp->header_buf); #if 0 { @@ -1271,7 +1364,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, ib_send_wr_t *wr, i = 2; } - return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + *mlx_seg_len = + ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + return 0; + } static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) @@ -1314,9 +1410,13 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, ib_send_wr_t *wr) static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, ib_send_wr_t *wr) { + memcpy(dseg->av, &to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, sizeof (struct mlx4_av)); dseg->dqpn = wr->dgrm.ud.remote_qp; dseg->qkey = wr->dgrm.ud.remote_qkey; + dseg->vlan = to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av.eth.vlan; + memcpy(dseg->mac_0_1, to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av.eth.mac_0_1, 6); + } static void set_mlx_icrc_seg(void *dseg) @@ -1398,7 +1498,7 @@ static int build_lso_seg(struct mlx4_lso_seg *wqe, ib_send_wr_t *wr, int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr, ib_send_wr_t **bad_wr) { - enum ib_wr_opcode opcode; + enum ib_wr_opcode opcode;// = to_wr_opcode(wr); struct mlx4_ib_qp *qp = to_mqp(ibqp); struct mlx4_dev *dev = to_mdev(ibqp->device)->dev; u8 *wqe /*, *wqe_start*/; @@ -1525,16 +1625,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, ib_send_wr_t *wr, case IB_QPT_SMI: case IB_QPT_GSI: - err = build_mlx_header(to_msqp(qp), wr, ctrl); + err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); if (err < 0) { if (bad_wr) *bad_wr = wr; goto out; } - - wqe += err; - size += err / 16; - + wqe += seglen; + size += seglen / 16; err = 0; break; diff --git a/trunk/hw/mlx4/kernel/bus/inc/cmd.h b/trunk/hw/mlx4/kernel/bus/inc/cmd.h index 94f01a4f..56e2be53 100644 --- a/trunk/hw/mlx4/kernel/bus/inc/cmd.h +++ b/trunk/hw/mlx4/kernel/bus/inc/cmd.h @@ -138,6 +138,7 @@ enum { MLX4_SET_PORT_MAC_TABLE = 0x2, MLX4_SET_PORT_VLAN_TABLE = 0x3, MLX4_SET_PORT_PRIO_MAP = 0x4, + MLX4_SET_PORT_GID_TABLE = 0x5, }; struct mlx4_dev; diff --git a/trunk/hw/mlx4/kernel/bus/inc/device.h b/trunk/hw/mlx4/kernel/bus/inc/device.h index 5f6f134e..daf2bee1 100644 --- a/trunk/hw/mlx4/kernel/bus/inc/device.h +++ b/trunk/hw/mlx4/kernel/bus/inc/device.h @@ -208,8 +208,9 @@ struct mlx4_caps { int log_num_prios; int num_fc_exch; enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1]; - enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1]; - int reserved_fexch_mpts_base; + u32 port_mask; + enum mlx4_port_state port_state[MLX4_MAX_PORTS + 1]; + int reserved_fexch_mpts_base; int total_reserved_qps; }; @@ -343,6 +344,28 @@ struct mlx4_av { u8 dgid[16]; }; +struct mlx4_eth_av { + __be32 port_pd; + u8 reserved1; + u8 smac_idx; + u16 reserved2; + u8 reserved3; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 dgid[16]; + u32 reserved4[2]; + __be16 vlan; + u8 mac_0_1[2]; + u8 mac_2_5[4]; +}; + +union mlx4_ext_av { + struct mlx4_av ib; + struct mlx4_eth_av eth; +}; + #define MLX4_DEV_SIGNATURE 0xf1b34a6e struct mlx4_dev_params { diff --git a/trunk/hw/mlx4/kernel/bus/inc/ib_pack.h b/trunk/hw/mlx4/kernel/bus/inc/ib_pack.h index ac7283d5..6c50e11a 100644 --- a/trunk/hw/mlx4/kernel/bus/inc/ib_pack.h +++ b/trunk/hw/mlx4/kernel/bus/inc/ib_pack.h @@ -39,6 +39,7 @@ enum { IB_LRH_BYTES = 8, 
+ IB_ETH_BYTES = 14, IB_GRH_BYTES = 40, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8 @@ -212,6 +213,15 @@ struct ib_unpacked_deth { __be32 source_qpn; }; +struct ib_unpacked_eth { + u8 dmac_h[4]; + u8 dmac_l[2]; + u8 smac_h[2]; + u8 smac_l[4]; + __be16 type; +}; + + struct ib_ud_header { struct ib_unpacked_lrh lrh; int grh_present; @@ -222,6 +232,19 @@ struct ib_ud_header { __be32 immediate_data; }; + + +struct eth_ud_header { + struct ib_unpacked_eth eth; + int grh_present; + struct ib_unpacked_grh grh; + struct ib_unpacked_bth bth; + struct ib_unpacked_deth deth; + int immediate_present; + __be32 immediate_data; +}; + + void ib_pack(const struct ib_field *desc, int desc_len, void *structure, @@ -236,10 +259,18 @@ void ib_ud_header_init(int payload_bytes, int grh_present, struct ib_ud_header *header); +void ib_rdmaoe_ud_header_init(int payload_bytes, + int grh_present, + struct eth_ud_header *header); + int ib_ud_header_pack(struct ib_ud_header *header, void *buf); int ib_ud_header_unpack(void *buf, struct ib_ud_header *header); +int rdmaoe_ud_header_pack(struct eth_ud_header *header, + void *buf); + + #endif /* IB_PACK_H */ diff --git a/trunk/hw/mlx4/kernel/bus/inc/ib_verbs.h b/trunk/hw/mlx4/kernel/bus/inc/ib_verbs.h index a832ae83..c8bd01b8 100644 --- a/trunk/hw/mlx4/kernel/bus/inc/ib_verbs.h +++ b/trunk/hw/mlx4/kernel/bus/inc/ib_verbs.h @@ -53,6 +53,34 @@ union ib_gid { #include "ib_verbs_ex.h" +/* + * IPv6 address structure + */ + +struct in6_addr +{ + union + { + __u8 u6_addr8[16]; + __be16 u6_addr16[8]; + __be32 u6_addr32[4]; + } in6_u; +#define s6_addr in6_u.u6_addr8 +#define s6_addr16 in6_u.u6_addr16 +#define s6_addr32 in6_u.u6_addr32 +}; + + +struct sockaddr_in6 { + unsigned short int sin6_family; /* AF_INET6 */ + __be16 sin6_port; /* Transport layer port # */ + __be32 sin6_flowinfo; /* IPv6 flow information */ + struct in6_addr sin6_addr; /* IPv6 address */ + __u32 sin6_scope_id; /* scope id (new in RFC2553) */ +}; + +#define AF_INET6 10 /* IP version 6 */ + enum rdma_node_type { /* IB values map to NodeInfo:NodeType. 
*/ RDMA_NODE_IB_CA = 1, @@ -63,7 +91,8 @@ enum rdma_node_type { enum rdma_transport_type { RDMA_TRANSPORT_IB, - RDMA_TRANSPORT_IWARP + RDMA_TRANSPORT_IWARP, + RDMA_TRANSPORT_RDMAOE }; enum rdma_transport_type @@ -231,6 +260,7 @@ struct ib_port_attr { u8 active_width; u8 active_speed; u8 phys_state; + enum rdma_transport_type transport; }; enum ib_device_modify_flags { @@ -633,6 +663,10 @@ enum ib_wr_opcode { IB_WR_ATOMIC_CMP_AND_SWP, IB_WR_ATOMIC_FETCH_AND_ADD, IB_WR_LSO, + IB_WR_SEND_WITH_INV, + IB_WR_RDMA_READ_WITH_INV, + IB_WR_LOCAL_INV, + IB_WR_FAST_REG_MR, IB_WR_NOP }; @@ -920,6 +954,9 @@ struct ib_device { int (*query_port)(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); + enum rdma_transport_type (*get_port_transport)(struct ib_device *device, + u8 port_num); + int (*query_gid_chunk)(struct ib_device *device, u8 port_num, int index, union ib_gid gid[8], int size); @@ -1127,6 +1164,11 @@ int ib_query_device(struct ib_device *device, int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); +enum rdma_transport_type rdma_port_get_transport(struct ib_device *device, + u8 port_num); +int rdma_is_transport_supported(struct ib_device *device, + enum rdma_transport_type transport); + int ib_query_gid_chunk(struct ib_device *device, u8 port_num, int index, union ib_gid gid[8], int size); diff --git a/trunk/hw/mlx4/kernel/bus/inc/qp.h b/trunk/hw/mlx4/kernel/bus/inc/qp.h index 89e32951..a6ba2377 100644 --- a/trunk/hw/mlx4/kernel/bus/inc/qp.h +++ b/trunk/hw/mlx4/kernel/bus/inc/qp.h @@ -113,7 +113,9 @@ struct mlx4_qp_path { u8 snooper_flags; u8 reserved3[2]; u8 counter_index; - u8 reserved4[7]; + u8 reserved4; + u8 dmac[6]; + }; struct mlx4_qp_context { @@ -213,7 +215,9 @@ struct mlx4_wqe_datagram_seg { __be32 av[8]; __be32 dqpn; __be32 qkey; - __be32 reservd[2]; + __be16 vlan; + u8 mac_0_1[2]; + u8 mac_2_5[4]; }; #pragma warning( disable : 4200) diff --git a/trunk/hw/mlx4/kernel/bus/net/SOURCES b/trunk/hw/mlx4/kernel/bus/net/SOURCES index 37441cf0..e06a1251 100644 --- a/trunk/hw/mlx4/kernel/bus/net/SOURCES +++ b/trunk/hw/mlx4/kernel/bus/net/SOURCES @@ -31,7 +31,7 @@ SOURCES= net.rc \ srq.c \ port.c \ -INCLUDES=..;..\inc;..\..\inc;..\core\$O;..\..\..\..\..\inc;..\..\..\..\..\inc\kernel; +INCLUDES=..;..\inc;..\..\inc;..\..\..\inc;..\core\$O;..\..\..\..\..\inc;..\..\..\..\..\inc\kernel; C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS -D__LITTLE_ENDIAN -DUSE_WDM_INTERRUPTS #-DFORCE_LIVEFISH diff --git a/trunk/hw/mlx4/kernel/bus/net/main.c b/trunk/hw/mlx4/kernel/bus/net/main.c index c9fdc1d9..54fd42c6 100644 --- a/trunk/hw/mlx4/kernel/bus/net/main.c +++ b/trunk/hw/mlx4/kernel/bus/net/main.c @@ -170,6 +170,16 @@ BOOLEAN mlx4_is_enabled_port(struct mlx4_dev *dev, int port_number) return FALSE; } +static void mlx4_set_port_mask(struct mlx4_dev *dev) +{ + int i; + + dev->caps.port_mask = 0; + for (i = 1; i <= dev->caps.num_ports; ++i) + if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) + dev->caps.port_mask |= 1 << (i - 1); +} + static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int err; @@ -309,6 +319,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) ++num_eth_ports; } + mlx4_set_port_mask(dev); + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] = diff --git a/trunk/hw/mlx4/kernel/bus/net/port.c b/trunk/hw/mlx4/kernel/bus/net/port.c index 
0381400f..3bc3b800 100644 --- a/trunk/hw/mlx4/kernel/bus/net/port.c +++ b/trunk/hw/mlx4/kernel/bus/net/port.c @@ -33,7 +33,9 @@ #include "mlx4.h" #include "cmd.h" +#include "public.h" +extern NTSTATUS __create_child(); void mlx4_init_mac_table(struct mlx4_dev *dev, u8 port) { @@ -88,6 +90,52 @@ static int mlx4_SET_PORT_mac_table(struct mlx4_dev *dev, u8 port, return err; } +static void mlx4_addrconf_ifid_eui48_win(u8 *eui, u64 mac) +{ + u8 *p = (u8*)&mac+2; //mac 6 bytes + memcpy(eui, p, 3); + memcpy(eui + 5, p + 3, 3); + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; +} + + +static int update_ipv6_gids_win(struct mlx4_dev *dev, int port, int clear, u64 mac) +{ + struct mlx4_cmd_mailbox *mailbox; + union ib_gid *gids, *tmpgids; + int err; + + tmpgids = kzalloc(128 * sizeof *gids, GFP_ATOMIC); + if (!tmpgids) + return -ENOMEM; + + if (!clear) { + mlx4_addrconf_ifid_eui48_win(&tmpgids[0].raw[8], cpu_to_be64(mac)); + tmpgids[0].global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + } + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto out; + } + + gids = mailbox->buf; + memcpy(gids, tmpgids, 128 * sizeof *gids); + + err = mlx4_cmd(dev, mailbox->dma.da, MLX4_SET_PORT_GID_TABLE << 8 | port, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); + + mlx4_free_cmd_mailbox(dev, mailbox); + +out: + kfree(tmpgids); + return err; +} + + int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index) { struct mlx4_mac_table *table = @@ -136,6 +184,26 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index) *index = free; ++table->total; + + //update port guid with mac address + update_ipv6_gids_win(dev, port, 0, mac); + +#if 0 + +// TODO: Tzachid 9/12/2009 Need to think of a better way of how to create the LLE +// interface + + + if(!InterlockedExchange(&dev->pdev->ib_hca_created, 1)) + { + NTSTATUS status = STATUS_SUCCESS; + status = __create_child(dev->pdev->p_wdf_device, BUS_HARDWARE_IDS, BUS_HARDWARE_DESCRIPTION, 0 ); + if (!NT_SUCCESS(status)) { + mlx4_err(dev, "__create_child (ib)failed with 0x%x\n", status); + dev->pdev->ib_hca_created = FALSE; + } + } +#endif out: up(&table->mac_sem); return err; diff --git a/trunk/hw/mlx4/kernel/hca/av.c b/trunk/hw/mlx4/kernel/hca/av.c index ccd8393f..3184874d 100644 --- a/trunk/hw/mlx4/kernel/hca/av.c +++ b/trunk/hw/mlx4/kernel/hca/av.c @@ -74,6 +74,7 @@ mlnx_create_av ( p_ib_ah = p_ib_pd->device->create_ah(p_ib_pd, &ah_attr); if (IS_ERR(p_ib_ah)) { err = PTR_ERR(p_ib_ah); + status = errno_to_iberr(err); HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("create_ah failed (%d)\n", err)); goto err_create_ah; } diff --git a/trunk/hw/mlx4/kernel/hca/data.c b/trunk/hw/mlx4/kernel/hca/data.c index 5962a143..52630b7b 100644 --- a/trunk/hw/mlx4/kernel/hca/data.c +++ b/trunk/hw/mlx4/kernel/hca/data.c @@ -339,6 +339,7 @@ from_hca_cap( ibal_port_p->max_vls = mthca_port_p->max_vl_num; ibal_port_p->sm_lid = cl_ntoh16(mthca_port_p->sm_lid); ibal_port_p->sm_sl = mthca_port_p->sm_sl; + ibal_port_p->transport = mthca_port_p->transport; ibal_port_p->link_state = (mthca_port_p->state != 0) ? 
(uint8_t)mthca_port_p->state : IB_LINK_DOWN; ibal_port_p->num_gids = (uint16_t)mthca_port_p->gid_tbl_len; ibal_port_p->num_pkeys = mthca_port_p->pkey_tbl_len; diff --git a/trunk/hw/mlx4/kernel/inc/l2w.h b/trunk/hw/mlx4/kernel/inc/l2w.h index 819cb425..a46ea43e 100644 --- a/trunk/hw/mlx4/kernel/inc/l2w.h +++ b/trunk/hw/mlx4/kernel/inc/l2w.h @@ -185,6 +185,8 @@ struct pci_dev DMA_ADAPTER * p_dma_adapter; /* HCA adapter object */ DEVICE_OBJECT * p_self_do; /* mlx4_bus's FDO */ DEVICE_OBJECT * pdo; /* mlx4_bus's PDO */ + PVOID p_wdf_device; /* wdf_device */ + LONG ib_hca_created; // mlx4_ib: various objects and info struct ib_device * ib_dev; // mlx4_net: various objects and info diff --git a/trunk/inc/iba/ib_types.h b/trunk/inc/iba/ib_types.h index c7cdf385..4f942ffd 100644 --- a/trunk/inc/iba/ib_types.h +++ b/trunk/inc/iba/ib_types.h @@ -9419,6 +9419,8 @@ typedef struct _ib_port_attr TO_LONG_PTR(ib_gid_t*, p_gid_table); TO_LONG_PTR(ib_net16_t*,p_pkey_table); + enum rdma_transport_type transport; + } ib_port_attr_t; /* * SEE ALSO diff --git a/trunk/ulp/opensm/user/include/iba/ib_types.h b/trunk/ulp/opensm/user/include/iba/ib_types.h index 1b0f1f4c..7465a92e 100644 --- a/trunk/ulp/opensm/user/include/iba/ib_types.h +++ b/trunk/ulp/opensm/user/include/iba/ib_types.h @@ -8861,7 +8861,11 @@ typedef struct _ib_port_attr { */ ib_gid_t *p_gid_table; ib_net16_t *p_pkey_table; + + enum rdma_transport_type transport; + } ib_port_attr_t; + /* * SEE ALSO * uint8_t, ib_port_cap_t, ib_link_states_t
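
The new ib_rdmaoe_ud_header_init() above sizes the RoCE UD header from the byte counts this patch adds to ib_pack.h (IB_ETH_BYTES = 14 plus the existing GRH/BTH/DETH sizes) and derives the GRH payload length and BTH pad count from the payload size. The following is a minimal user-space sketch of that arithmetic only, not the driver code itself; the payload value is an arbitrary example.

/* Sketch of the header sizing done by ib_rdmaoe_ud_header_init(); plain
 * user-space C, mirroring the formulas in the patch. */
#include <stdio.h>

enum {
    IB_ETH_BYTES  = 14,   /* Ethernet header added by this patch */
    IB_GRH_BYTES  = 40,
    IB_BTH_BYTES  = 12,
    IB_DETH_BYTES = 8
};

int main(void)
{
    int payload_bytes = 257;    /* arbitrary example payload size */
    int grh_present   = 1;      /* RoCE GSI packets carry a GRH */

    /* Bytes written by rdmaoe_ud_header_pack() (immediate data excluded). */
    int header_len = IB_ETH_BYTES + IB_BTH_BYTES + IB_DETH_BYTES
                   + (grh_present ? IB_GRH_BYTES : 0);

    /* GRH payload length: BTH + DETH + payload + 4-byte ICRC, rounded up
     * to a multiple of 4, exactly as in ib_rdmaoe_ud_header_init(). */
    int grh_payload = (IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes
                       + 4 /* ICRC */ + 3) & ~3;

    /* BTH pad count brings the payload up to a 4-byte boundary. */
    int pad_count = (4 - payload_bytes) & 3;

    printf("header_len=%d grh_payload=%d pad_count=%d\n",
           header_len, grh_payload, pad_count);
    return 0;   /* prints: header_len=74 grh_payload=284 pad_count=3 */
}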
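
RoCE address handling in this patch avoids any neighbour lookup: mlx4_ib_resolve_grh() recovers the destination MAC from a link-local destination GID via rdma_get_ll_mac() (or, for multicast GIDs, from the 33:33 IPv6 multicast MAC mapping), while update_ipv6_gids_win() builds the port's link-local GID from its MAC in the opposite direction. Below is a self-contained sketch of that MAC <-> modified EUI-64 mapping using the same byte layout; the MAC value is invented for illustration and struct gid is a local stand-in for union ib_gid, not a kernel type.

/* User-space sketch of the MAC <-> link-local GID mapping used by
 * rdma_get_ll_mac() / mlx4_addrconf_ifid_eui48_win(); not the driver code. */
#include <stdio.h>
#include <string.h>

struct gid { unsigned char raw[16]; };   /* stand-in for union ib_gid */

/* MAC -> modified EUI-64 interface ID in raw[8..15], fe80::/64 prefix. */
static void mac_to_ll_gid(const unsigned char mac[6], struct gid *g)
{
    memset(g, 0, sizeof *g);
    g->raw[0] = 0xfe; g->raw[1] = 0x80;          /* link-local prefix */
    memcpy(&g->raw[8], mac, 3);
    g->raw[11] = 0xff; g->raw[12] = 0xfe;        /* EUI-64 filler bytes */
    memcpy(&g->raw[13], mac + 3, 3);
    g->raw[8] ^= 2;                              /* flip universal/local bit */
}

/* Inverse: recover the MAC from a link-local GID (rdma_get_ll_mac logic). */
static void ll_gid_to_mac(const struct gid *g, unsigned char mac[6])
{
    memcpy(mac, &g->raw[8], 3);
    memcpy(mac + 3, &g->raw[13], 3);
    mac[0] ^= 2;
}

int main(void)
{
    /* Invented example MAC, not taken from the patch. */
    unsigned char mac[6] = { 0x00, 0x02, 0xc9, 0x01, 0x02, 0x03 };
    unsigned char back[6];
    struct gid g;
    int i;

    mac_to_ll_gid(mac, &g);
    ll_gid_to_mac(&g, back);

    for (i = 0; i < 16; i++)
        printf("%02x%s", g.raw[i], i == 15 ? "\n" : i % 2 ? ":" : "");
    /* prints fe80:0000:0000:0000:0202:c9ff:fe01:0203 */
    printf("round-trip ok: %d\n", memcmp(mac, back, 6) == 0);
    return 0;
}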
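
Transport selection is likewise table-free: mlx4_set_port_mask() records one bit per IB port, mlx4_ib_port_get_transport() reports RDMA_TRANSPORT_IB when that bit is set and RDMA_TRANSPORT_RDMAOE otherwise, and rdma_port_get_transport() prefers the new get_port_transport callback over the node-type fallback. A tiny sketch of the bit arithmetic for a hypothetical two-port adapter, port 1 in IB mode and port 2 in Ethernet mode:

/* Sketch of the port_mask logic added in net/main.c and ib/main.c; the
 * two-port configuration below is hypothetical. */
#include <stdio.h>

enum port_type { PORT_TYPE_IB = 1, PORT_TYPE_ETH = 2 };

int main(void)
{
    /* indexed from 1, like dev->caps.port_type[] in the patch */
    enum port_type port_type[3] = { 0, PORT_TYPE_IB, PORT_TYPE_ETH };
    unsigned port_mask = 0;
    int i;

    /* mlx4_set_port_mask(): one bit per IB port, bit 0 == port 1. */
    for (i = 1; i <= 2; ++i)
        if (port_type[i] == PORT_TYPE_IB)
            port_mask |= 1u << (i - 1);

    /* mlx4_ib_port_get_transport(): test the port's bit. */
    for (i = 1; i <= 2; ++i)
        printf("port %d -> %s\n", i,
               (port_mask & (1u << (i - 1))) ? "IB" : "RDMAoE");

    return 0;   /* prints: port 1 -> IB, port 2 -> RDMAoE */
}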