From 7ada839ae4e288fadf40ffa49b6b6fb0e1a6bf78 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Tue, 28 Aug 2012 12:33:04 -0700 Subject: [PATCH] librdmacm: Support using GIDs with rdma_getaddrinfo Allow the user to specify a GID as the node parameter into rdma_getaddrinfo. To distinguish between the node being an IPv6 address or a GID, we add a new flag, RAI_FAMILY, which can be set as part of the hints to rdma_getaddrinfo. When set, this flag indicates that the value of ai_family in the hints should be used when interpreting the node parameter. Signed-off-by: Sean Hefty --- include/rdma/rdma_cma.h | 2 + man/rdma_getaddrinfo.3 | 3 + src/acm.c | 31 ++------ src/addrinfo.c | 165 +++++++++++++++++++++++++++------------- src/cma.c | 2 +- src/cma.h | 6 +- 6 files changed, 132 insertions(+), 77 deletions(-) diff --git a/include/rdma/rdma_cma.h b/include/rdma/rdma_cma.h index f3c89211..4c4a057e 100755 --- a/include/rdma/rdma_cma.h +++ b/include/rdma/rdma_cma.h @@ -77,6 +77,7 @@ enum rdma_port_space { #define RDMA_IB_IP_PORT_MASK 0x000000000000FFFFULL #define RDMA_IB_IP_PS_TCP 0x0000000001060000ULL #define RDMA_IB_IP_PS_UDP 0x0000000001110000ULL +#define RDMA_IB_PS_IB 0x00000000013F0000ULL /* * Global qkey value for UDP QPs and multicast groups created via the @@ -176,6 +177,7 @@ struct rdma_cm_event { #define RAI_PASSIVE 0x00000001 #define RAI_NUMERICHOST 0x00000002 #define RAI_NOROUTE 0x00000004 +#define RAI_FAMILY 0x00000008 struct rdma_addrinfo { int ai_flags; diff --git a/man/rdma_getaddrinfo.3 b/man/rdma_getaddrinfo.3 index 86e00cad..cd819c6c 100755 --- a/man/rdma_getaddrinfo.3 +++ b/man/rdma_getaddrinfo.3 @@ -48,6 +48,9 @@ If specified, then the node parameter, if provided, must be a numerical network address. This flag suppresses any lengthy address resolution. .IP "RAI_NOROUTE" 12 If set, this flag suppresses any lengthy route resolution. +.IP "RAI_FAMILY" 12 +If set, the ai_family setting should be used as an input hint for interpreting +the node parameter. 
.IP "ai_family" 12 Address family for the source and destination address. Supported families are: AF_INET, AF_INET6, and AF_IB. diff --git a/src/acm.c b/src/acm.c index 95eee73f..3d8c912c 100755 --- a/src/acm.c +++ b/src/acm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Intel Corporation. All rights reserved. + * Copyright (c) 2010-2012 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -123,23 +123,6 @@ void ucma_ib_cleanup(void) } } -static void ucma_set_sid(enum rdma_port_space ps, struct sockaddr *addr, - struct sockaddr_ib *sib) -{ - uint16_t port; - - if (addr->sa_family == AF_INET) - port = ((struct sockaddr_in *) addr)->sin_port; - else - port = ((struct sockaddr_in6 *) addr)->sin6_port; - - sib->sib_sid = htonll(((uint64_t) ps << 16) + ntohs(port)); - if (port) - sib->sib_sid_mask = ~0ULL; - else - sib->sib_sid_mask = htonll(RDMA_IB_IP_PS_MASK); -} - static int ucma_ib_set_addr(struct rdma_addrinfo *ib_rai, struct rdma_addrinfo *rai) { @@ -184,6 +167,9 @@ static int ucma_ib_set_connect(struct rdma_addrinfo *ib_rai, { struct ib_connect_hdr *hdr; + if (rai->ai_family == AF_IB) + return 0; + hdr = calloc(1, sizeof *hdr); if (!hdr) return ERR(ENOMEM); @@ -360,16 +346,16 @@ void ucma_ib_resolve(struct rdma_addrinfo **rai, struct rdma_addrinfo *hints) if (ucma_inet_addr((*rai)->ai_dst_addr, (*rai)->ai_dst_len)) { data->flags = ACM_EP_FLAG_DEST; - if ((*rai)->ai_flags & (RAI_NUMERICHOST | RAI_NOROUTE)) + if (hints->ai_flags & (RAI_NUMERICHOST | RAI_NOROUTE)) data->flags |= ACM_FLAGS_NODELAY; ucma_set_ep_addr(data, (*rai)->ai_dst_addr); data++; msg.hdr.length += ACM_MSG_EP_LENGTH; } - if (hints && (hints->ai_route_len || + if (hints->ai_route_len || ucma_ib_addr((*rai)->ai_src_addr, (*rai)->ai_src_len) || - ucma_ib_addr((*rai)->ai_dst_addr, (*rai)->ai_dst_len))) { + ucma_ib_addr((*rai)->ai_dst_addr, (*rai)->ai_dst_len)) { struct 
ibv_path_record *path; if (hints->ai_route_len == sizeof(struct ibv_path_record)) @@ -409,8 +395,7 @@ void ucma_ib_resolve(struct rdma_addrinfo **rai, struct rdma_addrinfo *hints) ucma_ib_save_resp(*rai, &msg); - if (af_ib_support && !((*rai)->ai_flags & RAI_ROUTEONLY) && - (*rai)->ai_route_len && ((*rai)->ai_family != AF_IB)) + if (af_ib_support && !(hints->ai_flags & RAI_ROUTEONLY) && (*rai)->ai_route_len) ucma_resolve_af_ib(rai); } diff --git a/src/addrinfo.c b/src/addrinfo.c index 2da35f01..e7643f7c 100755 --- a/src/addrinfo.c +++ b/src/addrinfo.c @@ -53,12 +53,17 @@ #define RDMA_QPT_XRC_RECV 10 #endif +struct rdma_addrinfo nohints; + static void ucma_convert_to_ai(struct addrinfo *ai, struct rdma_addrinfo *rai) { memset(ai, 0, sizeof *ai); - ai->ai_flags = (rai->ai_flags & RAI_PASSIVE) ? AI_PASSIVE : 0; - ai->ai_flags |= (rai->ai_flags & RAI_NUMERICHOST) ? AI_NUMERICHOST : 0; - ai->ai_family = rai->ai_family; + if (rai->ai_flags & RAI_PASSIVE) + ai->ai_flags = AI_PASSIVE; + if (rai->ai_flags & RAI_NUMERICHOST) + ai->ai_flags |= AI_NUMERICHOST; + if (rai->ai_family != AF_IB) + ai->ai_family = rai->ai_family; switch (rai->ai_qp_type) { case IBV_QPT_RC: @@ -99,15 +104,60 @@ static void ucma_convert_to_ai(struct addrinfo *ai, struct rdma_addrinfo *rai) ai->ai_next = NULL; } +static int ucma_copy_addr(struct sockaddr **dst, socklen_t *dst_len, + struct sockaddr *src, socklen_t src_len) +{ + *dst = malloc(src_len); + if (!(*dst)) + return ERR(ENOMEM); + + memcpy(*dst, src, src_len); + *dst_len = src_len; + return 0; +} + +void ucma_set_sid(enum rdma_port_space ps, struct sockaddr *addr, + struct sockaddr_ib *sib) +{ + uint16_t port; + + port = addr ? 
ucma_get_port(addr) : 0; + sib->sib_sid = htonll(((uint64_t) ps << 16) + ntohs(port)); + + if (ps) + sib->sib_sid_mask = htonll(RDMA_IB_IP_PS_MASK); + if (port) + sib->sib_sid_mask |= htonll(RDMA_IB_IP_PORT_MASK); +} + +static int ucma_convert_in6(int ps, struct sockaddr_ib **dst, socklen_t *dst_len, + struct sockaddr_in6 *src, socklen_t src_len) +{ + *dst = calloc(1, sizeof(struct sockaddr_ib)); + if (!(*dst)) + return ERR(ENOMEM); +printf("converting ipv6 to ib\n"); + + (*dst)->sib_family = AF_IB; + (*dst)->sib_pkey = 0xFFFF; + (*dst)->sib_flowinfo = src->sin6_flowinfo; + ib_addr_set(&(*dst)->sib_addr, src->sin6_addr.s6_addr32[0], + src->sin6_addr.s6_addr32[1], src->sin6_addr.s6_addr32[2], + src->sin6_addr.s6_addr32[3]); + ucma_set_sid(ps, (struct sockaddr *) src, *dst); +printf("sid %llx %llx\n", (*dst)->sib_sid, (*dst)->sib_sid_mask); + (*dst)->sib_scope_id = src->sin6_scope_id; + + *dst_len = sizeof(struct sockaddr_ib); + return 0; +} + static int ucma_convert_to_rai(struct rdma_addrinfo *rai, struct rdma_addrinfo *hints, struct addrinfo *ai) { - struct sockaddr *addr; - char *canonname; - - rai->ai_family = ai->ai_family; + int ret; - if (hints && hints->ai_qp_type) { + if (hints->ai_qp_type) { rai->ai_qp_type = hints->ai_qp_type; } else { switch (ai->ai_socktype) { @@ -119,8 +169,9 @@ static int ucma_convert_to_rai(struct rdma_addrinfo *rai, break; } } +printf("qp type %d\n", rai->ai_qp_type); - if (hints && hints->ai_port_space) { + if (hints->ai_port_space) { rai->ai_port_space = hints->ai_port_space; } else { switch (ai->ai_protocol) { @@ -132,44 +183,63 @@ static int ucma_convert_to_rai(struct rdma_addrinfo *rai, break; } } - - addr = malloc(ai->ai_addrlen); - if (!addr) - return ERR(ENOMEM); - - canonname = ai->ai_canonname ? 
strdup(ai->ai_canonname) : NULL; - - memcpy(addr, ai->ai_addr, ai->ai_addrlen); - if (ai->ai_flags & RAI_PASSIVE) { - rai->ai_src_addr = addr; - rai->ai_src_len = ai->ai_addrlen; - rai->ai_src_canonname = canonname; +printf("port space %d\n", rai->ai_port_space); + + if (ai->ai_flags & AI_PASSIVE) { + rai->ai_flags = RAI_PASSIVE; + if (ai->ai_canonname) + rai->ai_src_canonname = strdup(ai->ai_canonname); + + if ((hints->ai_flags & RAI_FAMILY) && (hints->ai_family == AF_IB) && + (hints->ai_flags & RAI_NUMERICHOST)) { + rai->ai_family = AF_IB; + ret = ucma_convert_in6(rai->ai_port_space, + (struct sockaddr_ib **) &rai->ai_src_addr, + &rai->ai_src_len, + (struct sockaddr_in6 *) ai->ai_addr, + ai->ai_addrlen); +printf("using af ib \n"); + } else { + rai->ai_family = ai->ai_family; +printf("af %d\n", rai->ai_family); + ret = ucma_copy_addr(&rai->ai_src_addr, &rai->ai_src_len, + ai->ai_addr, ai->ai_addrlen); + } } else { - rai->ai_dst_addr = addr; - rai->ai_dst_len = ai->ai_addrlen; - rai->ai_dst_canonname = canonname; + if (ai->ai_canonname) + rai->ai_dst_canonname = strdup(ai->ai_canonname); + + if ((hints->ai_flags & RAI_FAMILY) && (hints->ai_family == AF_IB) && + (hints->ai_flags & RAI_NUMERICHOST)) { + rai->ai_family = AF_IB; + ret = ucma_convert_in6(rai->ai_port_space, + (struct sockaddr_ib **) &rai->ai_dst_addr, + &rai->ai_dst_len, + (struct sockaddr_in6 *) ai->ai_addr, + ai->ai_addrlen); + } else { + rai->ai_family = ai->ai_family; + ret = ucma_copy_addr(&rai->ai_dst_addr, &rai->ai_dst_len, + ai->ai_addr, ai->ai_addrlen); + } } - - return 0; + return ret; } -static int ucma_convert_gai(char *node, char *service, +static int ucma_getaddrinfo(char *node, char *service, struct rdma_addrinfo *hints, struct rdma_addrinfo *rai) { struct addrinfo ai_hints; - struct addrinfo *ai, *aih; + struct addrinfo *ai; int ret; - if (hints) { + if (hints != &nohints) { ucma_convert_to_ai(&ai_hints, hints); - rai->ai_flags = hints->ai_flags; - aih = &ai_hints; + ret = 
getaddrinfo(node, service, &ai_hints, &ai); } else { - aih = NULL; + ret = getaddrinfo(node, service, NULL, &ai); } - - ret = getaddrinfo(node, service, aih, &ai); if (ret) return ret; @@ -178,18 +248,6 @@ static int ucma_convert_gai(char *node, char *service, return ret; } -static int ucma_copy_ai_addr(struct sockaddr **dst, socklen_t *dst_len, - struct sockaddr *src, socklen_t src_len) -{ - *dst = calloc(1, src_len); - if (!(*dst)) - return ERR(ENOMEM); - - memcpy(*dst, src, src_len); - *dst_len = src_len; - return 0; -} - int rdma_getaddrinfo(char *node, char *service, struct rdma_addrinfo *hints, struct rdma_addrinfo **res) @@ -208,24 +266,27 @@ int rdma_getaddrinfo(char *node, char *service, if (!rai) return ERR(ENOMEM); + if (!hints) + hints = &nohints; + if (node || service) { - ret = ucma_convert_gai(node, service, hints, rai); + ret = ucma_getaddrinfo(node, service, hints, rai); } else { rai->ai_flags = hints->ai_flags; rai->ai_family = hints->ai_family; rai->ai_qp_type = hints->ai_qp_type; rai->ai_port_space = hints->ai_port_space; if (hints->ai_dst_len) { - ret = ucma_copy_ai_addr(&rai->ai_dst_addr, &rai->ai_dst_len, - hints->ai_dst_addr, hints->ai_dst_len); + ret = ucma_copy_addr(&rai->ai_dst_addr, &rai->ai_dst_len, + hints->ai_dst_addr, hints->ai_dst_len); } } if (ret) goto err; - if (!rai->ai_src_len && hints && hints->ai_src_len) { - ret = ucma_copy_ai_addr(&rai->ai_src_addr, &rai->ai_src_len, - hints->ai_src_addr, hints->ai_src_len); + if (!rai->ai_src_len && hints->ai_src_len) { + ret = ucma_copy_addr(&rai->ai_src_addr, &rai->ai_src_len, + hints->ai_src_addr, hints->ai_src_len); if (ret) goto err; } diff --git a/src/cma.c b/src/cma.c index f4609672..d168df67 100755 --- a/src/cma.c +++ b/src/cma.c @@ -2240,7 +2240,7 @@ int ucma_max_qpsize(struct rdma_cm_id *id) return id_priv->cma_dev->max_qpsize; } -static uint16_t ucma_get_port(struct sockaddr *addr) +uint16_t ucma_get_port(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: diff 
--git a/src/cma.h b/src/cma.h index 6c3df275..6a1cd752 100644 --- a/src/cma.h +++ b/src/cma.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005-2010 Intel Corporation. All rights reserved. + * Copyright (c) 2005-2012 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -45,6 +45,7 @@ #include #include +#include #ifdef INCLUDE_VALGRIND # include @@ -137,6 +138,9 @@ typedef struct { volatile int val; } atomic_t; #define atomic_get(v) ((v)->val) #define atomic_set(v, s) ((v)->val = s) +uint16_t ucma_get_port(struct sockaddr *addr); +void ucma_set_sid(enum rdma_port_space ps, struct sockaddr *addr, + struct sockaddr_ib *sib); int ucma_max_qpsize(struct rdma_cm_id *id); int ucma_complete(struct rdma_cm_id *id); static inline int ERR(int err) -- 2.41.0