From 7141b45fa54147324b28e177a64ca5542621e754 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Wed, 18 Jun 2014 10:48:07 -0700 Subject: [PATCH] commit --- meta | 7 +- patches/cmtime-rework-program-to-be-mu | 454 ------------------------- patches/rdma_client-handle-ibv_send_in | 94 ----- patches/rdma_client-use-perror-unwind- | 109 ------ patches/rdma_server-handle-ibv_send_in | 134 -------- patches/rdma_server-use-perror-unwind- | 129 ------- 6 files changed, 1 insertion(+), 926 deletions(-) delete mode 100644 patches/cmtime-rework-program-to-be-mu delete mode 100644 patches/rdma_client-handle-ibv_send_in delete mode 100644 patches/rdma_client-use-perror-unwind- delete mode 100644 patches/rdma_server-handle-ibv_send_in delete mode 100644 patches/rdma_server-use-perror-unwind- diff --git a/meta b/meta index 0a7a0797..3d74a960 100644 --- a/meta +++ b/meta @@ -1,12 +1,7 @@ Version: 1 -Previous: 36c7978a5a5fb79ffd3de0265a943c93656f6dd5 +Previous: 251e0dbcef802ee43e9b937a6605f56ab5a7c657 Head: 6c7d6d3038524c275ecfb7468b4455fe2cc39a19 Applied: - cmtime-rework-program-to-be-mu: 05fc15b44805a23a4e8562d1953074243950dfbe - rdma_client-use-perror-unwind-: 1bc834aeca99a4dd0c5bea733e2735f148b4418c - rdma_server-use-perror-unwind-: 2c2e44e144f17c2cef4af052ec91a680c9a81fb9 - rdma_client-handle-ibv_send_in: 9fe390a793203a13b0507472848e1e7da8c75bed - rdma_server-handle-ibv_send_in: 6c7d6d3038524c275ecfb7468b4455fe2cc39a19 Unapplied: old-af-ib: aaa0d9ca917c8c361a978e5a116963c2cceac5ba old-seterr: 47eb0c419687c2690292c1910acae83a46e5388c diff --git a/patches/cmtime-rework-program-to-be-mu b/patches/cmtime-rework-program-to-be-mu deleted file mode 100644 index 597a3f5f..00000000 --- a/patches/cmtime-rework-program-to-be-mu +++ /dev/null @@ -1,454 +0,0 @@ -Bottom: dba4f99efe6d0a331ffd180015186c49ea238a31 -Top: 6125ab74e3cad30fbf9ea3400c43db59a8137731 -Author: Doug Ledford -Date: 2014-06-18 10:43:04 -0700 - -cmtime: rework program to be multithread - -When using very large numbers of 
connections (10,000 was in use here), -we ran into a problem where when we resolved a performance problem in -the kernel cma.c code, we suddenly developed a new problem. That new -problem turned out to be the fact that with the underlying kernel issue -resolved, 10,000 connect requests would flood the server side of the -test and the cmtime application would respond as quickly as possible. -However, the client side would not bother to check any of the returns -until after having sent all 10,000 connect requests. When the kernel -had a serializing performance problem, this was OK. When it was fixed, -this caused a general slowdown in connect operations due to overruns in -the event processing. This patch causes the client side to fire off -threads that will handle responses to connect requests as they come in -instead of allowing them to backlog uncontrollably. Times for a 10,000 -connect run changed from this: - -[root@rdma-dev-01 ~]# more -3.12.0-rc1.cached_gids+optimized_connect+trimmed_cache+.output -ib1: -step total ms max ms min us us / conn -create id : 46.64 0.10 1.00 4.66 -bind addr : 89.61 0.04 7.00 8.96 -resolve addr : 50.63 26.18 23976.00 5.06 -resolve route: 565.44 538.77 26736.00 56.54 -create qp : 4028.31 5.70 326.00 402.83 -connect : 50077.42 49990.49 90734.00 5007.74 -disconnect : 5277.25 4850.35 380017.00 527.72 -destroy : 42.15 0.04 2.00 4.21 - -ib0: -step total ms max ms min us us / conn -create id : 34.82 0.04 1.00 3.48 -bind addr : 25.94 0.02 1.00 2.59 -resolve addr : 48.18 25.01 22779.00 4.82 -resolve route: 501.28 476.26 25071.00 50.13 -create qp : 3274.12 6.05 257.00 327.41 -connect : 55549.64 55490.32 62150.00 5554.96 -disconnect : 5263.64 4851.18 375628.00 526.36 -destroy : 47.20 0.07 2.00 4.72 - -to this: - -[root@rdma-dev-01 ~]# more -3.12.0-rc1.cached_gids+optimized_connect+trimmed_cache+-fixed-cmtime.output -ib1: -step total ms max ms min us us / conn -create id : 34.45 0.08 1.00 3.44 -bind addr : 88.41 0.04 7.00 8.84 -resolve addr : 
33.59 4.65 612.00 3.36 -resolve route: 618.68 0.61 97.00 61.87 -create qp : 4024.03 6.30 341.00 402.40 -connect : 6983.35 6886.33 8509.00 698.33 -disconnect : 5066.47 230.34 831.00 506.65 -destroy : 37.02 0.03 2.00 3.70 - -ib0: -step total ms max ms min us us / conn -create id : 42.61 0.14 1.00 4.26 -bind addr : 27.05 0.03 2.00 2.70 -resolve addr : 40.65 10.73 869.00 4.06 -resolve route: 626.75 0.60 103.00 62.68 -create qp : 3334.50 6.48 273.00 333.45 -connect : 6310.29 6251.59 13298.00 631.03 -disconnect : 5111.12 365.87 867.00 511.11 -destroy : 36.57 0.02 2.00 3.66 - -with this patch. - -Signed-off-by: Doug Ledford -Signed-off-by: Sean Hefty - - ---- - -diff --git a/examples/cmtime.c b/examples/cmtime.c -index 6761f2c..ebc660b 100644 ---- a/examples/cmtime.c -+++ b/examples/cmtime.c -@@ -84,10 +84,27 @@ struct node { - int retries; - }; - -+struct list_head { -+ struct list_head *prev; -+ struct list_head *next; -+ struct rdma_cm_id *id; -+}; -+ -+struct work_list { -+ pthread_mutex_t lock; -+ pthread_cond_t cond; -+ struct list_head list; -+}; -+ -+#define INIT_LIST(x) ((x)->prev = (x)->next = (x)) -+ -+static struct work_list req_work; -+static struct work_list disc_work; - static struct node *nodes; - static struct timeval times[STEP_CNT][2]; - static int connections = 100; --static int left[STEP_CNT]; -+static volatile int started[STEP_CNT]; -+static volatile int completed[STEP_CNT]; - static struct ibv_qp_init_attr init_qp_attr; - static struct rdma_conn_param conn_param; - -@@ -96,6 +113,59 @@ static struct rdma_conn_param conn_param; - #define start_time(s) gettimeofday(&times[s][0], NULL) - #define end_time(s) gettimeofday(&times[s][1], NULL) - -+static inline void __list_delete(struct list_head *list) -+{ -+ struct list_head *prev, *next; -+ prev = list->prev; -+ next = list->next; -+ prev->next = next; -+ next->prev = prev; -+ INIT_LIST(list); -+} -+ -+static inline int __list_empty(struct work_list *list) -+{ -+ return list->list.next == &list->list; -+} -+ 
-+static inline int list_empty(struct work_list *work_list) -+{ -+ pthread_mutex_lock(&work_list->lock); -+ return work_list->list.next == &work_list->list; -+ pthread_mutex_unlock(&work_list->lock); -+} -+ -+static inline struct list_head *__list_remove_head(struct work_list *work_list) -+{ -+ struct list_head *list_item; -+ -+ list_item = work_list->list.next; -+ __list_delete(list_item); -+ return list_item; -+} -+ -+static inline struct list_head *list_remove_head(struct work_list *work_list) -+{ -+ struct list_head *list_item; -+ pthread_mutex_lock(&work_list->lock); -+ list_item = __list_remove_head(work_list); -+ pthread_mutex_unlock(&work_list->lock); -+ return list_item; -+} -+ -+static inline void list_add_tail(struct work_list *work_list, struct list_head *req) -+{ -+ int empty; -+ pthread_mutex_lock(&work_list->lock); -+ empty = __list_empty(work_list); -+ req->prev = work_list->list.prev; -+ req->next = &work_list->list; -+ req->prev->next = work_list->list.prev = req; -+ pthread_mutex_unlock(&work_list->lock); -+ if (empty) -+ pthread_cond_signal(&work_list->cond); -+} -+ - static int zero_time(struct timeval *t) - { - return !(t->tv_sec || t->tv_usec); -@@ -140,28 +210,28 @@ static void show_perf(void) - static void addr_handler(struct node *n) - { - end_perf(n, STEP_RESOLVE_ADDR); -- left[STEP_RESOLVE_ADDR]--; -+ completed[STEP_RESOLVE_ADDR]++; - } - - static void route_handler(struct node *n) - { - end_perf(n, STEP_RESOLVE_ROUTE); -- left[STEP_RESOLVE_ROUTE]--; -+ completed[STEP_RESOLVE_ROUTE]++; - } - - static void conn_handler(struct node *n) - { - end_perf(n, STEP_CONNECT); -- left[STEP_CONNECT]--; -+ completed[STEP_CONNECT]++; - } - - static void disc_handler(struct node *n) - { - end_perf(n, STEP_DISCONNECT); -- left[STEP_DISCONNECT]--; -+ completed[STEP_DISCONNECT]++; - } - --static int req_handler(struct rdma_cm_id *id) -+static void __req_handler(struct rdma_cm_id *id) - { - int ret; - -@@ -176,17 +246,50 @@ static int req_handler(struct 
rdma_cm_id *id) - perror("failure accepting"); - goto err; - } -- return 0; -+ return; - - err: - printf("failing connection request\n"); - rdma_reject(id, NULL, 0); -- return ret; -+ rdma_destroy_id(id); -+ return; -+} -+ -+static void *req_handler_thread(void *arg) -+{ -+ struct list_head *work; -+ do { -+ pthread_mutex_lock(&req_work.lock); -+ if (__list_empty(&req_work)) -+ pthread_cond_wait(&req_work.cond, &req_work.lock); -+ work = __list_remove_head(&req_work); -+ pthread_mutex_unlock(&req_work.lock); -+ __req_handler(work->id); -+ free(work); -+ } while (1); -+ return NULL; -+} -+ -+static void *disc_handler_thread(void *arg) -+{ -+ struct list_head *work; -+ do { -+ pthread_mutex_lock(&disc_work.lock); -+ if (__list_empty(&disc_work)) -+ pthread_cond_wait(&disc_work.cond, &disc_work.lock); -+ work = __list_remove_head(&disc_work); -+ pthread_mutex_unlock(&disc_work.lock); -+ rdma_disconnect(work->id); -+ rdma_destroy_id(work->id); -+ free(work); -+ } while (1); -+ return NULL; - } - - static void cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) - { - struct node *n = id->context; -+ struct list_head *request; - - switch (event->event) { - case RDMA_CM_EVENT_ADDR_RESOLVED: -@@ -196,10 +299,15 @@ static void cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) - route_handler(n); - break; - case RDMA_CM_EVENT_CONNECT_REQUEST: -- if (req_handler(id)) { -- rdma_ack_cm_event(event); -+ request = malloc(sizeof *request); -+ if (!request) { -+ perror("out of memory accepting connect request"); -+ rdma_reject(id, NULL, 0); - rdma_destroy_id(id); -- return; -+ } else { -+ INIT_LIST(request); -+ request->id = id; -+ list_add_tail(&req_work, request); - } - break; - case RDMA_CM_EVENT_ESTABLISHED: -@@ -235,12 +343,18 @@ static void cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) - break; - case RDMA_CM_EVENT_DISCONNECTED: - if (!n) { -- rdma_disconnect(id); -- rdma_ack_cm_event(event); -- rdma_destroy_id(id); -- return; -- } 
-- disc_handler(n); -+ request = malloc(sizeof *request); -+ if (!request) { -+ perror("out of memory queueing disconnect request, handling synchronously"); -+ rdma_disconnect(id); -+ rdma_destroy_id(id); -+ } else { -+ INIT_LIST(request); -+ request->id = id; -+ list_add_tail(&disc_work, request); -+ } -+ } else -+ disc_handler(n); - break; - case RDMA_CM_EVENT_DEVICE_REMOVAL: - /* Cleanup will occur after test completes. */ -@@ -296,29 +410,67 @@ static void cleanup_nodes(void) - end_time(STEP_DESTROY); - } - --static int process_events(int *left) -+static void *process_events(void *arg) - { - struct rdma_cm_event *event; - int ret = 0; - -- while ((!left || *left) && !ret) { -+ while (!ret) { - ret = rdma_get_cm_event(channel, &event); - if (!ret) { - cma_handler(event->id, event); - } else { -- perror("failure in rdma_get_cm_event in connect events"); -+ perror("failure in rdma_get_cm_event in process_server_events"); - ret = errno; - } - } -- -- return ret; -+ return NULL; - } - - static int run_server(void) - { -+ pthread_t req_thread, disc_thread; - struct rdma_cm_id *listen_id; - int ret; - -+ INIT_LIST(&req_work.list); -+ INIT_LIST(&disc_work.list); -+ ret = pthread_mutex_init(&req_work.lock, NULL); -+ if (ret) { -+ perror("initializing mutex for req work"); -+ return ret; -+ } -+ -+ ret = pthread_mutex_init(&disc_work.lock, NULL); -+ if (ret) { -+ perror("initializing mutex for disc work"); -+ return ret; -+ } -+ -+ ret = pthread_cond_init(&req_work.cond, NULL); -+ if (ret) { -+ perror("initializing cond for req work"); -+ return ret; -+ } -+ -+ ret = pthread_cond_init(&disc_work.cond, NULL); -+ if (ret) { -+ perror("initializing cond for disc work"); -+ return ret; -+ } -+ -+ ret = pthread_create(&req_thread, NULL, req_handler_thread, NULL); -+ if (ret) { -+ perror("failed to create req handler thread"); -+ return ret; -+ } -+ -+ ret = pthread_create(&disc_thread, NULL, disc_handler_thread, NULL); -+ if (ret) { -+ perror("failed to create disconnect 
handler thread"); -+ return ret; -+ } -+ - ret = rdma_create_id(channel, &listen_id, NULL, hints.ai_port_space); - if (ret) { - perror("listen request failed"); -@@ -351,6 +503,7 @@ static int run_server(void) - - static int run_client(void) - { -+ pthread_t event_thread; - int i, ret; - - ret = get_rdma_addr(src_addr, dst_addr, port, &hints, &rai); -@@ -365,6 +518,12 @@ static int run_client(void) - conn_param.private_data = rai->ai_connect; - conn_param.private_data_len = rai->ai_connect_len; - -+ ret = pthread_create(&event_thread, NULL, process_events, NULL); -+ if (ret) { -+ perror("failure creating event thread"); -+ return ret; -+ } -+ - if (src_addr) { - printf("binding source address\n"); - start_time(STEP_BIND); -@@ -395,11 +554,9 @@ static int run_client(void) - nodes[i].error = 1; - continue; - } -- left[STEP_RESOLVE_ADDR]++; -+ started[STEP_RESOLVE_ADDR]++; - } -- ret = process_events(&left[STEP_RESOLVE_ADDR]); -- if (ret) -- return ret; -+ while (started[STEP_RESOLVE_ADDR] != completed[STEP_RESOLVE_ADDR]) sched_yield(); - end_time(STEP_RESOLVE_ADDR); - - printf("resolving route\n"); -@@ -415,11 +572,9 @@ static int run_client(void) - nodes[i].error = 1; - continue; - } -- left[STEP_RESOLVE_ROUTE]++; -+ started[STEP_RESOLVE_ROUTE]++; - } -- ret = process_events(&left[STEP_RESOLVE_ROUTE]); -- if (ret) -- return ret; -+ while (started[STEP_RESOLVE_ROUTE] != completed[STEP_RESOLVE_ROUTE]) sched_yield(); - end_time(STEP_RESOLVE_ROUTE); - - printf("creating qp\n"); -@@ -450,11 +605,9 @@ static int run_client(void) - nodes[i].error = 1; - continue; - } -- left[STEP_CONNECT]++; -+ started[STEP_CONNECT]++; - } -- ret = process_events(&left[STEP_CONNECT]); -- if (ret) -- return ret; -+ while (started[STEP_CONNECT] != completed[STEP_CONNECT]) sched_yield(); - end_time(STEP_CONNECT); - - printf("disconnecting\n"); -@@ -464,11 +617,9 @@ static int run_client(void) - continue; - start_perf(&nodes[i], STEP_DISCONNECT); - rdma_disconnect(nodes[i].id); -- 
left[STEP_DISCONNECT]++; -+ started[STEP_DISCONNECT]++; - } -- ret = process_events(&left[STEP_DISCONNECT]); -- if (ret) -- return ret; -+ while (started[STEP_DISCONNECT] != completed[STEP_DISCONNECT]) sched_yield(); - end_time(STEP_DISCONNECT); - - return ret; diff --git a/patches/rdma_client-handle-ibv_send_in b/patches/rdma_client-handle-ibv_send_in deleted file mode 100644 index 65d02c99..00000000 --- a/patches/rdma_client-handle-ibv_send_in +++ /dev/null @@ -1,94 +0,0 @@ -Bottom: aba93beee1c72b4d9c4dd0f26b5772e6b16f4ece -Top: 3c374149dc43dd63cbfda16b151381c36d912259 -Author: Doug Ledford -Date: 2014-06-18 10:44:49 -0700 - -rdma_client: handle IBV_SEND_INLINE correctly - -Not all RDMA devices support IBV_SEND_INLINE. At least some of those -that don't will ignore the flag passed to rdma_post_send and attempt to -send the command by using an sge entry instead. Because we don't -register the send memory, this fails. The proper way to deal with the -fact that IBV_SEND_INLINE is not guaranteed is to check the returned -value in our cap struct to see if we have support for inline data, and -if not, fall back to non-inline sends and to register the send memory -region. 
- -Signed-off-by: Doug Ledford -Signed-off-by: Sean Hefty - - ---- - -diff --git a/examples/rdma_client.c b/examples/rdma_client.c -index e0e176b..f676b70 100644 ---- a/examples/rdma_client.c -+++ b/examples/rdma_client.c -@@ -39,7 +39,8 @@ static char *server = "127.0.0.1"; - static char *port = "7471"; - - struct rdma_cm_id *id; --struct ibv_mr *mr; -+struct ibv_mr *mr, *send_mr; -+int send_flags; - uint8_t send_msg[16]; - uint8_t recv_msg[16]; - -@@ -65,6 +66,13 @@ static int run(void) - attr.qp_context = id; - attr.sq_sig_all = 1; - ret = rdma_create_ep(&id, res, NULL, &attr); -+ // Check to see if we got inline data allowed or not -+ if (attr.cap.max_inline_data >= 16) -+ send_flags = IBV_SEND_INLINE; -+ else -+ printf("rdma_client: device doesn't support IBV_SEND_INLINE, " -+ "using sge sends\n"); -+ - if (ret) { - perror("rdma_create_ep"); - goto out_free_addrinfo; -@@ -76,20 +84,28 @@ static int run(void) - ret = -1; - goto out_destroy_ep; - } -+ if ((send_flags & IBV_SEND_INLINE) == 0) { -+ send_mr = rdma_reg_msgs(id, send_msg, 16); -+ if (!send_mr) { -+ perror("rdma_reg_msgs for send_msg"); -+ ret = -1; -+ goto out_dereg_recv; -+ } -+ } - - ret = rdma_post_recv(id, NULL, recv_msg, 16, mr); - if (ret) { - perror("rdma_post_recv"); -- goto out_dereg; -+ goto out_dereg_send; - } - - ret = rdma_connect(id, NULL); - if (ret) { - perror("rdma_connect"); -- goto out_dereg; -+ goto out_dereg_send; - } - -- ret = rdma_post_send(id, NULL, send_msg, 16, NULL, IBV_SEND_INLINE); -+ ret = rdma_post_send(id, NULL, send_msg, 16, send_mr, send_flags); - if (ret) { - perror("rdma_post_send"); - goto out_disconnect; -@@ -109,7 +125,10 @@ static int run(void) - - out_disconnect: - rdma_disconnect(id); --out_dereg: -+out_dereg_send: -+ if ((send_flags & IBV_SEND_INLINE) == 0) -+ rdma_dereg_mr(send_mr); -+out_dereg_recv: - rdma_dereg_mr(mr); - out_destroy_ep: - rdma_destroy_ep(id); diff --git a/patches/rdma_client-use-perror-unwind- b/patches/rdma_client-use-perror-unwind- 
deleted file mode 100644 index 3db1d788..00000000 --- a/patches/rdma_client-use-perror-unwind- +++ /dev/null @@ -1,109 +0,0 @@ -Bottom: 6125ab74e3cad30fbf9ea3400c43db59a8137731 -Top: 0a87dd48b46cfe180a2b5d331c443345a20ff873 -Author: Doug Ledford -Date: 2014-06-18 10:44:13 -0700 - -rdma_client: use perror, unwind allocs on failure - -Our main test function prints out errno directly, which is hard to read -as it's not decoded at all. Instead, use perror() to make failures more -readable. Also redo the failure flow so that we can do a simple unwind -at the end of the function and just jump to the right unwind spot on -error. - -Signed-off-by: Doug Ledford -Signed-off-by: Sean Hefty - - ---- - -diff --git a/examples/rdma_client.c b/examples/rdma_client.c -index 7a59d97..e0e176b 100644 ---- a/examples/rdma_client.c -+++ b/examples/rdma_client.c -@@ -54,8 +54,8 @@ static int run(void) - hints.ai_port_space = RDMA_PS_TCP; - ret = rdma_getaddrinfo(server, port, &hints, &res); - if (ret) { -- printf("rdma_getaddrinfo %d\n", errno); -- return ret; -+ perror("rdma_getaddrinfo"); -+ goto out; - } - - memset(&attr, 0, sizeof attr); -@@ -65,46 +65,58 @@ static int run(void) - attr.qp_context = id; - attr.sq_sig_all = 1; - ret = rdma_create_ep(&id, res, NULL, &attr); -- rdma_freeaddrinfo(res); - if (ret) { -- printf("rdma_create_ep %d\n", errno); -- return ret; -+ perror("rdma_create_ep"); -+ goto out_free_addrinfo; - } - - mr = rdma_reg_msgs(id, recv_msg, 16); - if (!mr) { -- printf("rdma_reg_msgs %d\n", errno); -- return ret; -+ perror("rdma_reg_msgs for recv_msg"); -+ ret = -1; -+ goto out_destroy_ep; - } - - ret = rdma_post_recv(id, NULL, recv_msg, 16, mr); - if (ret) { -- printf("rdma_post_recv %d\n", errno); -- return ret; -+ perror("rdma_post_recv"); -+ goto out_dereg; - } - - ret = rdma_connect(id, NULL); - if (ret) { -- printf("rdma_connect %d\n", errno); -- return ret; -+ perror("rdma_connect"); -+ goto out_dereg; - } - - ret = rdma_post_send(id, NULL, send_msg, 16, 
NULL, IBV_SEND_INLINE); - if (ret) { -- printf("rdma_post_send %d\n", errno); -- return ret; -+ perror("rdma_post_send"); -+ goto out_disconnect; - } - -- ret = rdma_get_recv_comp(id, &wc); -- if (ret <= 0) { -- printf("rdma_get_recv_comp %d\n", ret); -- return ret; -+ while ((ret = rdma_get_send_comp(id, &wc)) == 0); -+ if (ret < 0) { -+ perror("rdma_get_send_comp"); -+ goto out_disconnect; - } - -+ while ((ret = rdma_get_recv_comp(id, &wc)) == 0); -+ if (ret < 0) -+ perror("rdma_get_recv_comp"); -+ else -+ ret = 0; -+ -+out_disconnect: - rdma_disconnect(id); -+out_dereg: - rdma_dereg_mr(mr); -+out_destroy_ep: - rdma_destroy_ep(id); -- return 0; -+out_free_addrinfo: -+ rdma_freeaddrinfo(res); -+out: -+ return ret; - } - - int main(int argc, char **argv) diff --git a/patches/rdma_server-handle-ibv_send_in b/patches/rdma_server-handle-ibv_send_in deleted file mode 100644 index 586e0842..00000000 --- a/patches/rdma_server-handle-ibv_send_in +++ /dev/null @@ -1,134 +0,0 @@ -Bottom: 3c374149dc43dd63cbfda16b151381c36d912259 -Top: 9ae3b5be84001aad21beaa018d680978a03434ee -Author: Doug Ledford -Date: 2014-06-18 10:45:23 -0700 - -rdma_server: handle IBV_SEND_INLINE correctly - -Not all RDMA devices support IBV_SEND_INLINE. At least some of those -that don't will ignore the flag passed to rdma_post_send and attempt to -send the command by using an sge entry instead. Because we don't -register the send memory, this fails. The proper way to deal with the -fact that IBV_SEND_INLINE is not guaranteed is to check the returned -value in our cap struct to see if we have support for inline data, and -if not, fall back to non-inline sends and to register the send memory -region. 
- -Signed-off-by: Doug Ledford -Signed-off-by: Sean Hefty - - ---- - -diff --git a/examples/rdma_server.c b/examples/rdma_server.c -index 54922fc..129cf42 100644 ---- a/examples/rdma_server.c -+++ b/examples/rdma_server.c -@@ -39,14 +39,16 @@ - static char *port = "7471"; - - struct rdma_cm_id *listen_id, *id; --struct ibv_mr *mr; -+struct ibv_mr *mr, *send_mr; -+int send_flags; - uint8_t send_msg[16]; - uint8_t recv_msg[16]; - - static int run(void) - { - struct rdma_addrinfo hints, *res; -- struct ibv_qp_init_attr attr; -+ struct ibv_qp_init_attr init_attr; -+ struct ibv_qp_attr qp_attr; - struct ibv_wc wc; - int ret; - -@@ -59,12 +61,12 @@ static int run(void) - return ret; - } - -- memset(&attr, 0, sizeof attr); -- attr.cap.max_send_wr = attr.cap.max_recv_wr = 1; -- attr.cap.max_send_sge = attr.cap.max_recv_sge = 1; -- attr.cap.max_inline_data = 16; -- attr.sq_sig_all = 1; -- ret = rdma_create_ep(&listen_id, res, NULL, &attr); -+ memset(&init_attr, 0, sizeof init_attr); -+ init_attr.cap.max_send_wr = init_attr.cap.max_recv_wr = 1; -+ init_attr.cap.max_send_sge = init_attr.cap.max_recv_sge = 1; -+ init_attr.cap.max_inline_data = 16; -+ init_attr.sq_sig_all = 1; -+ ret = rdma_create_ep(&listen_id, res, NULL, &init_attr); - if (ret) { - perror("rdma_create_ep"); - goto out_free_addrinfo; -@@ -82,23 +84,45 @@ static int run(void) - goto out_destroy_listen_ep; - } - -+ memset(&qp_attr, 0, sizeof qp_attr); -+ memset(&init_attr, 0, sizeof init_attr); -+ ret = ibv_query_qp(id->qp, &qp_attr, IBV_QP_CAP, -+ &init_attr); -+ if (ret) { -+ perror("ibv_query_qp"); -+ goto out_destroy_accept_ep; -+ } -+ if (init_attr.cap.max_inline_data >= 16) -+ send_flags = IBV_SEND_INLINE; -+ else -+ printf("rdma_server: device doesn't support IBV_SEND_INLINE, " -+ "using sge sends\n"); -+ - mr = rdma_reg_msgs(id, recv_msg, 16); - if (!mr) { - ret = -1; -- perror("rdma_reg_msgs"); -+ perror("rdma_reg_msgs for recv_msg"); - goto out_destroy_accept_ep; - } -+ if ((send_flags & 
IBV_SEND_INLINE) == 0) { -+ send_mr = rdma_reg_msgs(id, send_msg, 16); -+ if (!send_mr) { -+ ret = -1; -+ perror("rdma_reg_msgs for send_msg"); -+ goto out_dereg_recv; -+ } -+ } - - ret = rdma_post_recv(id, NULL, recv_msg, 16, mr); - if (ret) { - perror("rdma_post_recv"); -- goto out_dereg; -+ goto out_dereg_send; - } - - ret = rdma_accept(id, NULL); - if (ret) { - perror("rdma_accept"); -- goto out_dereg; -+ goto out_dereg_send; - } - - while ((ret = rdma_get_recv_comp(id, &wc)) == 0); -@@ -107,7 +131,7 @@ static int run(void) - goto out_disconnect; - } - -- ret = rdma_post_send(id, NULL, send_msg, 16, NULL, IBV_SEND_INLINE); -+ ret = rdma_post_send(id, NULL, send_msg, 16, send_mr, send_flags); - if (ret) { - perror("rdma_post_send"); - goto out_disconnect; -@@ -121,7 +145,10 @@ static int run(void) - - out_disconnect: - rdma_disconnect(id); --out_dereg: -+out_dereg_send: -+ if ((send_flags & IBV_SEND_INLINE) == 0) -+ rdma_dereg_mr(send_mr); -+out_dereg_recv: - rdma_dereg_mr(mr); - out_destroy_accept_ep: - rdma_destroy_ep(id); diff --git a/patches/rdma_server-use-perror-unwind- b/patches/rdma_server-use-perror-unwind- deleted file mode 100644 index 5f65e172..00000000 --- a/patches/rdma_server-use-perror-unwind- +++ /dev/null @@ -1,129 +0,0 @@ -Bottom: 0a87dd48b46cfe180a2b5d331c443345a20ff873 -Top: aba93beee1c72b4d9c4dd0f26b5772e6b16f4ece -Author: Doug Ledford -Date: 2014-06-18 10:44:28 -0700 - -rdma_server: use perror, unwind allocs on failure - -Our main test function prints out errno directly, which is hard to read -as it's not decoded at all. Instead, use perror() to make failures more -readable. Also redo the failure flow so that we can do a simple unwind -at the end of the function and just jump to the right unwind spot on -error. 
- -Signed-off-by: Doug Ledford -Signed-off-by: Sean Hefty - - ---- - -diff --git a/examples/rdma_server.c b/examples/rdma_server.c -index 5b9e16d..54922fc 100644 ---- a/examples/rdma_server.c -+++ b/examples/rdma_server.c -@@ -55,7 +55,7 @@ static int run(void) - hints.ai_port_space = RDMA_PS_TCP; - ret = rdma_getaddrinfo(NULL, port, &hints, &res); - if (ret) { -- printf("rdma_getaddrinfo %d\n", errno); -+ perror("rdma_getaddrinfo"); - return ret; - } - -@@ -65,65 +65,71 @@ static int run(void) - attr.cap.max_inline_data = 16; - attr.sq_sig_all = 1; - ret = rdma_create_ep(&listen_id, res, NULL, &attr); -- rdma_freeaddrinfo(res); - if (ret) { -- printf("rdma_create_ep %d\n", errno); -- return ret; -+ perror("rdma_create_ep"); -+ goto out_free_addrinfo; - } - - ret = rdma_listen(listen_id, 0); - if (ret) { -- printf("rdma_listen %d\n", errno); -- return ret; -+ perror("rdma_listen"); -+ goto out_destroy_listen_ep; - } - - ret = rdma_get_request(listen_id, &id); - if (ret) { -- printf("rdma_get_request %d\n", errno); -- return ret; -+ perror("rdma_get_request"); -+ goto out_destroy_listen_ep; - } - - mr = rdma_reg_msgs(id, recv_msg, 16); - if (!mr) { -- printf("rdma_reg_msgs %d\n", errno); -- return ret; -+ ret = -1; -+ perror("rdma_reg_msgs"); -+ goto out_destroy_accept_ep; - } - - ret = rdma_post_recv(id, NULL, recv_msg, 16, mr); - if (ret) { -- printf("rdma_post_recv %d\n", errno); -- return ret; -+ perror("rdma_post_recv"); -+ goto out_dereg; - } - - ret = rdma_accept(id, NULL); - if (ret) { -- printf("rdma_accept %d\n", errno); -- return ret; -+ perror("rdma_accept"); -+ goto out_dereg; - } - -- ret = rdma_get_recv_comp(id, &wc); -- if (ret <= 0) { -- printf("rdma_get_recv_comp %d\n", ret); -- return ret; -+ while ((ret = rdma_get_recv_comp(id, &wc)) == 0); -+ if (ret < 0) { -+ perror("rdma_get_recv_comp"); -+ goto out_disconnect; - } - - ret = rdma_post_send(id, NULL, send_msg, 16, NULL, IBV_SEND_INLINE); - if (ret) { -- printf("rdma_post_send %d\n", errno); -- 
return ret; -+ perror("rdma_post_send"); -+ goto out_disconnect; - } - -- ret = rdma_get_send_comp(id, &wc); -- if (ret <= 0) { -- printf("rdma_get_send_comp %d\n", ret); -- return ret; -- } -+ while ((ret = rdma_get_send_comp(id, &wc)) == 0); -+ if (ret < 0) -+ perror("rdma_get_send_comp"); -+ else -+ ret = 0; - -+out_disconnect: - rdma_disconnect(id); -+out_dereg: - rdma_dereg_mr(mr); -+out_destroy_accept_ep: - rdma_destroy_ep(id); -+out_destroy_listen_ep: - rdma_destroy_ep(listen_id); -- return 0; -+out_free_addrinfo: -+ rdma_freeaddrinfo(res); -+ return ret; - } - - int main(int argc, char **argv) -- 2.46.0