From fb254b21a61393eaa1e4698f4520b782024f0cd7 Mon Sep 17 00:00:00 2001 From: Stan Smith Date: Fri, 22 Oct 2010 19:22:03 +0000 Subject: [PATCH] [DAPL2] spawn test corrections. git-svn-id: svn://openib.tc.cornell.edu/gen1@2974 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86 --- branches/WOF2-3/ulp/dapl2/Makefile.am | 1162 ++--- branches/WOF2-3/ulp/dapl2/configure.in | 14 + .../WOF2-3/ulp/dapl2/dapl/common/dapl_debug.c | 495 +- .../ulp/dapl2/dapl/common/dapl_ep_free.c | 445 +- .../ulp/dapl2/dapl/common/dapl_ep_util.c | 1270 +++-- .../ulp/dapl2/dapl/common/dapl_evd_dequeue.c | 269 +- .../WOF2-3/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c | 1418 +++--- .../WOF2-3/ulp/dapl2/dapl/openib_cma/device.c | 1 - .../dapl2/dapl/openib_common/dapl_ib_common.h | 807 +-- .../WOF2-3/ulp/dapl2/dapl/openib_common/qp.c | 1278 ++--- .../WOF2-3/ulp/dapl2/dapl/openib_scm/SOURCES | 3 +- .../WOF2-3/ulp/dapl2/dapl/openib_scm/cm.c | 10 +- .../WOF2-3/ulp/dapl2/dapl/openib_ucm/SOURCES | 2 +- .../WOF2-3/ulp/dapl2/dapl/openib_ucm/cm.c | 36 +- .../ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h | 274 +- .../WOF2-3/ulp/dapl2/dapl/openib_ucm/device.c | 1343 ++--- trunk/ulp/dapl2/Makefile.am | 1162 ++--- trunk/ulp/dapl2/configure.in | 222 +- trunk/ulp/dapl2/dapl/common/dapl_debug.c | 495 +- trunk/ulp/dapl2/dapl/common/dapl_ep_free.c | 445 +- trunk/ulp/dapl2/dapl/common/dapl_ep_util.c | 1270 +++-- .../ulp/dapl2/dapl/common/dapl_evd_dequeue.c | 269 +- trunk/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c | 1418 +++--- trunk/ulp/dapl2/dapl/openib_cma/device.c | 1 - .../dapl2/dapl/openib_common/dapl_ib_common.h | 807 +-- trunk/ulp/dapl2/dapl/openib_common/qp.c | 1278 ++--- trunk/ulp/dapl2/dapl/openib_scm/SOURCES | 3 +- trunk/ulp/dapl2/dapl/openib_scm/cm.c | 3858 +++++++-------- trunk/ulp/dapl2/dapl/openib_ucm/SOURCES | 2 +- trunk/ulp/dapl2/dapl/openib_ucm/cm.c | 4342 +++++++++-------- .../ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h | 274 +- trunk/ulp/dapl2/dapl/openib_ucm/device.c | 1343 ++--- 32 files changed, 13111 insertions(+), 12905 
deletions(-) diff --git a/branches/WOF2-3/ulp/dapl2/Makefile.am b/branches/WOF2-3/ulp/dapl2/Makefile.am index 0be62980..553fea0f 100644 --- a/branches/WOF2-3/ulp/dapl2/Makefile.am +++ b/branches/WOF2-3/ulp/dapl2/Makefile.am @@ -1,581 +1,581 @@ -# $Id: $ - -OSFLAGS = -DOS_RELEASE=$(shell expr `uname -r | cut -f1 -d.` \* 65536 + `uname -r | cut -f2 -d.`) -# Check for RedHat, needed for ia64 udapl atomic operations (IA64_FETCHADD syntax) -# and built-in atomics for RedHat EL5 -if OS_RHEL4 -OSFLAGS += -DREDHAT_EL4 -endif - -if OS_RHEL5 -OSFLAGS += -DREDHAT_EL5 -endif - -if OS_SUSE11 -OSFLAGS += -DSUSE_11 -endif - -if EXT_TYPE_IB -XFLAGS = -DDAT_EXTENSIONS -XPROGRAMS = dapl/openib_common/ib_extensions.c -else -XFLAGS = -XPROGRAMS = -endif - -if DEFINE_ATTR_LINK_LAYER -XFLAGS += -DDEFINE_ATTR_LINK_LAYER -endif - -if DEBUG -AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAPL_DBG -DDAT_CONF="\"$(sysconfdir)/dat.conf\"" -else -AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAT_CONF="\"$(sysconfdir)/dat.conf\"" -endif - -datlibdir = $(libdir) -dapllibofadir = $(libdir) -daplliboscmdir = $(libdir) -daplliboucmdir = $(libdir) - -datlib_LTLIBRARIES = dat/udat/libdat2.la -dapllibofa_LTLIBRARIES = dapl/udapl/libdaplofa.la -daplliboscm_LTLIBRARIES = dapl/udapl/libdaploscm.la -daplliboucm_LTLIBRARIES = dapl/udapl/libdaploucm.la - -dat_udat_libdat2_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ - -I$(srcdir)/dat/include/ -I$(srcdir)/dat/udat/ \ - -I$(srcdir)/dat/udat/linux -I$(srcdir)/dat/common/ - -dapl_udapl_libdaplofa_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ - -DOPENIB -DCQ_WAIT_OBJECT \ - -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ - -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ - -I$(srcdir)/dapl/openib_common \ - -I$(srcdir)/dapl/openib_cma \ - -I$(srcdir)/dapl/openib_cma/linux - -dapl_udapl_libdaploscm_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ - -DOPENIB -DCQ_WAIT_OBJECT \ - -I$(srcdir)/dat/include/ 
-I$(srcdir)/dapl/include/ \ - -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ - -I$(srcdir)/dapl/openib_common \ - -I$(srcdir)/dapl/openib_scm \ - -I$(srcdir)/dapl/openib_scm/linux - -dapl_udapl_libdaploucm_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ - -DOPENIB -DCQ_WAIT_OBJECT \ - -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ - -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ - -I$(srcdir)/dapl/openib_common \ - -I$(srcdir)/dapl/openib_ucm \ - -I$(srcdir)/dapl/openib_ucm/linux - -if HAVE_LD_VERSION_SCRIPT - dat_version_script = -Wl,--version-script=$(srcdir)/dat/udat/libdat2.map - daplofa_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaplofa.map - daploscm_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaploscm.map - daploucm_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaploucm.map -else - dat_version_script = - daplofa_version_script = - daploscm_version_script = - daploucm_version_script = -endif - -# -# uDAT: libdat2.so -# -dat_udat_libdat2_la_SOURCES = dat/udat/udat.c \ - dat/udat/udat_api.c \ - dat/udat/udat_sr_parser.c \ - dat/udat/linux/dat_osd.c \ - dat/common/dat_api.c \ - dat/common/dat_dictionary.c \ - dat/common/dat_strerror.c \ - dat/common/dat_init.c \ - dat/common/dat_dr.c \ - dat/common/dat_sr.c -dat_udat_libdat2_la_LDFLAGS = -version-info 2:0:0 $(dat_version_script) -ldl - -# -# uDAPL OpenFabrics rdma_cm version: libdaplofa.so -# -dapl_udapl_libdaplofa_la_SOURCES = dapl/udapl/dapl_init.c \ - dapl/udapl/dapl_evd_create.c \ - dapl/udapl/dapl_evd_query.c \ - dapl/udapl/dapl_cno_create.c \ - dapl/udapl/dapl_cno_modify_agent.c \ - dapl/udapl/dapl_cno_free.c \ - dapl/udapl/dapl_cno_wait.c \ - dapl/udapl/dapl_cno_query.c \ - dapl/udapl/dapl_lmr_create.c \ - dapl/udapl/dapl_evd_wait.c \ - dapl/udapl/dapl_evd_disable.c \ - dapl/udapl/dapl_evd_enable.c \ - dapl/udapl/dapl_evd_modify_cno.c \ - dapl/udapl/dapl_evd_set_unwaitable.c \ - dapl/udapl/dapl_evd_clear_unwaitable.c 
\ - dapl/udapl/linux/dapl_osd.c \ - dapl/common/dapl_cookie.c \ - dapl/common/dapl_cr_accept.c \ - dapl/common/dapl_cr_query.c \ - dapl/common/dapl_cr_reject.c \ - dapl/common/dapl_cr_util.c \ - dapl/common/dapl_cr_callback.c \ - dapl/common/dapl_cr_handoff.c \ - dapl/common/dapl_ep_connect.c \ - dapl/common/dapl_ep_create.c \ - dapl/common/dapl_ep_disconnect.c \ - dapl/common/dapl_ep_dup_connect.c \ - dapl/common/dapl_ep_free.c \ - dapl/common/dapl_ep_reset.c \ - dapl/common/dapl_ep_get_status.c \ - dapl/common/dapl_ep_modify.c \ - dapl/common/dapl_ep_post_rdma_read.c \ - dapl/common/dapl_ep_post_rdma_write.c \ - dapl/common/dapl_ep_post_recv.c \ - dapl/common/dapl_ep_post_send.c \ - dapl/common/dapl_ep_query.c \ - dapl/common/dapl_ep_util.c \ - dapl/common/dapl_evd_dequeue.c \ - dapl/common/dapl_evd_free.c \ - dapl/common/dapl_evd_post_se.c \ - dapl/common/dapl_evd_resize.c \ - dapl/common/dapl_evd_util.c \ - dapl/common/dapl_evd_cq_async_error_callb.c \ - dapl/common/dapl_evd_qp_async_error_callb.c \ - dapl/common/dapl_evd_un_async_error_callb.c \ - dapl/common/dapl_evd_connection_callb.c \ - dapl/common/dapl_evd_dto_callb.c \ - dapl/common/dapl_get_consumer_context.c \ - dapl/common/dapl_get_handle_type.c \ - dapl/common/dapl_hash.c \ - dapl/common/dapl_hca_util.c \ - dapl/common/dapl_ia_close.c \ - dapl/common/dapl_ia_open.c \ - dapl/common/dapl_ia_query.c \ - dapl/common/dapl_ia_util.c \ - dapl/common/dapl_llist.c \ - dapl/common/dapl_lmr_free.c \ - dapl/common/dapl_lmr_query.c \ - dapl/common/dapl_lmr_util.c \ - dapl/common/dapl_lmr_sync_rdma_read.c \ - dapl/common/dapl_lmr_sync_rdma_write.c \ - dapl/common/dapl_mr_util.c \ - dapl/common/dapl_provider.c \ - dapl/common/dapl_sp_util.c \ - dapl/common/dapl_psp_create.c \ - dapl/common/dapl_psp_create_any.c \ - dapl/common/dapl_psp_free.c \ - dapl/common/dapl_psp_query.c \ - dapl/common/dapl_pz_create.c \ - dapl/common/dapl_pz_free.c \ - dapl/common/dapl_pz_query.c \ - dapl/common/dapl_pz_util.c \ - 
dapl/common/dapl_rmr_create.c \ - dapl/common/dapl_rmr_free.c \ - dapl/common/dapl_rmr_bind.c \ - dapl/common/dapl_rmr_query.c \ - dapl/common/dapl_rmr_util.c \ - dapl/common/dapl_rsp_create.c \ - dapl/common/dapl_rsp_free.c \ - dapl/common/dapl_rsp_query.c \ - dapl/common/dapl_cno_util.c \ - dapl/common/dapl_set_consumer_context.c \ - dapl/common/dapl_ring_buffer_util.c \ - dapl/common/dapl_name_service.c \ - dapl/common/dapl_timer_util.c \ - dapl/common/dapl_ep_create_with_srq.c \ - dapl/common/dapl_ep_recv_query.c \ - dapl/common/dapl_ep_set_watermark.c \ - dapl/common/dapl_srq_create.c \ - dapl/common/dapl_srq_free.c \ - dapl/common/dapl_srq_query.c \ - dapl/common/dapl_srq_resize.c \ - dapl/common/dapl_srq_post_recv.c \ - dapl/common/dapl_srq_set_lw.c \ - dapl/common/dapl_srq_util.c \ - dapl/common/dapl_debug.c \ - dapl/common/dapl_ia_ha.c \ - dapl/common/dapl_csp.c \ - dapl/common/dapl_ep_post_send_invalidate.c \ - dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ - dapl/openib_common/mem.c \ - dapl/openib_common/cq.c \ - dapl/openib_common/qp.c \ - dapl/openib_common/util.c \ - dapl/openib_cma/cm.c \ - dapl/openib_cma/device.c $(XPROGRAMS) - -dapl_udapl_libdaplofa_la_LDFLAGS = -version-info 2:0:0 $(daplofa_version_script) \ - -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ - -lpthread -libverbs -lrdmacm - -# -# uDAPL OpenFabrics Socket CM version for IB: libdaplscm.so -# -dapl_udapl_libdaploscm_la_SOURCES = dapl/udapl/dapl_init.c \ - dapl/udapl/dapl_evd_create.c \ - dapl/udapl/dapl_evd_query.c \ - dapl/udapl/dapl_cno_create.c \ - dapl/udapl/dapl_cno_modify_agent.c \ - dapl/udapl/dapl_cno_free.c \ - dapl/udapl/dapl_cno_wait.c \ - dapl/udapl/dapl_cno_query.c \ - dapl/udapl/dapl_lmr_create.c \ - dapl/udapl/dapl_evd_wait.c \ - dapl/udapl/dapl_evd_disable.c \ - dapl/udapl/dapl_evd_enable.c \ - dapl/udapl/dapl_evd_modify_cno.c \ - dapl/udapl/dapl_evd_set_unwaitable.c \ - dapl/udapl/dapl_evd_clear_unwaitable.c \ - dapl/udapl/linux/dapl_osd.c \ - dapl/common/dapl_cookie.c \ - 
dapl/common/dapl_cr_accept.c \ - dapl/common/dapl_cr_query.c \ - dapl/common/dapl_cr_reject.c \ - dapl/common/dapl_cr_util.c \ - dapl/common/dapl_cr_callback.c \ - dapl/common/dapl_cr_handoff.c \ - dapl/common/dapl_ep_connect.c \ - dapl/common/dapl_ep_create.c \ - dapl/common/dapl_ep_disconnect.c \ - dapl/common/dapl_ep_dup_connect.c \ - dapl/common/dapl_ep_free.c \ - dapl/common/dapl_ep_reset.c \ - dapl/common/dapl_ep_get_status.c \ - dapl/common/dapl_ep_modify.c \ - dapl/common/dapl_ep_post_rdma_read.c \ - dapl/common/dapl_ep_post_rdma_write.c \ - dapl/common/dapl_ep_post_recv.c \ - dapl/common/dapl_ep_post_send.c \ - dapl/common/dapl_ep_query.c \ - dapl/common/dapl_ep_util.c \ - dapl/common/dapl_evd_dequeue.c \ - dapl/common/dapl_evd_free.c \ - dapl/common/dapl_evd_post_se.c \ - dapl/common/dapl_evd_resize.c \ - dapl/common/dapl_evd_util.c \ - dapl/common/dapl_evd_cq_async_error_callb.c \ - dapl/common/dapl_evd_qp_async_error_callb.c \ - dapl/common/dapl_evd_un_async_error_callb.c \ - dapl/common/dapl_evd_connection_callb.c \ - dapl/common/dapl_evd_dto_callb.c \ - dapl/common/dapl_get_consumer_context.c \ - dapl/common/dapl_get_handle_type.c \ - dapl/common/dapl_hash.c \ - dapl/common/dapl_hca_util.c \ - dapl/common/dapl_ia_close.c \ - dapl/common/dapl_ia_open.c \ - dapl/common/dapl_ia_query.c \ - dapl/common/dapl_ia_util.c \ - dapl/common/dapl_llist.c \ - dapl/common/dapl_lmr_free.c \ - dapl/common/dapl_lmr_query.c \ - dapl/common/dapl_lmr_util.c \ - dapl/common/dapl_lmr_sync_rdma_read.c \ - dapl/common/dapl_lmr_sync_rdma_write.c \ - dapl/common/dapl_mr_util.c \ - dapl/common/dapl_provider.c \ - dapl/common/dapl_sp_util.c \ - dapl/common/dapl_psp_create.c \ - dapl/common/dapl_psp_create_any.c \ - dapl/common/dapl_psp_free.c \ - dapl/common/dapl_psp_query.c \ - dapl/common/dapl_pz_create.c \ - dapl/common/dapl_pz_free.c \ - dapl/common/dapl_pz_query.c \ - dapl/common/dapl_pz_util.c \ - dapl/common/dapl_rmr_create.c \ - dapl/common/dapl_rmr_free.c \ - 
dapl/common/dapl_rmr_bind.c \ - dapl/common/dapl_rmr_query.c \ - dapl/common/dapl_rmr_util.c \ - dapl/common/dapl_rsp_create.c \ - dapl/common/dapl_rsp_free.c \ - dapl/common/dapl_rsp_query.c \ - dapl/common/dapl_cno_util.c \ - dapl/common/dapl_set_consumer_context.c \ - dapl/common/dapl_ring_buffer_util.c \ - dapl/common/dapl_name_service.c \ - dapl/common/dapl_timer_util.c \ - dapl/common/dapl_ep_create_with_srq.c \ - dapl/common/dapl_ep_recv_query.c \ - dapl/common/dapl_ep_set_watermark.c \ - dapl/common/dapl_srq_create.c \ - dapl/common/dapl_srq_free.c \ - dapl/common/dapl_srq_query.c \ - dapl/common/dapl_srq_resize.c \ - dapl/common/dapl_srq_post_recv.c \ - dapl/common/dapl_srq_set_lw.c \ - dapl/common/dapl_srq_util.c \ - dapl/common/dapl_debug.c \ - dapl/common/dapl_ia_ha.c \ - dapl/common/dapl_csp.c \ - dapl/common/dapl_ep_post_send_invalidate.c \ - dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ - dapl/openib_common/mem.c \ - dapl/openib_common/cq.c \ - dapl/openib_common/qp.c \ - dapl/openib_common/util.c \ - dapl/openib_scm/cm.c \ - dapl/openib_scm/device.c $(XPROGRAMS) - -dapl_udapl_libdaploscm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \ - -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ - -lpthread -libverbs - -# -# uDAPL OpenFabrics UD CM version for IB: libdaplucm.so -# -dapl_udapl_libdaploucm_la_SOURCES = dapl/udapl/dapl_init.c \ - dapl/udapl/dapl_evd_create.c \ - dapl/udapl/dapl_evd_query.c \ - dapl/udapl/dapl_cno_create.c \ - dapl/udapl/dapl_cno_modify_agent.c \ - dapl/udapl/dapl_cno_free.c \ - dapl/udapl/dapl_cno_wait.c \ - dapl/udapl/dapl_cno_query.c \ - dapl/udapl/dapl_lmr_create.c \ - dapl/udapl/dapl_evd_wait.c \ - dapl/udapl/dapl_evd_disable.c \ - dapl/udapl/dapl_evd_enable.c \ - dapl/udapl/dapl_evd_modify_cno.c \ - dapl/udapl/dapl_evd_set_unwaitable.c \ - dapl/udapl/dapl_evd_clear_unwaitable.c \ - dapl/udapl/linux/dapl_osd.c \ - dapl/common/dapl_cookie.c \ - dapl/common/dapl_cr_accept.c \ - dapl/common/dapl_cr_query.c \ - 
dapl/common/dapl_cr_reject.c \ - dapl/common/dapl_cr_util.c \ - dapl/common/dapl_cr_callback.c \ - dapl/common/dapl_cr_handoff.c \ - dapl/common/dapl_ep_connect.c \ - dapl/common/dapl_ep_create.c \ - dapl/common/dapl_ep_disconnect.c \ - dapl/common/dapl_ep_dup_connect.c \ - dapl/common/dapl_ep_free.c \ - dapl/common/dapl_ep_reset.c \ - dapl/common/dapl_ep_get_status.c \ - dapl/common/dapl_ep_modify.c \ - dapl/common/dapl_ep_post_rdma_read.c \ - dapl/common/dapl_ep_post_rdma_write.c \ - dapl/common/dapl_ep_post_recv.c \ - dapl/common/dapl_ep_post_send.c \ - dapl/common/dapl_ep_query.c \ - dapl/common/dapl_ep_util.c \ - dapl/common/dapl_evd_dequeue.c \ - dapl/common/dapl_evd_free.c \ - dapl/common/dapl_evd_post_se.c \ - dapl/common/dapl_evd_resize.c \ - dapl/common/dapl_evd_util.c \ - dapl/common/dapl_evd_cq_async_error_callb.c \ - dapl/common/dapl_evd_qp_async_error_callb.c \ - dapl/common/dapl_evd_un_async_error_callb.c \ - dapl/common/dapl_evd_connection_callb.c \ - dapl/common/dapl_evd_dto_callb.c \ - dapl/common/dapl_get_consumer_context.c \ - dapl/common/dapl_get_handle_type.c \ - dapl/common/dapl_hash.c \ - dapl/common/dapl_hca_util.c \ - dapl/common/dapl_ia_close.c \ - dapl/common/dapl_ia_open.c \ - dapl/common/dapl_ia_query.c \ - dapl/common/dapl_ia_util.c \ - dapl/common/dapl_llist.c \ - dapl/common/dapl_lmr_free.c \ - dapl/common/dapl_lmr_query.c \ - dapl/common/dapl_lmr_util.c \ - dapl/common/dapl_lmr_sync_rdma_read.c \ - dapl/common/dapl_lmr_sync_rdma_write.c \ - dapl/common/dapl_mr_util.c \ - dapl/common/dapl_provider.c \ - dapl/common/dapl_sp_util.c \ - dapl/common/dapl_psp_create.c \ - dapl/common/dapl_psp_create_any.c \ - dapl/common/dapl_psp_free.c \ - dapl/common/dapl_psp_query.c \ - dapl/common/dapl_pz_create.c \ - dapl/common/dapl_pz_free.c \ - dapl/common/dapl_pz_query.c \ - dapl/common/dapl_pz_util.c \ - dapl/common/dapl_rmr_create.c \ - dapl/common/dapl_rmr_free.c \ - dapl/common/dapl_rmr_bind.c \ - dapl/common/dapl_rmr_query.c \ - 
dapl/common/dapl_rmr_util.c \ - dapl/common/dapl_rsp_create.c \ - dapl/common/dapl_rsp_free.c \ - dapl/common/dapl_rsp_query.c \ - dapl/common/dapl_cno_util.c \ - dapl/common/dapl_set_consumer_context.c \ - dapl/common/dapl_ring_buffer_util.c \ - dapl/common/dapl_name_service.c \ - dapl/common/dapl_timer_util.c \ - dapl/common/dapl_ep_create_with_srq.c \ - dapl/common/dapl_ep_recv_query.c \ - dapl/common/dapl_ep_set_watermark.c \ - dapl/common/dapl_srq_create.c \ - dapl/common/dapl_srq_free.c \ - dapl/common/dapl_srq_query.c \ - dapl/common/dapl_srq_resize.c \ - dapl/common/dapl_srq_post_recv.c \ - dapl/common/dapl_srq_set_lw.c \ - dapl/common/dapl_srq_util.c \ - dapl/common/dapl_debug.c \ - dapl/common/dapl_ia_ha.c \ - dapl/common/dapl_csp.c \ - dapl/common/dapl_ep_post_send_invalidate.c \ - dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ - dapl/openib_common/mem.c \ - dapl/openib_common/cq.c \ - dapl/openib_common/qp.c \ - dapl/openib_common/util.c \ - dapl/openib_ucm/cm.c \ - dapl/openib_ucm/device.c $(XPROGRAMS) - -dapl_udapl_libdaploucm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \ - -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ - -lpthread -libverbs - -libdatincludedir = $(includedir)/dat2 - -libdatinclude_HEADERS = dat/include/dat2/dat.h \ - dat/include/dat2/dat_error.h \ - dat/include/dat2/dat_platform_specific.h \ - dat/include/dat2/dat_redirection.h \ - dat/include/dat2/dat_registry.h \ - dat/include/dat2/dat_vendor_specific.h \ - dat/include/dat2/udat_config.h \ - dat/include/dat2/udat.h \ - dat/include/dat2/udat_redirection.h \ - dat/include/dat2/udat_vendor_specific.h \ - dat/include/dat2/dat_ib_extensions.h - -man_MANS = man/dtest.1 man/dapltest.1 man/dat.conf.5 - -EXTRA_DIST = dat/common/dat_dictionary.h \ - dat/common/dat_dr.h \ - dat/common/dat_init.h \ - dat/common/dat_sr.h \ - dat/udat/udat_sr_parser.h \ - dat/udat/linux/dat_osd.h \ - dat/include/dat2/dat.h \ - dat/include/dat2/dat_error.h \ - dat/include/dat2/dat_platform_specific.h \ 
- dat/include/dat2/dat_redirection.h \ - dat/include/dat2/dat_registry.h \ - dat/include/dat2/dat_vendor_specific.h \ - dat/include/dat2/udat_config.h \ - dat/include/dat2/udat.h \ - dat/include/dat2/udat_redirection.h \ - dat/include/dat2/udat_vendor_specific.h \ - dapl/common/dapl_adapter_util.h \ - dapl/common/dapl_cno_util.h \ - dapl/common/dapl_cookie.h \ - dapl/common/dapl_cr_util.h \ - dapl/common/dapl_ep_util.h \ - dapl/common/dapl_evd_util.h \ - dapl/common/dapl_hash.h \ - dapl/common/dapl_hca_util.h \ - dapl/common/dapl_ia_util.h \ - dapl/common/dapl_init.h \ - dapl/common/dapl_lmr_util.h \ - dapl/common/dapl_mr_util.h \ - dapl/common/dapl_name_service.h \ - dapl/common/dapl_provider.h \ - dapl/common/dapl_pz_util.h \ - dapl/common/dapl_ring_buffer_util.h \ - dapl/common/dapl_rmr_util.h \ - dapl/common/dapl_sp_util.h \ - dapl/common/dapl_srq_util.h \ - dapl/common/dapl_timer_util.h \ - dapl/udapl/linux/dapl_osd.h \ - dapl/include/dapl.h \ - dapl/include/dapl_debug.h \ - dapl/include/dapl_ipoib_names.h \ - dapl/include/dapl_vendor.h \ - dapl/openib_common/dapl_ib_dto.h \ - dapl/openib_common/dapl_ib_common.h \ - dapl/openib_cma/dapl_ib_util.h \ - dapl/openib_cma/linux/openib_osd.h \ - dapl/openib_scm/dapl_ib_util.h \ - dapl/openib_scm/linux/openib_osd.h \ - dapl/openib_ucm/dapl_ib_util.h \ - dapl/openib_ucm/linux/openib_osd.h \ - dat/udat/libdat2.map \ - dapl/udapl/libdaplofa.map \ - dapl/udapl/libdaploscm.map \ - dapl/udapl/libdaploucm.map \ - LICENSE.txt \ - LICENSE2.txt \ - LICENSE3.txt \ - dapl.spec.in \ - $(man_MANS) \ - test/dapltest/include/dapl_bpool.h \ - test/dapltest/include/dapl_client_info.h \ - test/dapltest/include/dapl_common.h \ - test/dapltest/include/dapl_execute.h \ - test/dapltest/include/dapl_fft_cmd.h \ - test/dapltest/include/dapl_fft_util.h \ - test/dapltest/include/dapl_getopt.h \ - test/dapltest/include/dapl_global.h \ - test/dapltest/include/dapl_limit_cmd.h \ - test/dapltest/include/dapl_mdep.h \ - 
test/dapltest/include/dapl_memlist.h \ - test/dapltest/include/dapl_params.h \ - test/dapltest/include/dapl_performance_cmd.h \ - test/dapltest/include/dapl_performance_stats.h \ - test/dapltest/include/dapl_performance_test.h \ - test/dapltest/include/dapl_proto.h \ - test/dapltest/include/dapl_quit_cmd.h \ - test/dapltest/include/dapl_server_cmd.h \ - test/dapltest/include/dapl_server_info.h \ - test/dapltest/include/dapl_tdep.h \ - test/dapltest/include/dapl_tdep_print.h \ - test/dapltest/include/dapl_test_data.h \ - test/dapltest/include/dapl_transaction_cmd.h \ - test/dapltest/include/dapl_transaction_stats.h \ - test/dapltest/include/dapl_transaction_test.h \ - test/dapltest/include/dapl_version.h \ - test/dapltest/mdep/linux/dapl_mdep_user.h - -dist-hook: dapl.spec - cp dapl.spec $(distdir) - -install-exec-hook: - if ! test -d $(DESTDIR)$(sysconfdir); then \ - mkdir -p $(DESTDIR)$(sysconfdir); \ - fi; \ - if test -e $(DESTDIR)$(sysconfdir)/dat.conf; then \ - sed -e '/ofa-v2-.* u2/d' < $(DESTDIR)$(sysconfdir)/dat.conf > /tmp/$$$$ofadapl; \ - cp /tmp/$$$$ofadapl $(DESTDIR)$(sysconfdir)/dat.conf; \ - fi; \ - echo ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"ib0 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ib1 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"ib1 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mthca0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mthca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mthca0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ipath0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ipath0 1" ""' >> 
$(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ipath0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ipath0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ehca0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ehca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-iwarp u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth2 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mlx4_0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mlx4_0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mthca0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mthca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mthca0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-cma-roe-eth2 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth2 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-cma-roe-eth3 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth3 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-scm-roe-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-scm-roe-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; - -uninstall-hook: - if test -e $(DESTDIR)$(sysconfdir)/dat.conf; then \ - sed -e '/ofa-v2-.* u2/d' < $(DESTDIR)$(sysconfdir)/dat.conf > /tmp/$$$$ofadapl; \ - cp /tmp/$$$$ofadapl $(DESTDIR)$(sysconfdir)/dat.conf; \ - fi; - -SUBDIRS = . 
test/dtest test/dapltest +# $Id: $ + +OSFLAGS = -DOS_RELEASE=$(shell expr `uname -r | cut -f1 -d.` \* 65536 + `uname -r | cut -f2 -d.`) +# Check for RedHat, needed for ia64 udapl atomic operations (IA64_FETCHADD syntax) +# and built-in atomics for RedHat EL5 +if OS_RHEL4 +OSFLAGS += -DREDHAT_EL4 +endif + +if OS_RHEL5 +OSFLAGS += -DREDHAT_EL5 +endif + +if OS_SUSE11 +OSFLAGS += -DSUSE_11 +endif + +if EXT_TYPE_IB +XFLAGS = -DDAT_EXTENSIONS +XPROGRAMS = dapl/openib_common/ib_extensions.c +else +XFLAGS = +XPROGRAMS = +endif + +if DEFINE_ATTR_LINK_LAYER +XFLAGS += -DDEFINE_ATTR_LINK_LAYER +endif + +if DEBUG +AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAPL_DBG -DDAT_CONF="\"$(sysconfdir)/dat.conf\"" +else +AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAT_CONF="\"$(sysconfdir)/dat.conf\"" +endif + +datlibdir = $(libdir) +dapllibofadir = $(libdir) +daplliboscmdir = $(libdir) +daplliboucmdir = $(libdir) + +datlib_LTLIBRARIES = dat/udat/libdat2.la +dapllibofa_LTLIBRARIES = dapl/udapl/libdaplofa.la +daplliboscm_LTLIBRARIES = dapl/udapl/libdaploscm.la +daplliboucm_LTLIBRARIES = dapl/udapl/libdaploucm.la + +dat_udat_libdat2_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ + -I$(srcdir)/dat/include/ -I$(srcdir)/dat/udat/ \ + -I$(srcdir)/dat/udat/linux -I$(srcdir)/dat/common/ + +dapl_udapl_libdaplofa_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ + -DOPENIB -DCQ_WAIT_OBJECT \ + -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ + -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ + -I$(srcdir)/dapl/openib_common \ + -I$(srcdir)/dapl/openib_cma \ + -I$(srcdir)/dapl/openib_cma/linux + +dapl_udapl_libdaploscm_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ + -DOPENIB -DCQ_WAIT_OBJECT \ + -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ + -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ + -I$(srcdir)/dapl/openib_common \ + -I$(srcdir)/dapl/openib_scm \ + -I$(srcdir)/dapl/openib_scm/linux + +dapl_udapl_libdaploucm_la_CFLAGS = $(AM_CFLAGS) 
-D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ + -DOPENIB -DCQ_WAIT_OBJECT \ + -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ + -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ + -I$(srcdir)/dapl/openib_common \ + -I$(srcdir)/dapl/openib_ucm \ + -I$(srcdir)/dapl/openib_ucm/linux + +if HAVE_LD_VERSION_SCRIPT + dat_version_script = -Wl,--version-script=$(srcdir)/dat/udat/libdat2.map + daplofa_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaplofa.map + daploscm_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaploscm.map + daploucm_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaploucm.map +else + dat_version_script = + daplofa_version_script = + daploscm_version_script = + daploucm_version_script = +endif + +# +# uDAT: libdat2.so +# +dat_udat_libdat2_la_SOURCES = dat/udat/udat.c \ + dat/udat/udat_api.c \ + dat/udat/udat_sr_parser.c \ + dat/udat/linux/dat_osd.c \ + dat/common/dat_api.c \ + dat/common/dat_dictionary.c \ + dat/common/dat_strerror.c \ + dat/common/dat_init.c \ + dat/common/dat_dr.c \ + dat/common/dat_sr.c +dat_udat_libdat2_la_LDFLAGS = -version-info 2:0:0 $(dat_version_script) -ldl + +# +# uDAPL OpenFabrics rdma_cm version: libdaplofa.so +# +dapl_udapl_libdaplofa_la_SOURCES = dapl/udapl/dapl_init.c \ + dapl/udapl/dapl_evd_create.c \ + dapl/udapl/dapl_evd_query.c \ + dapl/udapl/dapl_cno_create.c \ + dapl/udapl/dapl_cno_modify_agent.c \ + dapl/udapl/dapl_cno_free.c \ + dapl/udapl/dapl_cno_wait.c \ + dapl/udapl/dapl_cno_query.c \ + dapl/udapl/dapl_lmr_create.c \ + dapl/udapl/dapl_evd_wait.c \ + dapl/udapl/dapl_evd_disable.c \ + dapl/udapl/dapl_evd_enable.c \ + dapl/udapl/dapl_evd_modify_cno.c \ + dapl/udapl/dapl_evd_set_unwaitable.c \ + dapl/udapl/dapl_evd_clear_unwaitable.c \ + dapl/udapl/linux/dapl_osd.c \ + dapl/common/dapl_cookie.c \ + dapl/common/dapl_cr_accept.c \ + dapl/common/dapl_cr_query.c \ + dapl/common/dapl_cr_reject.c \ + dapl/common/dapl_cr_util.c \ + dapl/common/dapl_cr_callback.c \ + 
dapl/common/dapl_cr_handoff.c \ + dapl/common/dapl_ep_connect.c \ + dapl/common/dapl_ep_create.c \ + dapl/common/dapl_ep_disconnect.c \ + dapl/common/dapl_ep_dup_connect.c \ + dapl/common/dapl_ep_free.c \ + dapl/common/dapl_ep_reset.c \ + dapl/common/dapl_ep_get_status.c \ + dapl/common/dapl_ep_modify.c \ + dapl/common/dapl_ep_post_rdma_read.c \ + dapl/common/dapl_ep_post_rdma_write.c \ + dapl/common/dapl_ep_post_recv.c \ + dapl/common/dapl_ep_post_send.c \ + dapl/common/dapl_ep_query.c \ + dapl/common/dapl_ep_util.c \ + dapl/common/dapl_evd_dequeue.c \ + dapl/common/dapl_evd_free.c \ + dapl/common/dapl_evd_post_se.c \ + dapl/common/dapl_evd_resize.c \ + dapl/common/dapl_evd_util.c \ + dapl/common/dapl_evd_cq_async_error_callb.c \ + dapl/common/dapl_evd_qp_async_error_callb.c \ + dapl/common/dapl_evd_un_async_error_callb.c \ + dapl/common/dapl_evd_connection_callb.c \ + dapl/common/dapl_evd_dto_callb.c \ + dapl/common/dapl_get_consumer_context.c \ + dapl/common/dapl_get_handle_type.c \ + dapl/common/dapl_hash.c \ + dapl/common/dapl_hca_util.c \ + dapl/common/dapl_ia_close.c \ + dapl/common/dapl_ia_open.c \ + dapl/common/dapl_ia_query.c \ + dapl/common/dapl_ia_util.c \ + dapl/common/dapl_llist.c \ + dapl/common/dapl_lmr_free.c \ + dapl/common/dapl_lmr_query.c \ + dapl/common/dapl_lmr_util.c \ + dapl/common/dapl_lmr_sync_rdma_read.c \ + dapl/common/dapl_lmr_sync_rdma_write.c \ + dapl/common/dapl_mr_util.c \ + dapl/common/dapl_provider.c \ + dapl/common/dapl_sp_util.c \ + dapl/common/dapl_psp_create.c \ + dapl/common/dapl_psp_create_any.c \ + dapl/common/dapl_psp_free.c \ + dapl/common/dapl_psp_query.c \ + dapl/common/dapl_pz_create.c \ + dapl/common/dapl_pz_free.c \ + dapl/common/dapl_pz_query.c \ + dapl/common/dapl_pz_util.c \ + dapl/common/dapl_rmr_create.c \ + dapl/common/dapl_rmr_free.c \ + dapl/common/dapl_rmr_bind.c \ + dapl/common/dapl_rmr_query.c \ + dapl/common/dapl_rmr_util.c \ + dapl/common/dapl_rsp_create.c \ + dapl/common/dapl_rsp_free.c \ + 
dapl/common/dapl_rsp_query.c \ + dapl/common/dapl_cno_util.c \ + dapl/common/dapl_set_consumer_context.c \ + dapl/common/dapl_ring_buffer_util.c \ + dapl/common/dapl_name_service.c \ + dapl/common/dapl_timer_util.c \ + dapl/common/dapl_ep_create_with_srq.c \ + dapl/common/dapl_ep_recv_query.c \ + dapl/common/dapl_ep_set_watermark.c \ + dapl/common/dapl_srq_create.c \ + dapl/common/dapl_srq_free.c \ + dapl/common/dapl_srq_query.c \ + dapl/common/dapl_srq_resize.c \ + dapl/common/dapl_srq_post_recv.c \ + dapl/common/dapl_srq_set_lw.c \ + dapl/common/dapl_srq_util.c \ + dapl/common/dapl_debug.c \ + dapl/common/dapl_ia_ha.c \ + dapl/common/dapl_csp.c \ + dapl/common/dapl_ep_post_send_invalidate.c \ + dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ + dapl/openib_common/mem.c \ + dapl/openib_common/cq.c \ + dapl/openib_common/qp.c \ + dapl/openib_common/util.c \ + dapl/openib_cma/cm.c \ + dapl/openib_cma/device.c $(XPROGRAMS) + +dapl_udapl_libdaplofa_la_LDFLAGS = -version-info 2:0:0 $(daplofa_version_script) \ + -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ + -lpthread -libverbs -lrdmacm + +# +# uDAPL OpenFabrics Socket CM version for IB: libdaplscm.so +# +dapl_udapl_libdaploscm_la_SOURCES = dapl/udapl/dapl_init.c \ + dapl/udapl/dapl_evd_create.c \ + dapl/udapl/dapl_evd_query.c \ + dapl/udapl/dapl_cno_create.c \ + dapl/udapl/dapl_cno_modify_agent.c \ + dapl/udapl/dapl_cno_free.c \ + dapl/udapl/dapl_cno_wait.c \ + dapl/udapl/dapl_cno_query.c \ + dapl/udapl/dapl_lmr_create.c \ + dapl/udapl/dapl_evd_wait.c \ + dapl/udapl/dapl_evd_disable.c \ + dapl/udapl/dapl_evd_enable.c \ + dapl/udapl/dapl_evd_modify_cno.c \ + dapl/udapl/dapl_evd_set_unwaitable.c \ + dapl/udapl/dapl_evd_clear_unwaitable.c \ + dapl/udapl/linux/dapl_osd.c \ + dapl/common/dapl_cookie.c \ + dapl/common/dapl_cr_accept.c \ + dapl/common/dapl_cr_query.c \ + dapl/common/dapl_cr_reject.c \ + dapl/common/dapl_cr_util.c \ + dapl/common/dapl_cr_callback.c \ + dapl/common/dapl_cr_handoff.c \ + dapl/common/dapl_ep_connect.c 
\ + dapl/common/dapl_ep_create.c \ + dapl/common/dapl_ep_disconnect.c \ + dapl/common/dapl_ep_dup_connect.c \ + dapl/common/dapl_ep_free.c \ + dapl/common/dapl_ep_reset.c \ + dapl/common/dapl_ep_get_status.c \ + dapl/common/dapl_ep_modify.c \ + dapl/common/dapl_ep_post_rdma_read.c \ + dapl/common/dapl_ep_post_rdma_write.c \ + dapl/common/dapl_ep_post_recv.c \ + dapl/common/dapl_ep_post_send.c \ + dapl/common/dapl_ep_query.c \ + dapl/common/dapl_ep_util.c \ + dapl/common/dapl_evd_dequeue.c \ + dapl/common/dapl_evd_free.c \ + dapl/common/dapl_evd_post_se.c \ + dapl/common/dapl_evd_resize.c \ + dapl/common/dapl_evd_util.c \ + dapl/common/dapl_evd_cq_async_error_callb.c \ + dapl/common/dapl_evd_qp_async_error_callb.c \ + dapl/common/dapl_evd_un_async_error_callb.c \ + dapl/common/dapl_evd_connection_callb.c \ + dapl/common/dapl_evd_dto_callb.c \ + dapl/common/dapl_get_consumer_context.c \ + dapl/common/dapl_get_handle_type.c \ + dapl/common/dapl_hash.c \ + dapl/common/dapl_hca_util.c \ + dapl/common/dapl_ia_close.c \ + dapl/common/dapl_ia_open.c \ + dapl/common/dapl_ia_query.c \ + dapl/common/dapl_ia_util.c \ + dapl/common/dapl_llist.c \ + dapl/common/dapl_lmr_free.c \ + dapl/common/dapl_lmr_query.c \ + dapl/common/dapl_lmr_util.c \ + dapl/common/dapl_lmr_sync_rdma_read.c \ + dapl/common/dapl_lmr_sync_rdma_write.c \ + dapl/common/dapl_mr_util.c \ + dapl/common/dapl_provider.c \ + dapl/common/dapl_sp_util.c \ + dapl/common/dapl_psp_create.c \ + dapl/common/dapl_psp_create_any.c \ + dapl/common/dapl_psp_free.c \ + dapl/common/dapl_psp_query.c \ + dapl/common/dapl_pz_create.c \ + dapl/common/dapl_pz_free.c \ + dapl/common/dapl_pz_query.c \ + dapl/common/dapl_pz_util.c \ + dapl/common/dapl_rmr_create.c \ + dapl/common/dapl_rmr_free.c \ + dapl/common/dapl_rmr_bind.c \ + dapl/common/dapl_rmr_query.c \ + dapl/common/dapl_rmr_util.c \ + dapl/common/dapl_rsp_create.c \ + dapl/common/dapl_rsp_free.c \ + dapl/common/dapl_rsp_query.c \ + dapl/common/dapl_cno_util.c \ + 
dapl/common/dapl_set_consumer_context.c \ + dapl/common/dapl_ring_buffer_util.c \ + dapl/common/dapl_name_service.c \ + dapl/common/dapl_timer_util.c \ + dapl/common/dapl_ep_create_with_srq.c \ + dapl/common/dapl_ep_recv_query.c \ + dapl/common/dapl_ep_set_watermark.c \ + dapl/common/dapl_srq_create.c \ + dapl/common/dapl_srq_free.c \ + dapl/common/dapl_srq_query.c \ + dapl/common/dapl_srq_resize.c \ + dapl/common/dapl_srq_post_recv.c \ + dapl/common/dapl_srq_set_lw.c \ + dapl/common/dapl_srq_util.c \ + dapl/common/dapl_debug.c \ + dapl/common/dapl_ia_ha.c \ + dapl/common/dapl_csp.c \ + dapl/common/dapl_ep_post_send_invalidate.c \ + dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ + dapl/openib_common/mem.c \ + dapl/openib_common/cq.c \ + dapl/openib_common/qp.c \ + dapl/openib_common/util.c \ + dapl/openib_scm/cm.c \ + dapl/openib_scm/device.c $(XPROGRAMS) + +dapl_udapl_libdaploscm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \ + -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ + -lpthread -libverbs -lrdmacm + +# +# uDAPL OpenFabrics UD CM version for IB: libdaplucm.so +# +dapl_udapl_libdaploucm_la_SOURCES = dapl/udapl/dapl_init.c \ + dapl/udapl/dapl_evd_create.c \ + dapl/udapl/dapl_evd_query.c \ + dapl/udapl/dapl_cno_create.c \ + dapl/udapl/dapl_cno_modify_agent.c \ + dapl/udapl/dapl_cno_free.c \ + dapl/udapl/dapl_cno_wait.c \ + dapl/udapl/dapl_cno_query.c \ + dapl/udapl/dapl_lmr_create.c \ + dapl/udapl/dapl_evd_wait.c \ + dapl/udapl/dapl_evd_disable.c \ + dapl/udapl/dapl_evd_enable.c \ + dapl/udapl/dapl_evd_modify_cno.c \ + dapl/udapl/dapl_evd_set_unwaitable.c \ + dapl/udapl/dapl_evd_clear_unwaitable.c \ + dapl/udapl/linux/dapl_osd.c \ + dapl/common/dapl_cookie.c \ + dapl/common/dapl_cr_accept.c \ + dapl/common/dapl_cr_query.c \ + dapl/common/dapl_cr_reject.c \ + dapl/common/dapl_cr_util.c \ + dapl/common/dapl_cr_callback.c \ + dapl/common/dapl_cr_handoff.c \ + dapl/common/dapl_ep_connect.c \ + dapl/common/dapl_ep_create.c \ + 
dapl/common/dapl_ep_disconnect.c \ + dapl/common/dapl_ep_dup_connect.c \ + dapl/common/dapl_ep_free.c \ + dapl/common/dapl_ep_reset.c \ + dapl/common/dapl_ep_get_status.c \ + dapl/common/dapl_ep_modify.c \ + dapl/common/dapl_ep_post_rdma_read.c \ + dapl/common/dapl_ep_post_rdma_write.c \ + dapl/common/dapl_ep_post_recv.c \ + dapl/common/dapl_ep_post_send.c \ + dapl/common/dapl_ep_query.c \ + dapl/common/dapl_ep_util.c \ + dapl/common/dapl_evd_dequeue.c \ + dapl/common/dapl_evd_free.c \ + dapl/common/dapl_evd_post_se.c \ + dapl/common/dapl_evd_resize.c \ + dapl/common/dapl_evd_util.c \ + dapl/common/dapl_evd_cq_async_error_callb.c \ + dapl/common/dapl_evd_qp_async_error_callb.c \ + dapl/common/dapl_evd_un_async_error_callb.c \ + dapl/common/dapl_evd_connection_callb.c \ + dapl/common/dapl_evd_dto_callb.c \ + dapl/common/dapl_get_consumer_context.c \ + dapl/common/dapl_get_handle_type.c \ + dapl/common/dapl_hash.c \ + dapl/common/dapl_hca_util.c \ + dapl/common/dapl_ia_close.c \ + dapl/common/dapl_ia_open.c \ + dapl/common/dapl_ia_query.c \ + dapl/common/dapl_ia_util.c \ + dapl/common/dapl_llist.c \ + dapl/common/dapl_lmr_free.c \ + dapl/common/dapl_lmr_query.c \ + dapl/common/dapl_lmr_util.c \ + dapl/common/dapl_lmr_sync_rdma_read.c \ + dapl/common/dapl_lmr_sync_rdma_write.c \ + dapl/common/dapl_mr_util.c \ + dapl/common/dapl_provider.c \ + dapl/common/dapl_sp_util.c \ + dapl/common/dapl_psp_create.c \ + dapl/common/dapl_psp_create_any.c \ + dapl/common/dapl_psp_free.c \ + dapl/common/dapl_psp_query.c \ + dapl/common/dapl_pz_create.c \ + dapl/common/dapl_pz_free.c \ + dapl/common/dapl_pz_query.c \ + dapl/common/dapl_pz_util.c \ + dapl/common/dapl_rmr_create.c \ + dapl/common/dapl_rmr_free.c \ + dapl/common/dapl_rmr_bind.c \ + dapl/common/dapl_rmr_query.c \ + dapl/common/dapl_rmr_util.c \ + dapl/common/dapl_rsp_create.c \ + dapl/common/dapl_rsp_free.c \ + dapl/common/dapl_rsp_query.c \ + dapl/common/dapl_cno_util.c \ + dapl/common/dapl_set_consumer_context.c \ + 
dapl/common/dapl_ring_buffer_util.c \ + dapl/common/dapl_name_service.c \ + dapl/common/dapl_timer_util.c \ + dapl/common/dapl_ep_create_with_srq.c \ + dapl/common/dapl_ep_recv_query.c \ + dapl/common/dapl_ep_set_watermark.c \ + dapl/common/dapl_srq_create.c \ + dapl/common/dapl_srq_free.c \ + dapl/common/dapl_srq_query.c \ + dapl/common/dapl_srq_resize.c \ + dapl/common/dapl_srq_post_recv.c \ + dapl/common/dapl_srq_set_lw.c \ + dapl/common/dapl_srq_util.c \ + dapl/common/dapl_debug.c \ + dapl/common/dapl_ia_ha.c \ + dapl/common/dapl_csp.c \ + dapl/common/dapl_ep_post_send_invalidate.c \ + dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ + dapl/openib_common/mem.c \ + dapl/openib_common/cq.c \ + dapl/openib_common/qp.c \ + dapl/openib_common/util.c \ + dapl/openib_ucm/cm.c \ + dapl/openib_ucm/device.c $(XPROGRAMS) + +dapl_udapl_libdaploucm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \ + -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ + -lpthread -libverbs -lrdmacm + +libdatincludedir = $(includedir)/dat2 + +libdatinclude_HEADERS = dat/include/dat2/dat.h \ + dat/include/dat2/dat_error.h \ + dat/include/dat2/dat_platform_specific.h \ + dat/include/dat2/dat_redirection.h \ + dat/include/dat2/dat_registry.h \ + dat/include/dat2/dat_vendor_specific.h \ + dat/include/dat2/udat_config.h \ + dat/include/dat2/udat.h \ + dat/include/dat2/udat_redirection.h \ + dat/include/dat2/udat_vendor_specific.h \ + dat/include/dat2/dat_ib_extensions.h + +man_MANS = man/dtest.1 man/dapltest.1 man/dat.conf.5 + +EXTRA_DIST = dat/common/dat_dictionary.h \ + dat/common/dat_dr.h \ + dat/common/dat_init.h \ + dat/common/dat_sr.h \ + dat/udat/udat_sr_parser.h \ + dat/udat/linux/dat_osd.h \ + dat/include/dat2/dat.h \ + dat/include/dat2/dat_error.h \ + dat/include/dat2/dat_platform_specific.h \ + dat/include/dat2/dat_redirection.h \ + dat/include/dat2/dat_registry.h \ + dat/include/dat2/dat_vendor_specific.h \ + dat/include/dat2/udat_config.h \ + dat/include/dat2/udat.h \ + 
dat/include/dat2/udat_redirection.h \ + dat/include/dat2/udat_vendor_specific.h \ + dapl/common/dapl_adapter_util.h \ + dapl/common/dapl_cno_util.h \ + dapl/common/dapl_cookie.h \ + dapl/common/dapl_cr_util.h \ + dapl/common/dapl_ep_util.h \ + dapl/common/dapl_evd_util.h \ + dapl/common/dapl_hash.h \ + dapl/common/dapl_hca_util.h \ + dapl/common/dapl_ia_util.h \ + dapl/common/dapl_init.h \ + dapl/common/dapl_lmr_util.h \ + dapl/common/dapl_mr_util.h \ + dapl/common/dapl_name_service.h \ + dapl/common/dapl_provider.h \ + dapl/common/dapl_pz_util.h \ + dapl/common/dapl_ring_buffer_util.h \ + dapl/common/dapl_rmr_util.h \ + dapl/common/dapl_sp_util.h \ + dapl/common/dapl_srq_util.h \ + dapl/common/dapl_timer_util.h \ + dapl/udapl/linux/dapl_osd.h \ + dapl/include/dapl.h \ + dapl/include/dapl_debug.h \ + dapl/include/dapl_ipoib_names.h \ + dapl/include/dapl_vendor.h \ + dapl/openib_common/dapl_ib_dto.h \ + dapl/openib_common/dapl_ib_common.h \ + dapl/openib_cma/dapl_ib_util.h \ + dapl/openib_cma/linux/openib_osd.h \ + dapl/openib_scm/dapl_ib_util.h \ + dapl/openib_scm/linux/openib_osd.h \ + dapl/openib_ucm/dapl_ib_util.h \ + dapl/openib_ucm/linux/openib_osd.h \ + dat/udat/libdat2.map \ + dapl/udapl/libdaplofa.map \ + dapl/udapl/libdaploscm.map \ + dapl/udapl/libdaploucm.map \ + LICENSE.txt \ + LICENSE2.txt \ + LICENSE3.txt \ + dapl.spec.in \ + $(man_MANS) \ + test/dapltest/include/dapl_bpool.h \ + test/dapltest/include/dapl_client_info.h \ + test/dapltest/include/dapl_common.h \ + test/dapltest/include/dapl_execute.h \ + test/dapltest/include/dapl_fft_cmd.h \ + test/dapltest/include/dapl_fft_util.h \ + test/dapltest/include/dapl_getopt.h \ + test/dapltest/include/dapl_global.h \ + test/dapltest/include/dapl_limit_cmd.h \ + test/dapltest/include/dapl_mdep.h \ + test/dapltest/include/dapl_memlist.h \ + test/dapltest/include/dapl_params.h \ + test/dapltest/include/dapl_performance_cmd.h \ + test/dapltest/include/dapl_performance_stats.h \ + 
test/dapltest/include/dapl_performance_test.h \ + test/dapltest/include/dapl_proto.h \ + test/dapltest/include/dapl_quit_cmd.h \ + test/dapltest/include/dapl_server_cmd.h \ + test/dapltest/include/dapl_server_info.h \ + test/dapltest/include/dapl_tdep.h \ + test/dapltest/include/dapl_tdep_print.h \ + test/dapltest/include/dapl_test_data.h \ + test/dapltest/include/dapl_transaction_cmd.h \ + test/dapltest/include/dapl_transaction_stats.h \ + test/dapltest/include/dapl_transaction_test.h \ + test/dapltest/include/dapl_version.h \ + test/dapltest/mdep/linux/dapl_mdep_user.h + +dist-hook: dapl.spec + cp dapl.spec $(distdir) + +install-exec-hook: + if ! test -d $(DESTDIR)$(sysconfdir); then \ + mkdir -p $(DESTDIR)$(sysconfdir); \ + fi; \ + if test -e $(DESTDIR)$(sysconfdir)/dat.conf; then \ + sed -e '/ofa-v2-.* u2/d' < $(DESTDIR)$(sysconfdir)/dat.conf > /tmp/$$$$ofadapl; \ + cp /tmp/$$$$ofadapl $(DESTDIR)$(sysconfdir)/dat.conf; \ + fi; \ + echo ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"ib0 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ib1 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"ib1 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mthca0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mthca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mthca0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ipath0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ipath0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ipath0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ipath0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ehca0-2 
u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ehca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-iwarp u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth2 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mlx4_0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mlx4_0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mthca0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mthca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mthca0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-cma-roe-eth2 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth2 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-cma-roe-eth3 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth3 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-scm-roe-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-scm-roe-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; + +uninstall-hook: + if test -e $(DESTDIR)$(sysconfdir)/dat.conf; then \ + sed -e '/ofa-v2-.* u2/d' < $(DESTDIR)$(sysconfdir)/dat.conf > /tmp/$$$$ofadapl; \ + cp /tmp/$$$$ofadapl $(DESTDIR)$(sysconfdir)/dat.conf; \ + fi; + +SUBDIRS = . 
test/dtest test/dapltest diff --git a/branches/WOF2-3/ulp/dapl2/configure.in b/branches/WOF2-3/ulp/dapl2/configure.in index 3ac53f74..33783274 100644 --- a/branches/WOF2-3/ulp/dapl2/configure.in +++ b/branches/WOF2-3/ulp/dapl2/configure.in @@ -31,9 +31,23 @@ AC_CHECK_MEMBER(struct ibv_port_attr.link_layer, AM_CONDITIONAL(DEFINE_ATTR_LINK_LAYER, test "yes" = "yes"), AM_CONDITIONAL(DEFINE_ATTR_LINK_LAYER, test "yes" = "no"), [#include ]) + +if test "$with_ib_acm" != "" && test "$with_ib_acm" != "no"; then +AC_CHECK_MEMBER(struct ibv_path_record.service_id, [], + AC_MSG_ERROR([IB ACM support requires libibverbs 1.1.4 or greater.]), + [#include ]) +AC_CHECK_HEADER(infiniband/acm.h, [], + AC_MSG_ERROR([IB ACM requested but not found.])) +fi + else AM_CONDITIONAL(DEFINE_ATTR_LINK_LAYER, test "yes" = "no") fi +dnl End check for libraries + +if test "$with_ib_acm" != "" && test "$with_ib_acm" != "no"; then + AC_DEFINE(DAPL_USE_IBACM, 1, [set to 1 to use IB ACM services]) +fi AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then diff --git a/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_debug.c b/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_debug.c index 904d0752..f311a7a3 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_debug.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_debug.c @@ -1,243 +1,252 @@ -/* - * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. 
- * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. - */ - -#include "dapl.h" -#if !defined(__KDAPL__) -#include -#include -#endif /* __KDAPL__ */ - -DAPL_DBG_TYPE g_dapl_dbg_type; /* initialized in dapl_init.c */ -DAPL_DBG_DEST g_dapl_dbg_dest; /* initialized in dapl_init.c */ - -static char *_ptr_host_ = NULL; -static char _hostname_[128]; - -void dapl_internal_dbg_log(DAPL_DBG_TYPE type, const char *fmt, ...) -{ - va_list args; - - if (_ptr_host_ == NULL) { - gethostname(_hostname_, sizeof(_hostname_)); - _ptr_host_ = _hostname_; - } - - if (type & g_dapl_dbg_type) { - if (DAPL_DBG_DEST_STDOUT & g_dapl_dbg_dest) { - va_start(args, fmt); - fprintf(stdout, "%s:%x: ", _ptr_host_, - dapl_os_getpid()); - dapl_os_vprintf(fmt, args); - va_end(args); - } - - if (DAPL_DBG_DEST_SYSLOG & g_dapl_dbg_dest) { - va_start(args, fmt); - dapl_os_syslog(fmt, args); - va_end(args); - } - } -} - -#ifdef DAPL_COUNTERS - -/* - * The order of this list must match the DAT counter definitions - */ -static char *ia_cntr_names[] = { - "DCNT_IA_PZ_CREATE", - "DCNT_IA_PZ_FREE", - "DCNT_IA_LMR_CREATE", - "DCNT_IA_LMR_FREE", - "DCNT_IA_RMR_CREATE", - "DCNT_IA_RMR_FREE", - "DCNT_IA_PSP_CREATE", - "DCNT_IA_PSP_CREATE_ANY", - "DCNT_IA_PSP_FREE", - "DCNT_IA_RSP_CREATE", - "DCNT_IA_RSP_FREE", - "DCNT_IA_EVD_CREATE", - "DCNT_IA_EVD_FREE", - "DCNT_IA_EP_CREATE", - "DCNT_IA_EP_FREE", - "DCNT_IA_SRQ_CREATE", - "DCNT_IA_SRQ_FREE", - "DCNT_IA_SP_CR", - "DCNT_IA_SP_CR_ACCEPTED", - 
"DCNT_IA_SP_CR_REJECTED", - "DCNT_IA_MEM_ALLOC", - "DCNT_IA_MEM_ALLOC_DATA", - "DCNT_IA_MEM_FREE", - "DCNT_IA_ASYNC_ERROR", - "DCNT_IA_ASYNC_QP_ERROR", - "DCNT_IA_ASYNC_CQ_ERROR" -}; - -static char *ep_cntr_names[] = { - "DCNT_EP_CONNECT", - "DCNT_EP_DISCONNECT", - "DCNT_EP_POST_SEND", - "DCNT_EP_POST_SEND_DATA", - "DCNT_EP_POST_SEND_UD", - "DCNT_EP_POST_SEND_UD_DATA", - "DCNT_EP_POST_RECV", - "DCNT_EP_POST_RECV_DATA", - "DCNT_EP_POST_WRITE", - "DCNT_EP_POST_WRITE_DATA", - "DCNT_EP_POST_WRITE_IMM", - "DCNT_EP_POST_WRITE_IMM_DATA", - "DCNT_EP_POST_READ", - "DCNT_EP_POST_READ_DATA", - "DCNT_EP_POST_CMP_SWAP", - "DCNT_EP_POST_FETCH_ADD", - "DCNT_EP_RECV", - "DCNT_EP_RECV_DATA", - "DCNT_EP_RECV_UD", - "DCNT_EP_RECV_UD_DATA", - "DCNT_EP_RECV_IMM", - "DCNT_EP_RECV_IMM_DATA", - "DCNT_EP_RECV_RDMA_IMM", - "DCNT_EP_RECV_RDMA_IMM_DATA", -}; - -static char *evd_cntr_names[] = { - "DCNT_EVD_WAIT", - "DCNT_EVD_WAIT_BLOCKED", - "DCNT_EVD_WAIT_NOTIFY", - "DCNT_EVD_DEQUEUE", - "DCNT_EVD_DEQUEUE_FOUND", - "DCNT_EVD_DEQUEUE_NOT_FOUND", - "DCNT_EVD_DEQUEUE_POLL", - "DCNT_EVD_DEQUEUE_POLL_FOUND", - "DCNT_EVD_CONN_CALLBACK", - "DCNT_EVD_DTO_CALLBACK", -}; - -DAT_RETURN dapl_query_counter(DAT_HANDLE dh, - int counter, void *p_cntrs_out, int reset) -{ - int i, max; - DAT_UINT64 *p_cntrs; - DAT_HANDLE_TYPE type = 0; - - dat_get_handle_type(dh, &type); - - switch (type) { - case DAT_HANDLE_TYPE_IA: - max = DCNT_IA_ALL_COUNTERS; - p_cntrs = ((DAPL_IA *) dh)->cntrs; - break; - case DAT_HANDLE_TYPE_EP: - max = DCNT_EP_ALL_COUNTERS; - p_cntrs = ((DAPL_EP *) dh)->cntrs; - break; - case DAT_HANDLE_TYPE_EVD: - max = DCNT_EVD_ALL_COUNTERS; - p_cntrs = ((DAPL_EVD *) dh)->cntrs; - break; - default: - return DAT_INVALID_HANDLE; - } - - for (i = 0; i < max; i++) { - if ((counter == i) || (counter == max)) { - ((DAT_UINT64 *) p_cntrs_out)[i] = p_cntrs[i]; - if (reset) - p_cntrs[i] = 0; - } - } - return DAT_SUCCESS; -} - -char *dapl_query_counter_name(DAT_HANDLE dh, int counter) -{ - DAT_HANDLE_TYPE 
type = 0; - - dat_get_handle_type(dh, &type); - - switch (type) { - case DAT_HANDLE_TYPE_IA: - if (counter < DCNT_IA_ALL_COUNTERS) - return ia_cntr_names[counter]; - break; - case DAT_HANDLE_TYPE_EP: - if (counter < DCNT_EP_ALL_COUNTERS) - return ep_cntr_names[counter]; - break; - case DAT_HANDLE_TYPE_EVD: - if (counter < DCNT_EVD_ALL_COUNTERS) - return evd_cntr_names[counter]; - break; - default: - return NULL; - } - return NULL; -} - -void dapl_print_counter(DAT_HANDLE dh, int counter, int reset) -{ - int i, max; - DAT_UINT64 *p_cntrs; - DAT_HANDLE_TYPE type = 0; - - dat_get_handle_type(dh, &type); - - switch (type) { - case DAT_HANDLE_TYPE_IA: - max = DCNT_IA_ALL_COUNTERS; - p_cntrs = ((DAPL_IA *) dh)->cntrs; - break; - case DAT_HANDLE_TYPE_EP: - max = DCNT_EP_ALL_COUNTERS; - p_cntrs = ((DAPL_EP *) dh)->cntrs; - break; - case DAT_HANDLE_TYPE_EVD: - max = DCNT_EVD_ALL_COUNTERS; - p_cntrs = ((DAPL_EVD *) dh)->cntrs; - break; - default: - return; - } - - for (i = 0; i < max; i++) { - if ((counter == i) || (counter == max)) { - printf(" %s = " F64u " \n", - dapl_query_counter_name(dh, i), p_cntrs[i]); - if (reset) - p_cntrs[i] = 0; - } - } - - /* Print in process CR's for this IA, if debug type set */ - if ((type == DAT_HANDLE_TYPE_IA) && - (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST)) { - dapls_print_cm_list((DAPL_IA*)dh); - } - return; -} - -#endif /* DAPL_COUNTERS */ +/* + * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. 
+ * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + */ + +#include "dapl.h" +#if !defined(__KDAPL__) +#include +#include +#endif /* __KDAPL__ */ + +DAPL_DBG_TYPE g_dapl_dbg_type; /* initialized in dapl_init.c */ +DAPL_DBG_DEST g_dapl_dbg_dest; /* initialized in dapl_init.c */ + +static char *_ptr_host_ = NULL; +static char _hostname_[128]; +static DAPL_OS_TIMEVAL start_t, current_t, last_t; /* microsecond timeStamp STDOUT */ +static int delta_t, total_t; + +void dapl_internal_dbg_log(DAPL_DBG_TYPE type, const char *fmt, ...) +{ + va_list args; + + if (_ptr_host_ == NULL) { + gethostname(_hostname_, sizeof(_hostname_)); + _ptr_host_ = _hostname_; + dapl_os_get_time(&start_t); + last_t = start_t; + } + dapl_os_get_time(&current_t); + delta_t = current_t - last_t; + total_t = current_t - start_t; + last_t = current_t; + + if (type & g_dapl_dbg_type) { + if (DAPL_DBG_DEST_STDOUT & g_dapl_dbg_dest) { + va_start(args, fmt); + fprintf(stdout, "%s:%x:%x: %d us(%d us%s): ", + _ptr_host_, dapl_os_getpid(), dapl_os_gettid(), + total_t, delta_t, delta_t > 500000 ? 
"!!!":""); + dapl_os_vprintf(fmt, args); + va_end(args); + } + + if (DAPL_DBG_DEST_SYSLOG & g_dapl_dbg_dest) { + va_start(args, fmt); + dapl_os_syslog(fmt, args); + va_end(args); + } + } +} + +#ifdef DAPL_COUNTERS + +/* + * The order of this list must match the DAT counter definitions + */ +static char *ia_cntr_names[] = { + "DCNT_IA_PZ_CREATE", + "DCNT_IA_PZ_FREE", + "DCNT_IA_LMR_CREATE", + "DCNT_IA_LMR_FREE", + "DCNT_IA_RMR_CREATE", + "DCNT_IA_RMR_FREE", + "DCNT_IA_PSP_CREATE", + "DCNT_IA_PSP_CREATE_ANY", + "DCNT_IA_PSP_FREE", + "DCNT_IA_RSP_CREATE", + "DCNT_IA_RSP_FREE", + "DCNT_IA_EVD_CREATE", + "DCNT_IA_EVD_FREE", + "DCNT_IA_EP_CREATE", + "DCNT_IA_EP_FREE", + "DCNT_IA_SRQ_CREATE", + "DCNT_IA_SRQ_FREE", + "DCNT_IA_SP_CR", + "DCNT_IA_SP_CR_ACCEPTED", + "DCNT_IA_SP_CR_REJECTED", + "DCNT_IA_MEM_ALLOC", + "DCNT_IA_MEM_ALLOC_DATA", + "DCNT_IA_MEM_FREE", + "DCNT_IA_ASYNC_ERROR", + "DCNT_IA_ASYNC_QP_ERROR", + "DCNT_IA_ASYNC_CQ_ERROR" +}; + +static char *ep_cntr_names[] = { + "DCNT_EP_CONNECT", + "DCNT_EP_DISCONNECT", + "DCNT_EP_POST_SEND", + "DCNT_EP_POST_SEND_DATA", + "DCNT_EP_POST_SEND_UD", + "DCNT_EP_POST_SEND_UD_DATA", + "DCNT_EP_POST_RECV", + "DCNT_EP_POST_RECV_DATA", + "DCNT_EP_POST_WRITE", + "DCNT_EP_POST_WRITE_DATA", + "DCNT_EP_POST_WRITE_IMM", + "DCNT_EP_POST_WRITE_IMM_DATA", + "DCNT_EP_POST_READ", + "DCNT_EP_POST_READ_DATA", + "DCNT_EP_POST_CMP_SWAP", + "DCNT_EP_POST_FETCH_ADD", + "DCNT_EP_RECV", + "DCNT_EP_RECV_DATA", + "DCNT_EP_RECV_UD", + "DCNT_EP_RECV_UD_DATA", + "DCNT_EP_RECV_IMM", + "DCNT_EP_RECV_IMM_DATA", + "DCNT_EP_RECV_RDMA_IMM", + "DCNT_EP_RECV_RDMA_IMM_DATA", +}; + +static char *evd_cntr_names[] = { + "DCNT_EVD_WAIT", + "DCNT_EVD_WAIT_BLOCKED", + "DCNT_EVD_WAIT_NOTIFY", + "DCNT_EVD_DEQUEUE", + "DCNT_EVD_DEQUEUE_FOUND", + "DCNT_EVD_DEQUEUE_NOT_FOUND", + "DCNT_EVD_DEQUEUE_POLL", + "DCNT_EVD_DEQUEUE_POLL_FOUND", + "DCNT_EVD_CONN_CALLBACK", + "DCNT_EVD_DTO_CALLBACK", +}; + +DAT_RETURN dapl_query_counter(DAT_HANDLE dh, + int counter, void 
*p_cntrs_out, int reset) +{ + int i, max; + DAT_UINT64 *p_cntrs; + DAT_HANDLE_TYPE type = 0; + + dat_get_handle_type(dh, &type); + + switch (type) { + case DAT_HANDLE_TYPE_IA: + max = DCNT_IA_ALL_COUNTERS; + p_cntrs = ((DAPL_IA *) dh)->cntrs; + break; + case DAT_HANDLE_TYPE_EP: + max = DCNT_EP_ALL_COUNTERS; + p_cntrs = ((DAPL_EP *) dh)->cntrs; + break; + case DAT_HANDLE_TYPE_EVD: + max = DCNT_EVD_ALL_COUNTERS; + p_cntrs = ((DAPL_EVD *) dh)->cntrs; + break; + default: + return DAT_INVALID_HANDLE; + } + + for (i = 0; i < max; i++) { + if ((counter == i) || (counter == max)) { + ((DAT_UINT64 *) p_cntrs_out)[i] = p_cntrs[i]; + if (reset) + p_cntrs[i] = 0; + } + } + return DAT_SUCCESS; +} + +char *dapl_query_counter_name(DAT_HANDLE dh, int counter) +{ + DAT_HANDLE_TYPE type = 0; + + dat_get_handle_type(dh, &type); + + switch (type) { + case DAT_HANDLE_TYPE_IA: + if (counter < DCNT_IA_ALL_COUNTERS) + return ia_cntr_names[counter]; + break; + case DAT_HANDLE_TYPE_EP: + if (counter < DCNT_EP_ALL_COUNTERS) + return ep_cntr_names[counter]; + break; + case DAT_HANDLE_TYPE_EVD: + if (counter < DCNT_EVD_ALL_COUNTERS) + return evd_cntr_names[counter]; + break; + default: + return NULL; + } + return NULL; +} + +void dapl_print_counter(DAT_HANDLE dh, int counter, int reset) +{ + int i, max; + DAT_UINT64 *p_cntrs; + DAT_HANDLE_TYPE type = 0; + + dat_get_handle_type(dh, &type); + + switch (type) { + case DAT_HANDLE_TYPE_IA: + max = DCNT_IA_ALL_COUNTERS; + p_cntrs = ((DAPL_IA *) dh)->cntrs; + break; + case DAT_HANDLE_TYPE_EP: + max = DCNT_EP_ALL_COUNTERS; + p_cntrs = ((DAPL_EP *) dh)->cntrs; + break; + case DAT_HANDLE_TYPE_EVD: + max = DCNT_EVD_ALL_COUNTERS; + p_cntrs = ((DAPL_EVD *) dh)->cntrs; + break; + default: + return; + } + + for (i = 0; i < max; i++) { + if ((counter == i) || (counter == max)) { + printf(" %s = " F64u " \n", + dapl_query_counter_name(dh, i), p_cntrs[i]); + if (reset) + p_cntrs[i] = 0; + } + } + + /* Print in process CR's for this IA, if debug type set */ + if 
((type == DAT_HANDLE_TYPE_IA) && + (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST)) { + dapls_print_cm_list((DAPL_IA*)dh); + } + return; +} + +#endif /* DAPL_COUNTERS */ diff --git a/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_ep_free.c b/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_ep_free.c index 32d50cce..d2f9b504 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_ep_free.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_ep_free.c @@ -1,221 +1,224 @@ -/* - * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -/********************************************************************** - * - * MODULE: dapl_ep_free.c - * - * PURPOSE: Endpoint management - * Description: Interfaces in this file are completely described in - * the DAPL 1.1 API, Chapter 6, section 5.4 - * - * $Id:$ - **********************************************************************/ - -#include "dapl.h" -#include "dapl_ia_util.h" -#include "dapl_ep_util.h" -#include "dapl_adapter_util.h" -#include "dapl_ring_buffer_util.h" -#include "dapl_timer_util.h" - -/* - * dapl_ep_free - * - * DAPL Requirements Version xxx, 6.5.3 - * - * Destroy an instance of the Endpoint - * - * Input: - * ep_handle - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INVALID_PARAMETER - * DAT_INVALID_STATE - */ -DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle) -{ - DAPL_EP *ep_ptr; - DAPL_IA *ia_ptr; - DAT_EP_PARAM *param; - dp_ib_cm_handle_t cm_ptr, next_cm_ptr; - ib_qp_state_t save_qp_state; - DAT_RETURN dat_status = DAT_SUCCESS; - - dapl_dbg_log(DAPL_DBG_TYPE_API | DAPL_DBG_TYPE_CM, - "dapl_ep_free (%p)\n", ep_handle); - - ep_ptr = (DAPL_EP *) ep_handle; - param = &ep_ptr->param; - - /* - * Verify parameter & state - */ - if (DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP)) { - dat_status = - DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP); - goto bail; - } - DAPL_CNTR(ep_ptr->header.owner_ia, DCNT_IA_EP_FREE); - - if (ep_ptr->param.ep_state == DAT_EP_STATE_RESERVED || - ep_ptr->param.ep_state == DAT_EP_STATE_PASSIVE_CONNECTION_PENDING || - ep_ptr->param.ep_state == DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING) - { - dapl_dbg_log(DAPL_DBG_TYPE_WARN, - "--> dapl_ep_free: invalid state: %x, ep %p\n", - ep_ptr->param.ep_state, ep_ptr); - dat_status = DAT_ERROR(DAT_INVALID_STATE, - dapls_ep_state_subtype(ep_ptr)); - goto bail; - } - - ia_ptr = ep_ptr->header.owner_ia; - - /* If we are connected, issue a disconnect. If we are in the - * disconnect_pending state, disconnect with the ABRUPT flag - * set. 
- */ - - /* - * Invoke ep_disconnect to clean up outstanding connections - */ - (void)dapl_ep_disconnect(ep_ptr, DAT_CLOSE_ABRUPT_FLAG); - - /* Free all CM objects */ - cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) - ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); - while (cm_ptr != NULL) { - dapl_log(DAPL_DBG_TYPE_EP, - "dapl_ep_free: Free CM: EP=%p CM=%p\n", - ep_ptr, cm_ptr); - - next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head, - &cm_ptr->list_entry); - dapls_cm_free(cm_ptr); /* blocking call */ - cm_ptr = next_cm_ptr; - } - - /* - * Do verification of parameters and the state change atomically. - */ - dapl_os_lock(&ep_ptr->header.lock); - -#ifdef DAPL_DBG - /* check if event pending and warn, don't assert, state is valid */ - if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { - dapl_dbg_log(DAPL_DBG_TYPE_WARN, " dat_ep_free WARNING: " - "EVENT PENDING on ep %p, disconnect " - "and wait before calling dat_ep_free\n", ep_ptr); - } -#endif - - if (ep_ptr->cxn_timer != NULL) { - dapls_timer_cancel(ep_ptr->cxn_timer); - dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER)); - ep_ptr->cxn_timer = NULL; - } - - /* Remove the EP from the IA */ - dapl_ia_unlink_ep(ia_ptr, ep_ptr); - - /* - * Update ref counts. Note the user may have used ep_modify - * to set handles to NULL. Set handles to NULL so this routine - * is idempotent. 
- */ - if (param->pz_handle != NULL) { - dapl_os_atomic_dec(&((DAPL_PZ *) param->pz_handle)-> - pz_ref_count); - param->pz_handle = NULL; - } - if (param->recv_evd_handle != NULL) { - dapl_os_atomic_dec(&((DAPL_EVD *) param->recv_evd_handle)-> - evd_ref_count); - param->recv_evd_handle = NULL; - } - if (param->request_evd_handle != NULL) { - dapl_os_atomic_dec(&((DAPL_EVD *) param->request_evd_handle)-> - evd_ref_count); - param->request_evd_handle = NULL; - } - if (param->connect_evd_handle != NULL) { - dapl_os_atomic_dec(&((DAPL_EVD *) param->connect_evd_handle)-> - evd_ref_count); - param->connect_evd_handle = NULL; - } - - /* - * Finish tearing everything down. - */ - dapl_dbg_log(DAPL_DBG_TYPE_EP | DAPL_DBG_TYPE_CM, - "dapl_ep_free: Free EP: %x, ep %p qp_state %x qp_handle %x\n", - ep_ptr->param.ep_state, - ep_ptr, ep_ptr->qp_state, ep_ptr->qp_handle); - /* - * Take care of the transport resource. Make a copy of the qp_state - * to prevent race conditions when we exit the lock. - */ - save_qp_state = ep_ptr->qp_state; - ep_ptr->qp_state = DAPL_QP_STATE_UNATTACHED; - dapl_os_unlock(&ep_ptr->header.lock); - - /* Free the QP. If the EP has never been used, the QP is invalid */ - if (save_qp_state != DAPL_QP_STATE_UNATTACHED) { - dat_status = dapls_ib_qp_free(ia_ptr, ep_ptr); - /* This should always succeed, but report to the user if - * there is a problem. The qp_state must be restored so - * they can try it again in the face of EINTR or similar - * where the QP is OK but the call couldn't complete. - */ - if (dat_status != DAT_SUCCESS) { - ep_ptr->qp_state = save_qp_state; - goto bail; - } - } - - dapls_ep_flush_cqs(ep_ptr); - - /* Free the resource */ - dapl_ep_dealloc(ep_ptr); - - bail: - return dat_status; - -} - -/* - * Local variables: - * c-indent-level: 4 - * c-basic-offset: 4 - * tab-width: 8 - * End: - */ +/* + * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. 
+ * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +/********************************************************************** + * + * MODULE: dapl_ep_free.c + * + * PURPOSE: Endpoint management + * Description: Interfaces in this file are completely described in + * the DAPL 1.1 API, Chapter 6, section 5.4 + * + * $Id:$ + **********************************************************************/ + +#include "dapl.h" +#include "dapl_ia_util.h" +#include "dapl_ep_util.h" +#include "dapl_adapter_util.h" +#include "dapl_ring_buffer_util.h" +#include "dapl_timer_util.h" + +/* + * dapl_ep_free + * + * DAPL Requirements Version xxx, 6.5.3 + * + * Destroy an instance of the Endpoint + * + * Input: + * ep_handle + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INVALID_PARAMETER + * DAT_INVALID_STATE + */ +DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle) +{ + DAPL_EP *ep_ptr; + DAPL_IA *ia_ptr; + DAT_EP_PARAM *param; + dp_ib_cm_handle_t cm_ptr, next_cm_ptr; + ib_qp_state_t save_qp_state; + DAT_RETURN dat_status = DAT_SUCCESS; + + dapl_dbg_log(DAPL_DBG_TYPE_API | DAPL_DBG_TYPE_CM, + "dapl_ep_free (%p)\n", ep_handle); + + ep_ptr = (DAPL_EP *) ep_handle; + param = &ep_ptr->param; + + /* + * Verify parameter & state + */ + if (DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP)) { + dat_status = + DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP); + goto bail; + } + DAPL_CNTR(ep_ptr->header.owner_ia, DCNT_IA_EP_FREE); + + if (ep_ptr->param.ep_state == DAT_EP_STATE_RESERVED || + ep_ptr->param.ep_state == DAT_EP_STATE_PASSIVE_CONNECTION_PENDING || + ep_ptr->param.ep_state == DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING) + { + dapl_dbg_log(DAPL_DBG_TYPE_WARN, + "--> dapl_ep_free: invalid state: %x, ep %p\n", + ep_ptr->param.ep_state, ep_ptr); + dat_status = DAT_ERROR(DAT_INVALID_STATE, + dapls_ep_state_subtype(ep_ptr)); + goto bail; + } + + ia_ptr = ep_ptr->header.owner_ia; + + /* If we are connected, issue a disconnect. If we are in the + * disconnect_pending state, disconnect with the ABRUPT flag + * set. 
+ */ + + /* + * Invoke ep_disconnect to clean up outstanding connections + */ + (void)dapl_ep_disconnect(ep_ptr, DAT_CLOSE_ABRUPT_FLAG); + + /* Free all CM objects */ + cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) + ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); + while (cm_ptr != NULL) { + dapl_log(DAPL_DBG_TYPE_EP, + "dapl_ep_free: Free CM: EP=%p CM=%p\n", + ep_ptr, cm_ptr); + + next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head, + &cm_ptr->list_entry); + dapls_cm_free(cm_ptr); /* blocking call */ + cm_ptr = next_cm_ptr; + } + + /* + * Do verification of parameters and the state change atomically. + */ + dapl_os_lock(&ep_ptr->header.lock); + +#ifdef DAPL_DBG + /* check if event pending and warn, don't assert, state is valid */ + if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { + dapl_dbg_log(DAPL_DBG_TYPE_WARN, " dat_ep_free WARNING: " + "EVENT PENDING on ep %p, disconnect " + "and wait before calling dat_ep_free\n", ep_ptr); + } +#endif + + if (ep_ptr->cxn_timer != NULL) { + dapls_timer_cancel(ep_ptr->cxn_timer); + dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER)); + ep_ptr->cxn_timer = NULL; + } + + /* Remove the EP from the IA */ + dapl_ia_unlink_ep(ia_ptr, ep_ptr); + + /* + * Update ref counts. Note the user may have used ep_modify + * to set handles to NULL. Set handles to NULL so this routine + * is idempotent. + */ + if (param->pz_handle != NULL) { + dapl_os_atomic_dec(&((DAPL_PZ *) param->pz_handle)-> + pz_ref_count); + param->pz_handle = NULL; + } + if (param->connect_evd_handle != NULL) { + dapl_os_atomic_dec(&((DAPL_EVD *) param->connect_evd_handle)-> + evd_ref_count); + param->connect_evd_handle = NULL; + } + + /* + * Finish tearing everything down. + */ + dapl_dbg_log(DAPL_DBG_TYPE_EP | DAPL_DBG_TYPE_CM, + "dapl_ep_free: Free EP: %x, ep %p qp_state %x qp_handle %x\n", + ep_ptr->param.ep_state, + ep_ptr, ep_ptr->qp_state, ep_ptr->qp_handle); + /* + * Take care of the transport resource. 
Make a copy of the qp_state + * to prevent race conditions when we exit the lock. + */ + save_qp_state = ep_ptr->qp_state; + ep_ptr->qp_state = DAPL_QP_STATE_UNATTACHED; + dapl_os_unlock(&ep_ptr->header.lock); + + /* Free the QP. If the EP has never been used, the QP is invalid */ + if (save_qp_state != DAPL_QP_STATE_UNATTACHED) { + dat_status = dapls_ib_qp_free(ia_ptr, ep_ptr); + /* This should always succeed, but report to the user if + * there is a problem. The qp_state must be restored so + * they can try it again in the face of EINTR or similar + * where the QP is OK but the call couldn't complete. + */ + if (dat_status != DAT_SUCCESS) { + ep_ptr->qp_state = save_qp_state; + goto bail; + } + } + + /* + * Release the EVD handles after we destroy the QP, so we can flush all + * QP entries. + */ + if (param->recv_evd_handle != NULL) { + dapl_os_atomic_dec(&((DAPL_EVD *) param->recv_evd_handle)-> + evd_ref_count); + param->recv_evd_handle = NULL; + } + if (param->request_evd_handle != NULL) { + dapl_os_atomic_dec(&((DAPL_EVD *) param->request_evd_handle)-> + evd_ref_count); + param->request_evd_handle = NULL; + } + + /* Free the resource */ + dapl_ep_dealloc(ep_ptr); + + bail: + return dat_status; + +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 8 + * End: + */ diff --git a/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_ep_util.c b/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_ep_util.c index fc911a6a..eb7cab24 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_ep_util.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_ep_util.c @@ -1,636 +1,634 @@ -/* - * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. 
- * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. - */ - -/********************************************************************** - * - * MODULE: dapl_ep_util.c - * - * PURPOSE: Manage EP Info structure - * - * $Id:$ - **********************************************************************/ - -#include "dapl_ep_util.h" -#include "dapl_ring_buffer_util.h" -#include "dapl_cookie.h" -#include "dapl_adapter_util.h" -#include "dapl_evd_util.h" -#include "dapl_cr_util.h" /* for callback routine */ - -/* - * Local definitions - */ -/* - * Default number of I/O operations on an end point - */ -#define IB_IO_DEFAULT 16 -/* - * Default number of scatter/gather entries available to a single - * post send/recv - */ -#define IB_IOV_DEFAULT 4 - -/* - * Default number of RDMA operations in progress at a time - */ -#define IB_RDMA_DEFAULT 4 - -extern void dapli_ep_default_attrs(IN DAPL_EP * ep_ptr); - -char *dapl_get_ep_state_str(DAT_EP_STATE state) -{ -#ifdef DAPL_DBG - static char *state_str[DAT_EP_STATE_CONNECTED_MULTI_PATH + 1] = { - "DAT_EP_STATE_UNCONNECTED", /* quiescent state */ - "DAT_EP_STATE_UNCONFIGURED_UNCONNECTED", - "DAT_EP_STATE_RESERVED", - "DAT_EP_STATE_UNCONFIGURED_RESERVED", - "DAT_EP_STATE_PASSIVE_CONNECTION_PENDING", - "DAT_EP_STATE_UNCONFIGURED_PASSIVE", - 
"DAT_EP_STATE_ACTIVE_CONNECTION_PENDING", - "DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING", - "DAT_EP_STATE_UNCONFIGURED_TENTATIVE", - "DAT_EP_STATE_CONNECTED", - "DAT_EP_STATE_DISCONNECT_PENDING", - "DAT_EP_STATE_DISCONNECTED", - "DAT_EP_STATE_COMPLETION_PENDING", - "DAT_EP_STATE_CONNECTED_SINGLE_PATH", - "DAT_EP_STATE_CONNECTED_MULTI_PATH" - }; - return state_str[state]; -#else - static char buf[12]; - sprintf(buf, "%d", state); - return buf; -#endif -} - -/* - * dapl_ep_alloc - * - * alloc and initialize an EP INFO struct - * - * Input: - * IA INFO struct ptr - * - * Output: - * ep_ptr - * - * Returns: - * none - * - */ -DAPL_EP *dapl_ep_alloc(IN DAPL_IA * ia_ptr, IN const DAT_EP_ATTR * ep_attr) -{ - DAPL_EP *ep_ptr; - - /* Allocate EP */ - ep_ptr = - (DAPL_EP *) dapl_os_alloc(sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); - if (ep_ptr == NULL) { - goto bail; - } - - /* zero the structure */ - dapl_os_memzero(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); - -#ifdef DAPL_COUNTERS - /* Allocate counters */ - ep_ptr->cntrs = - dapl_os_alloc(sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); - if (ep_ptr->cntrs == NULL) { - dapl_os_free(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); - return (NULL); - } - dapl_os_memzero(ep_ptr->cntrs, - sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); -#endif /* DAPL_COUNTERS */ - - /* - * initialize the header - */ - ep_ptr->header.provider = ia_ptr->header.provider; - ep_ptr->header.magic = DAPL_MAGIC_EP; - ep_ptr->header.handle_type = DAT_HANDLE_TYPE_EP; - ep_ptr->header.owner_ia = ia_ptr; - ep_ptr->header.user_context.as_64 = 0; - ep_ptr->header.user_context.as_ptr = NULL; - - dapl_llist_init_entry(&ep_ptr->header.ia_list_entry); - dapl_llist_init_head(&ep_ptr->cm_list_head); - dapl_os_lock_init(&ep_ptr->header.lock); - - /* - * Initialize the body - */ - /* - * Set up default parameters if the user passed in a NULL - */ - if (ep_attr == NULL) { - dapli_ep_default_attrs(ep_ptr); - } else { - ep_ptr->param.ep_attr = *ep_attr; - } - - /* - * 
IBM OS API specific fields - */ - ep_ptr->qp_handle = IB_INVALID_HANDLE; - ep_ptr->qpn = 0; - ep_ptr->qp_state = DAPL_QP_STATE_UNATTACHED; - - if (DAT_SUCCESS != dapls_cb_create(&ep_ptr->req_buffer, - ep_ptr, - ep_ptr->param.ep_attr. - max_request_dtos)) { - dapl_ep_dealloc(ep_ptr); - ep_ptr = NULL; - goto bail; - } - - if (DAT_SUCCESS != dapls_cb_create(&ep_ptr->recv_buffer, - ep_ptr, - ep_ptr->param.ep_attr.max_recv_dtos)) - { - dapl_ep_dealloc(ep_ptr); - ep_ptr = NULL; - goto bail; - } - - dapls_io_trc_alloc(ep_ptr); - - bail: - return ep_ptr; -} - -/* - * dapl_ep_dealloc - * - * Free the passed in EP structure. - * - * Input: - * entry point pointer - * - * Output: - * none - * - * Returns: - * none - * - */ -void dapl_ep_dealloc(IN DAPL_EP * ep_ptr) -{ - dapl_os_assert(ep_ptr->header.magic == DAPL_MAGIC_EP); - - ep_ptr->header.magic = DAPL_MAGIC_INVALID; /* reset magic to prevent reuse */ - - dapls_cb_free(&ep_ptr->req_buffer); - dapls_cb_free(&ep_ptr->recv_buffer); - - if (NULL != ep_ptr->cxn_timer) { - dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER)); - } - -#ifdef DAPL_COUNTERS - dapl_os_free(ep_ptr->cntrs, sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); -#endif /* DAPL_COUNTERS */ - - dapl_os_free(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); -} - -/* - * dapl_ep_default_attrs - * - * Set default values in the parameter fields - * - * Input: - * entry point pointer - * - * Output: - * none - * - * Returns: - * none - * - */ -void dapli_ep_default_attrs(IN DAPL_EP * ep_ptr) -{ - DAT_EP_ATTR ep_attr_limit; - DAT_EP_ATTR *ep_attr; - DAT_RETURN dat_status; - - ep_attr = &ep_ptr->param.ep_attr; - /* Set up defaults */ - dapl_os_memzero(ep_attr, sizeof(DAT_EP_ATTR)); - - /* mtu and rdma sizes fixed in IB as per IBTA 1.1, 9.4.3, 9.4.4, 9.7.7. 
*/ - ep_attr->max_mtu_size = 0x80000000; - ep_attr->max_rdma_size = 0x80000000; - - ep_attr->qos = DAT_QOS_BEST_EFFORT; - ep_attr->service_type = DAT_SERVICE_TYPE_RC; - ep_attr->max_recv_dtos = IB_IO_DEFAULT; - ep_attr->max_request_dtos = IB_IO_DEFAULT; - ep_attr->max_recv_iov = IB_IOV_DEFAULT; - ep_attr->max_request_iov = IB_IOV_DEFAULT; - ep_attr->max_rdma_read_in = IB_RDMA_DEFAULT; - ep_attr->max_rdma_read_out = IB_RDMA_DEFAULT; - - /* - * Configure the EP as a standard completion type, which will be - * used by the EVDs. A threshold of 1 is the default state of an - * EVD. - */ - ep_attr->request_completion_flags = DAT_COMPLETION_EVD_THRESHOLD_FLAG; - ep_attr->recv_completion_flags = DAT_COMPLETION_EVD_THRESHOLD_FLAG; - /* - * Unspecified defaults: - * - ep_privileges: No RDMA capabilities - * - num_transport_specific_params: none - * - transport_specific_params: none - * - num_provider_specific_params: 0 - * - provider_specific_params: 0 - */ - - dat_status = dapls_ib_query_hca(ep_ptr->header.owner_ia->hca_ptr, - NULL, &ep_attr_limit, NULL); - /* check against HCA maximums */ - if (dat_status == DAT_SUCCESS) { - ep_ptr->param.ep_attr.max_mtu_size = - DAPL_MIN(ep_ptr->param.ep_attr.max_mtu_size, - ep_attr_limit.max_mtu_size); - ep_ptr->param.ep_attr.max_rdma_size = - DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_size, - ep_attr_limit.max_rdma_size); - ep_ptr->param.ep_attr.max_recv_dtos = - DAPL_MIN(ep_ptr->param.ep_attr.max_recv_dtos, - ep_attr_limit.max_recv_dtos); - ep_ptr->param.ep_attr.max_request_dtos = - DAPL_MIN(ep_ptr->param.ep_attr.max_request_dtos, - ep_attr_limit.max_request_dtos); - ep_ptr->param.ep_attr.max_recv_iov = - DAPL_MIN(ep_ptr->param.ep_attr.max_recv_iov, - ep_attr_limit.max_recv_iov); - ep_ptr->param.ep_attr.max_request_iov = - DAPL_MIN(ep_ptr->param.ep_attr.max_request_iov, - ep_attr_limit.max_request_iov); - ep_ptr->param.ep_attr.max_rdma_read_in = - DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_read_in, - ep_attr_limit.max_rdma_read_in); - 
ep_ptr->param.ep_attr.max_rdma_read_out = - DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_read_out, - ep_attr_limit.max_rdma_read_out); - } -} - -DAT_RETURN dapl_ep_check_recv_completion_flags(DAT_COMPLETION_FLAGS flags) -{ - - /* - * InfiniBand will not allow signal suppression for RECV completions, - * see the 1.0.1 spec section 10.7.3.1, 10.8.6. - * N.B. SIGNALLED has a different meaning in dapl than it does - * in IB; IB SIGNALLED is the same as DAPL SUPPRESS. DAPL - * SIGNALLED simply means the user will not get awakened when - * an EVD completes, even though the dapl handler is invoked. - */ - - if (flags & DAT_COMPLETION_SUPPRESS_FLAG) { - return DAT_INVALID_PARAMETER; - } - - return DAT_SUCCESS; -} - -DAT_RETURN dapl_ep_check_request_completion_flags(DAT_COMPLETION_FLAGS flags) -{ - return DAT_SUCCESS; -} - -DAT_RETURN -dapl_ep_post_send_req(IN DAT_EP_HANDLE ep_handle, - IN DAT_COUNT num_segments, - IN DAT_LMR_TRIPLET * local_iov, - IN DAT_DTO_COOKIE user_cookie, - IN const DAT_RMR_TRIPLET * remote_iov, - IN DAT_COMPLETION_FLAGS completion_flags, - IN DAPL_DTO_TYPE dto_type, IN int op_type) -{ - DAPL_EP *ep_ptr; - DAPL_COOKIE *cookie; - DAT_RETURN dat_status; - - if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP)) { - dat_status = - DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP); - goto bail; - } - - ep_ptr = (DAPL_EP *) ep_handle; - - /* - * Synchronization ok since this buffer is only used for send - * requests, which aren't allowed to race with each other. 
- */ - dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer, - dto_type, user_cookie, &cookie); - if (dat_status != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " dapl_post_req resource ERR:" - " dtos pending = %d, max_dtos %d, max_cb %d hd %d tl %d\n", - dapls_cb_pending(&ep_ptr->req_buffer), - ep_ptr->param.ep_attr.max_request_dtos, - ep_ptr->req_buffer.pool_size, - ep_ptr->req_buffer.head, ep_ptr->req_buffer.tail); - - goto bail; - } - - /* - * Invoke provider specific routine to post DTO - */ - dat_status = dapls_ib_post_send(ep_ptr, - op_type, - cookie, - num_segments, - local_iov, - remote_iov, completion_flags); - - if (dat_status != DAT_SUCCESS) { - dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie); - } - - bail: - return dat_status; -} - -/* - * dapli_ep_timeout - * - * If this routine is invoked before a connection occurs, generate an - * event - */ -void dapls_ep_timeout(uintptr_t arg) -{ - DAPL_EP *ep_ptr; - ib_cm_events_t ib_cm_event; - - dapl_dbg_log(DAPL_DBG_TYPE_CM, "--> dapls_ep_timeout! ep %lx\n", arg); - - ep_ptr = (DAPL_EP *) arg; - - /* reset the EP state */ - ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED; - - /* Clean up the EP and put the underlying QP into the ERROR state. - * The disconnect_clean interface requires the provided dependent - *cm event number. - */ - ib_cm_event = dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_TIMED_OUT); - dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, ib_cm_event); - - (void)dapls_evd_post_connection_event((DAPL_EVD *) ep_ptr->param. - connect_evd_handle, - DAT_CONNECTION_EVENT_TIMED_OUT, - (DAT_HANDLE) ep_ptr, 0, 0); -} - -/* - * dapls_ep_state_subtype - * - * Return the INVALID_STATE connection subtype associated with an - * INVALID_STATE on an EP. Strictly for error reporting. 
- */ -DAT_RETURN_SUBTYPE dapls_ep_state_subtype(IN DAPL_EP * ep_ptr) -{ - DAT_RETURN_SUBTYPE dat_status; - - switch (ep_ptr->param.ep_state) { - case DAT_EP_STATE_UNCONNECTED: - { - dat_status = DAT_INVALID_STATE_EP_UNCONNECTED; - break; - } - case DAT_EP_STATE_RESERVED: - { - dat_status = DAT_INVALID_STATE_EP_RESERVED; - break; - } - case DAT_EP_STATE_PASSIVE_CONNECTION_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_PASSCONNPENDING; - break; - } - case DAT_EP_STATE_ACTIVE_CONNECTION_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_ACTCONNPENDING; - break; - } - case DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_TENTCONNPENDING; - break; - } - case DAT_EP_STATE_CONNECTED: - { - dat_status = DAT_INVALID_STATE_EP_CONNECTED; - break; - } - case DAT_EP_STATE_DISCONNECT_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_DISCPENDING; - break; - } - case DAT_EP_STATE_DISCONNECTED: - { - dat_status = DAT_INVALID_STATE_EP_DISCONNECTED; - break; - } - case DAT_EP_STATE_COMPLETION_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_COMPLPENDING; - break; - } - - default: - { - dat_status = 0; - break; - } - } - - return dat_status; -} - -#ifdef DAPL_DBG_IO_TRC -/* allocate trace buffer */ -void dapls_io_trc_alloc(DAPL_EP * ep_ptr) -{ - DAT_RETURN dat_status; - int i; - struct io_buf_track *ibt; - - ep_ptr->ibt_dumped = 0; /* bool to control how often we print */ - dat_status = dapls_rbuf_alloc(&ep_ptr->ibt_queue, DBG_IO_TRC_QLEN); - if (dat_status != DAT_SUCCESS) { - goto bail; - } - ibt = - (struct io_buf_track *)dapl_os_alloc(sizeof(struct io_buf_track) * - DBG_IO_TRC_QLEN); - - if (dat_status != DAT_SUCCESS) { - dapls_rbuf_destroy(&ep_ptr->ibt_queue); - goto bail; - } - ep_ptr->ibt_base = ibt; - dapl_os_memzero(ibt, sizeof(struct io_buf_track) * DBG_IO_TRC_QLEN); - - /* add events to free event queue */ - for (i = 0; i < DBG_IO_TRC_QLEN; i++) { - dapls_rbuf_add(&ep_ptr->ibt_queue, ibt++); - } - bail: - return; -} -#endif /* DAPL_DBG_IO_TRC 
*/ - -/* - * Generate a disconnect event on abruct close for older verbs providers - * that do not do it automatically. - */ - -void -dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr, - DAT_CLOSE_FLAGS disconnect_flags) -{ - ib_cm_events_t ib_cm_event; - DAPL_CR *cr_ptr; - dp_ib_cm_handle_t cm_ptr; - - /* - * Acquire the lock and make sure we didn't get a callback - * that cleaned up. - */ - dapl_os_lock(&ep_ptr->header.lock); - if (disconnect_flags == DAT_CLOSE_ABRUPT_FLAG && - ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { - /* - * If this is an ABRUPT close, the provider will not generate - * a disconnect message so we do it manually here. Just invoke - * the CM callback as it will clean up the appropriate - * data structures, reset the state, and generate the event - * on the way out. Obtain the provider dependent cm_event to - * pass into the callback for a disconnect. - */ - ib_cm_event = - dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_DISCONNECTED); - - cr_ptr = ep_ptr->cr_ptr; - cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) - ? 
NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); - dapl_os_unlock(&ep_ptr->header.lock); - - if (cr_ptr != NULL) { - dapl_dbg_log(DAPL_DBG_TYPE_API | DAPL_DBG_TYPE_CM, - " dapl_ep_disconnect force callback on EP %p CM handle %x\n", - ep_ptr, cr_ptr->ib_cm_handle); - - dapls_cr_callback(cr_ptr->ib_cm_handle, - ib_cm_event, NULL, 0, cr_ptr->sp_ptr); - } else { - dapl_evd_connection_callback(cm_ptr, - ib_cm_event, - NULL, 0, (void *)ep_ptr); - } - } else { - dapl_os_unlock(&ep_ptr->header.lock); - } -} - -/* - * dapl_ep_link_cm - * - * Add linking of provider's CM object to a EP structure - * This enables multiple CM's per EP, and syncronization - * - * Input: - * DAPL_EP *ep_ptr - * dp_ib_cm_handle_t *cm_ptr defined in provider's dapl_util.h - * - * CM objects linked with EP using ->list_entry - * Output: - * none - * - * Returns: - * none - * - */ -void dapl_ep_link_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr) -{ - dapl_os_lock(&ep_ptr->header.lock); - dapls_cm_acquire(cm_ptr); - dapl_llist_add_tail(&ep_ptr->cm_list_head, &cm_ptr->list_entry, cm_ptr); - dapl_os_unlock(&ep_ptr->header.lock); -} - -void dapl_ep_unlink_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr) -{ - dapl_os_lock(&ep_ptr->header.lock); - dapl_llist_remove_entry(&ep_ptr->cm_list_head, &cm_ptr->list_entry); - dapls_cm_release(cm_ptr); - dapl_os_unlock(&ep_ptr->header.lock); -} - -static void dapli_ep_flush_evd(DAPL_EVD *evd_ptr) -{ - DAT_RETURN dat_status; - - dapl_os_lock(&evd_ptr->header.lock); - dat_status = dapls_evd_copy_cq(evd_ptr); - dapl_os_unlock(&evd_ptr->header.lock); - - if (dat_status == DAT_QUEUE_FULL) - dapls_evd_post_overflow_event(evd_ptr); -} - -void dapls_ep_flush_cqs(DAPL_EP * ep_ptr) -{ - if (ep_ptr->param.request_evd_handle) - dapli_ep_flush_evd((DAPL_EVD *) ep_ptr->param.request_evd_handle); - - if (ep_ptr->param.recv_evd_handle) - dapli_ep_flush_evd((DAPL_EVD *) ep_ptr->param.recv_evd_handle); -} - -/* - * Local variables: - * c-indent-level: 4 - * 
c-basic-offset: 4 - * tab-width: 8 - * End: - */ +/* + * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +/********************************************************************** + * + * MODULE: dapl_ep_util.c + * + * PURPOSE: Manage EP Info structure + * + * $Id:$ + **********************************************************************/ + +#include "dapl_ep_util.h" +#include "dapl_ring_buffer_util.h" +#include "dapl_cookie.h" +#include "dapl_adapter_util.h" +#include "dapl_evd_util.h" +#include "dapl_cr_util.h" /* for callback routine */ + +/* + * Local definitions + */ +/* + * Default number of I/O operations on an end point + */ +#define IB_IO_DEFAULT 16 +/* + * Default number of scatter/gather entries available to a single + * post send/recv + */ +#define IB_IOV_DEFAULT 4 + +/* + * Default number of RDMA operations in progress at a time + */ +#define IB_RDMA_DEFAULT 4 + +extern void dapli_ep_default_attrs(IN DAPL_EP * ep_ptr); + +char *dapl_get_ep_state_str(DAT_EP_STATE state) +{ +#ifdef DAPL_DBG + static char *state_str[DAT_EP_STATE_CONNECTED_MULTI_PATH + 1] = { + "DAT_EP_STATE_UNCONNECTED", /* quiescent state */ + "DAT_EP_STATE_UNCONFIGURED_UNCONNECTED", + "DAT_EP_STATE_RESERVED", + "DAT_EP_STATE_UNCONFIGURED_RESERVED", + "DAT_EP_STATE_PASSIVE_CONNECTION_PENDING", + "DAT_EP_STATE_UNCONFIGURED_PASSIVE", + "DAT_EP_STATE_ACTIVE_CONNECTION_PENDING", + "DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING", + "DAT_EP_STATE_UNCONFIGURED_TENTATIVE", + "DAT_EP_STATE_CONNECTED", + "DAT_EP_STATE_DISCONNECT_PENDING", + "DAT_EP_STATE_DISCONNECTED", + "DAT_EP_STATE_COMPLETION_PENDING", + "DAT_EP_STATE_CONNECTED_SINGLE_PATH", + "DAT_EP_STATE_CONNECTED_MULTI_PATH" + }; + return state_str[state]; +#else + static char buf[12]; + sprintf(buf, "%d", state); + return buf; +#endif +} + +/* + * dapl_ep_alloc + * + * alloc and initialize an EP INFO struct + * + * Input: + * IA INFO struct ptr + * + * Output: + * ep_ptr + * + * Returns: + * none + * + */ +DAPL_EP *dapl_ep_alloc(IN DAPL_IA * ia_ptr, IN const DAT_EP_ATTR * ep_attr) +{ + DAPL_EP *ep_ptr; + + /* Allocate EP */ + ep_ptr = + 
(DAPL_EP *) dapl_os_alloc(sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); + if (ep_ptr == NULL) { + goto bail; + } + + /* zero the structure */ + dapl_os_memzero(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); + +#ifdef DAPL_COUNTERS + /* Allocate counters */ + ep_ptr->cntrs = + dapl_os_alloc(sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); + if (ep_ptr->cntrs == NULL) { + dapl_os_free(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); + return (NULL); + } + dapl_os_memzero(ep_ptr->cntrs, + sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); +#endif /* DAPL_COUNTERS */ + + /* + * initialize the header + */ + ep_ptr->header.provider = ia_ptr->header.provider; + ep_ptr->header.magic = DAPL_MAGIC_EP; + ep_ptr->header.handle_type = DAT_HANDLE_TYPE_EP; + ep_ptr->header.owner_ia = ia_ptr; + ep_ptr->header.user_context.as_64 = 0; + ep_ptr->header.user_context.as_ptr = NULL; + + dapl_llist_init_entry(&ep_ptr->header.ia_list_entry); + dapl_llist_init_head(&ep_ptr->cm_list_head); + dapl_os_lock_init(&ep_ptr->header.lock); + + /* + * Initialize the body + */ + /* + * Set up default parameters if the user passed in a NULL + */ + if (ep_attr == NULL) { + dapli_ep_default_attrs(ep_ptr); + } else { + ep_ptr->param.ep_attr = *ep_attr; + } + + /* + * IBM OS API specific fields + */ + ep_ptr->qp_handle = IB_INVALID_HANDLE; + ep_ptr->qpn = 0; + ep_ptr->qp_state = DAPL_QP_STATE_UNATTACHED; + + if (DAT_SUCCESS != dapls_cb_create(&ep_ptr->req_buffer, + ep_ptr, + ep_ptr->param.ep_attr. + max_request_dtos)) { + dapl_ep_dealloc(ep_ptr); + ep_ptr = NULL; + goto bail; + } + + if (DAT_SUCCESS != dapls_cb_create(&ep_ptr->recv_buffer, + ep_ptr, + ep_ptr->param.ep_attr.max_recv_dtos)) + { + dapl_ep_dealloc(ep_ptr); + ep_ptr = NULL; + goto bail; + } + + dapls_io_trc_alloc(ep_ptr); + + bail: + return ep_ptr; +} + +/* + * dapl_ep_dealloc + * + * Free the passed in EP structure. 
+ * + * Input: + * entry point pointer + * + * Output: + * none + * + * Returns: + * none + * + */ +void dapl_ep_dealloc(IN DAPL_EP * ep_ptr) +{ + dapl_os_assert(ep_ptr->header.magic == DAPL_MAGIC_EP); + + ep_ptr->header.magic = DAPL_MAGIC_INVALID; /* reset magic to prevent reuse */ + + dapls_cb_free(&ep_ptr->req_buffer); + dapls_cb_free(&ep_ptr->recv_buffer); + + if (NULL != ep_ptr->cxn_timer) { + dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER)); + } + +#ifdef DAPL_COUNTERS + dapl_os_free(ep_ptr->cntrs, sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); +#endif /* DAPL_COUNTERS */ + + dapl_os_free(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); +} + +/* + * dapl_ep_default_attrs + * + * Set default values in the parameter fields + * + * Input: + * entry point pointer + * + * Output: + * none + * + * Returns: + * none + * + */ +void dapli_ep_default_attrs(IN DAPL_EP * ep_ptr) +{ + DAT_EP_ATTR ep_attr_limit; + DAT_EP_ATTR *ep_attr; + DAT_RETURN dat_status; + + ep_attr = &ep_ptr->param.ep_attr; + /* Set up defaults */ + dapl_os_memzero(ep_attr, sizeof(DAT_EP_ATTR)); + + /* mtu and rdma sizes fixed in IB as per IBTA 1.1, 9.4.3, 9.4.4, 9.7.7. */ + ep_attr->max_mtu_size = 0x80000000; + ep_attr->max_rdma_size = 0x80000000; + + ep_attr->qos = DAT_QOS_BEST_EFFORT; + ep_attr->service_type = DAT_SERVICE_TYPE_RC; + ep_attr->max_recv_dtos = IB_IO_DEFAULT; + ep_attr->max_request_dtos = IB_IO_DEFAULT; + ep_attr->max_recv_iov = IB_IOV_DEFAULT; + ep_attr->max_request_iov = IB_IOV_DEFAULT; + ep_attr->max_rdma_read_in = IB_RDMA_DEFAULT; + ep_attr->max_rdma_read_out = IB_RDMA_DEFAULT; + + /* + * Configure the EP as a standard completion type, which will be + * used by the EVDs. A threshold of 1 is the default state of an + * EVD. 
+ */ + ep_attr->request_completion_flags = DAT_COMPLETION_EVD_THRESHOLD_FLAG; + ep_attr->recv_completion_flags = DAT_COMPLETION_EVD_THRESHOLD_FLAG; + /* + * Unspecified defaults: + * - ep_privileges: No RDMA capabilities + * - num_transport_specific_params: none + * - transport_specific_params: none + * - num_provider_specific_params: 0 + * - provider_specific_params: 0 + */ + + dat_status = dapls_ib_query_hca(ep_ptr->header.owner_ia->hca_ptr, + NULL, &ep_attr_limit, NULL); + /* check against HCA maximums */ + if (dat_status == DAT_SUCCESS) { + ep_ptr->param.ep_attr.max_mtu_size = + DAPL_MIN(ep_ptr->param.ep_attr.max_mtu_size, + ep_attr_limit.max_mtu_size); + ep_ptr->param.ep_attr.max_rdma_size = + DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_size, + ep_attr_limit.max_rdma_size); + ep_ptr->param.ep_attr.max_recv_dtos = + DAPL_MIN(ep_ptr->param.ep_attr.max_recv_dtos, + ep_attr_limit.max_recv_dtos); + ep_ptr->param.ep_attr.max_request_dtos = + DAPL_MIN(ep_ptr->param.ep_attr.max_request_dtos, + ep_attr_limit.max_request_dtos); + ep_ptr->param.ep_attr.max_recv_iov = + DAPL_MIN(ep_ptr->param.ep_attr.max_recv_iov, + ep_attr_limit.max_recv_iov); + ep_ptr->param.ep_attr.max_request_iov = + DAPL_MIN(ep_ptr->param.ep_attr.max_request_iov, + ep_attr_limit.max_request_iov); + ep_ptr->param.ep_attr.max_rdma_read_in = + DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_read_in, + ep_attr_limit.max_rdma_read_in); + ep_ptr->param.ep_attr.max_rdma_read_out = + DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_read_out, + ep_attr_limit.max_rdma_read_out); + } +} + +DAT_RETURN dapl_ep_check_recv_completion_flags(DAT_COMPLETION_FLAGS flags) +{ + + /* + * InfiniBand will not allow signal suppression for RECV completions, + * see the 1.0.1 spec section 10.7.3.1, 10.8.6. + * N.B. SIGNALLED has a different meaning in dapl than it does + * in IB; IB SIGNALLED is the same as DAPL SUPPRESS. 
DAPL + * SIGNALLED simply means the user will not get awakened when + * an EVD completes, even though the dapl handler is invoked. + */ + + if (flags & DAT_COMPLETION_SUPPRESS_FLAG) { + return DAT_INVALID_PARAMETER; + } + + return DAT_SUCCESS; +} + +DAT_RETURN dapl_ep_check_request_completion_flags(DAT_COMPLETION_FLAGS flags) +{ + return DAT_SUCCESS; +} + +DAT_RETURN +dapl_ep_post_send_req(IN DAT_EP_HANDLE ep_handle, + IN DAT_COUNT num_segments, + IN DAT_LMR_TRIPLET * local_iov, + IN DAT_DTO_COOKIE user_cookie, + IN const DAT_RMR_TRIPLET * remote_iov, + IN DAT_COMPLETION_FLAGS completion_flags, + IN DAPL_DTO_TYPE dto_type, IN int op_type) +{ + DAPL_EP *ep_ptr; + DAPL_COOKIE *cookie; + DAT_RETURN dat_status; + + if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP)) { + dat_status = + DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP); + goto bail; + } + + ep_ptr = (DAPL_EP *) ep_handle; + + /* + * Synchronization ok since this buffer is only used for send + * requests, which aren't allowed to race with each other. 
+ */ + dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer, + dto_type, user_cookie, &cookie); + if (dat_status != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " dapl_post_req resource ERR:" + " dtos pending = %d, max_dtos %d, max_cb %d hd %d tl %d\n", + dapls_cb_pending(&ep_ptr->req_buffer), + ep_ptr->param.ep_attr.max_request_dtos, + ep_ptr->req_buffer.pool_size, + ep_ptr->req_buffer.head, ep_ptr->req_buffer.tail); + + goto bail; + } + + /* + * Invoke provider specific routine to post DTO + */ + dat_status = dapls_ib_post_send(ep_ptr, + op_type, + cookie, + num_segments, + local_iov, + remote_iov, completion_flags); + + if (dat_status != DAT_SUCCESS) { + dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie); + } + + bail: + return dat_status; +} + +/* + * dapli_ep_timeout + * + * If this routine is invoked before a connection occurs, generate an + * event + */ +void dapls_ep_timeout(uintptr_t arg) +{ + DAPL_EP *ep_ptr; + ib_cm_events_t ib_cm_event; + + dapl_dbg_log(DAPL_DBG_TYPE_CM, "--> dapls_ep_timeout! ep %lx\n", arg); + + ep_ptr = (DAPL_EP *) arg; + + /* reset the EP state */ + ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED; + + /* Clean up the EP and put the underlying QP into the ERROR state. + * The disconnect_clean interface requires the provided dependent + *cm event number. + */ + ib_cm_event = dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_TIMED_OUT); + dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, ib_cm_event); + + (void)dapls_evd_post_connection_event((DAPL_EVD *) ep_ptr->param. + connect_evd_handle, + DAT_CONNECTION_EVENT_TIMED_OUT, + (DAT_HANDLE) ep_ptr, 0, 0); +} + +/* + * dapls_ep_state_subtype + * + * Return the INVALID_STATE connection subtype associated with an + * INVALID_STATE on an EP. Strictly for error reporting. 
+ */ +DAT_RETURN_SUBTYPE dapls_ep_state_subtype(IN DAPL_EP * ep_ptr) +{ + DAT_RETURN_SUBTYPE dat_status; + + switch (ep_ptr->param.ep_state) { /* one DAT_INVALID_STATE_EP_* subtype per EP state */ + case DAT_EP_STATE_UNCONNECTED: + { + dat_status = DAT_INVALID_STATE_EP_UNCONNECTED; + break; + } + case DAT_EP_STATE_RESERVED: + { + dat_status = DAT_INVALID_STATE_EP_RESERVED; + break; + } + case DAT_EP_STATE_PASSIVE_CONNECTION_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_PASSCONNPENDING; + break; + } + case DAT_EP_STATE_ACTIVE_CONNECTION_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_ACTCONNPENDING; + break; + } + case DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_TENTCONNPENDING; + break; + } + case DAT_EP_STATE_CONNECTED: + { + dat_status = DAT_INVALID_STATE_EP_CONNECTED; + break; + } + case DAT_EP_STATE_DISCONNECT_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_DISCPENDING; + break; + } + case DAT_EP_STATE_DISCONNECTED: + { + dat_status = DAT_INVALID_STATE_EP_DISCONNECTED; + break; + } + case DAT_EP_STATE_COMPLETION_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_COMPLPENDING; + break; + } + + default: + { + dat_status = 0; /* state not listed above: no subtype */ + break; + } + } + + return dat_status; +} + +#ifdef DAPL_DBG_IO_TRC +/* Allocate the EP's I/O trace ring (ibt_queue) plus a zeroed io_buf_track array of DBG_IO_TRC_QLEN entries and queue every entry; on failure ibt_base is not assigned. */ +void dapls_io_trc_alloc(DAPL_EP * ep_ptr) +{ + DAT_RETURN dat_status; + int i; + struct io_buf_track *ibt; + + ep_ptr->ibt_dumped = 0; /* bool to control how often we print */ + dat_status = dapls_rbuf_alloc(&ep_ptr->ibt_queue, DBG_IO_TRC_QLEN); + if (dat_status != DAT_SUCCESS) { + goto bail; + } + ibt = + (struct io_buf_track *)dapl_os_alloc(sizeof(struct io_buf_track) * + DBG_IO_TRC_QLEN); + + if (ibt == NULL) { /* array allocation failed (dat_status is still DAT_SUCCESS here): release the ring allocated above */ + dapls_rbuf_destroy(&ep_ptr->ibt_queue); + goto bail; + } + ep_ptr->ibt_base = ibt; + dapl_os_memzero(ibt, sizeof(struct io_buf_track) * DBG_IO_TRC_QLEN); + + /* add events to free event queue */ + for (i = 0; i < DBG_IO_TRC_QLEN; i++) { + dapls_rbuf_add(&ep_ptr->ibt_queue, ibt++); + } + bail: + return; +} +#endif /* DAPL_DBG_IO_TRC
+ */ + +/* + * Generate a disconnect event on abrupt close for older verbs providers + * that do not do it automatically. + */ + +void +dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr, + DAT_CLOSE_FLAGS disconnect_flags) +{ + ib_cm_events_t ib_cm_event; + DAPL_CR *cr_ptr; + dp_ib_cm_handle_t cm_ptr; + + /* + * Acquire the lock and make sure we didn't get a callback + * that cleaned up. + */ + dapl_os_lock(&ep_ptr->header.lock); + if (disconnect_flags == DAT_CLOSE_ABRUPT_FLAG && + ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { + /* + * If this is an ABRUPT close, the provider will not generate + * a disconnect message so we do it manually here. Just invoke + * the CM callback as it will clean up the appropriate + * data structures, reset the state, and generate the event + * on the way out. Obtain the provider dependent cm_event to + * pass into the callback for a disconnect. + */ + ib_cm_event = + dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_DISCONNECTED); + + cr_ptr = ep_ptr->cr_ptr; + cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) + ?
NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); + dapl_os_unlock(&ep_ptr->header.lock); /* the callbacks below run without the EP lock held */ + + if (cr_ptr != NULL) { + dapl_dbg_log(DAPL_DBG_TYPE_API | DAPL_DBG_TYPE_CM, + " dapl_ep_disconnect force callback on EP %p CM handle %x\n", + ep_ptr, cr_ptr->ib_cm_handle); /* NOTE(review): %x is given a CM handle; should be %p if the handle is a pointer type - confirm */ + + dapls_cr_callback(cr_ptr->ib_cm_handle, + ib_cm_event, NULL, 0, cr_ptr->sp_ptr); + } else { + dapl_evd_connection_callback(cm_ptr, + ib_cm_event, + NULL, 0, (void *)ep_ptr); + } + } else { + dapl_os_unlock(&ep_ptr->header.lock); + } +} + +/* + * dapl_ep_link_cm + * + * Add linking of provider's CM object to an EP structure + * This enables multiple CM's per EP, and synchronization + * + * Input: + * DAPL_EP *ep_ptr + * dp_ib_cm_handle_t *cm_ptr defined in provider's dapl_util.h + * + * CM objects linked with EP using ->list_entry + * Output: + * none + * + * Returns: + * none + * + */ +void dapl_ep_link_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr) +{ + dapl_os_lock(&ep_ptr->header.lock); + dapls_cm_acquire(cm_ptr); + dapl_llist_add_tail(&ep_ptr->cm_list_head, &cm_ptr->list_entry, cm_ptr); + dapl_os_unlock(&ep_ptr->header.lock); +} + +void dapl_ep_unlink_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr) +{ + dapl_os_lock(&ep_ptr->header.lock); + dapl_llist_remove_entry(&ep_ptr->cm_list_head, &cm_ptr->list_entry); + dapls_cm_release(cm_ptr); /* pairs with dapls_cm_acquire() in dapl_ep_link_cm() */ + dapl_os_unlock(&ep_ptr->header.lock); +} + +static void dapli_ep_flush_evd(DAPL_EVD *evd_ptr) +{ + DAT_RETURN dat_status; + + dapl_os_lock(&evd_ptr->header.lock); + dat_status = dapls_evd_copy_cq(evd_ptr); + dapl_os_unlock(&evd_ptr->header.lock); + + if (dat_status == DAT_QUEUE_FULL) /* overflow event is posted after the EVD lock is dropped */ + dapls_evd_post_overflow_event(evd_ptr); +} + +void dapls_ep_flush_cqs(DAPL_EP * ep_ptr) +{ + dapli_ep_flush_evd((DAPL_EVD *) ep_ptr->param.request_evd_handle); + while (dapls_cb_pending(&ep_ptr->recv_buffer)) /* repeat while recv_buffer still reports pending entries */ + dapli_ep_flush_evd((DAPL_EVD *) ep_ptr->param.recv_evd_handle); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 8 + * End:
+ */ diff --git a/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_evd_dequeue.c b/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_evd_dequeue.c index 7632fe1e..071e0a67 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_evd_dequeue.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/common/dapl_evd_dequeue.c @@ -1,134 +1,135 @@ -/* - * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -/********************************************************************** - * - * MODULE: dapl_evd_dequeue.c - * - * PURPOSE: Event Management - * - * Description: Interfaces in this file are completely described in - * the uDAPL 1.1 API, Chapter 6, section 3 - * - * $Id:$ - **********************************************************************/ - -#include "dapl.h" -#include "dapl_ring_buffer_util.h" -#include "dapl_evd_util.h" - -/* - * dapl_evd_dequeue - * - * DAPL Requirements Version xxx, 6.3.2.7 - * - * Remove first element from an event dispatcher - * - * Input: - * evd_handle - * - * Output: - * event - * - * Returns: - * DAT_SUCCESS - * DAT_INVALID_HANDLE - * DAT_INVALID_PARAMETER - * DAT_INVALID_STATE - * DAT_QUEUE_EMPTY - */ - -DAT_RETURN DAT_API dapl_evd_dequeue(IN DAT_EVD_HANDLE evd_handle, - OUT DAT_EVENT * event) -{ - DAPL_EVD *evd_ptr; - DAT_EVENT *local_event; - DAT_RETURN dat_status; - - dapl_dbg_log(DAPL_DBG_TYPE_API, - "dapl_evd_dequeue (%p, %p)\n", evd_handle, event); - - evd_ptr = (DAPL_EVD *) evd_handle; - dat_status = DAT_SUCCESS; - - if (DAPL_BAD_HANDLE(evd_handle, DAPL_MAGIC_EVD)) { - dat_status = DAT_ERROR(DAT_INVALID_HANDLE, 0); - goto bail; - } - - if (event == NULL) { - dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG2); - goto bail; - } - DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE); - - /* - * We need to dequeue under lock, as the IB OS Access API - * restricts us from having multiple threads in CQ poll, and the - * DAPL 1.1 API allows multiple threads in dat_evd_dequeue() - */ - dapl_os_lock(&evd_ptr->header.lock); - - /* - * Make sure there are no other waiters and the evd is active. - * Currently this means only the OPEN state is allowed. - */ - if (evd_ptr->evd_state != DAPL_EVD_STATE_OPEN || - evd_ptr->catastrophic_overflow) { - dapl_os_unlock(&evd_ptr->header.lock); - dat_status = DAT_ERROR(DAT_INVALID_STATE, 0); - goto bail; - } - - /* - * Try the EVD rbuf first; poll from the CQ only if that's empty. 
- * This keeps events in order if dat_evd_wait() has copied events - * from CQ to EVD. - */ - local_event = - (DAT_EVENT *) dapls_rbuf_remove(&evd_ptr->pending_event_queue); - if (local_event != NULL) { - *event = *local_event; - dat_status = dapls_rbuf_add(&evd_ptr->free_event_queue, - local_event); - DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_FOUND); - - } else if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) { - dat_status = dapls_evd_cq_poll_to_event(evd_ptr, event); - DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_POLL); - } else { - dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0); - DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_NOT_FOUND); - } - - dapl_os_unlock(&evd_ptr->header.lock); - bail: - dapl_dbg_log(DAPL_DBG_TYPE_RTN, - "dapl_evd_dequeue () returns 0x%x\n", dat_status); - - return dat_status; -} +/* + * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +/********************************************************************** + * + * MODULE: dapl_evd_dequeue.c + * + * PURPOSE: Event Management + * + * Description: Interfaces in this file are completely described in + * the uDAPL 1.1 API, Chapter 6, section 3 + * + * $Id:$ + **********************************************************************/ + +#include "dapl.h" +#include "dapl_ring_buffer_util.h" +#include "dapl_evd_util.h" + +/* + * dapl_evd_dequeue + * + * DAPL Requirements Version xxx, 6.3.2.7 + * + * Remove first element from an event dispatcher: an event already on the pending queue if there is one, otherwise one polled from the CQ + * + * Input: + * evd_handle + * + * Output: + * event + * + * Returns: + * DAT_SUCCESS + * DAT_INVALID_HANDLE + * DAT_INVALID_PARAMETER + * DAT_INVALID_STATE + * DAT_QUEUE_EMPTY + */ + +DAT_RETURN DAT_API dapl_evd_dequeue(IN DAT_EVD_HANDLE evd_handle, + OUT DAT_EVENT * event) +{ + DAPL_EVD *evd_ptr; + DAT_EVENT *local_event; + DAT_RETURN dat_status; + + evd_ptr = (DAPL_EVD *) evd_handle; + dat_status = DAT_SUCCESS; + + if (DAPL_BAD_HANDLE(evd_handle, DAPL_MAGIC_EVD)) { + dat_status = DAT_ERROR(DAT_INVALID_HANDLE, 0); + goto bail; + } + + if (event == NULL) { + dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG2); + goto bail; + } + DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE); + + /* + * We need to dequeue under lock, as the IB OS Access API + * restricts us from having multiple threads in CQ poll, and the + * DAPL 1.1 API allows multiple threads in dat_evd_dequeue() + */ + dapl_os_lock(&evd_ptr->header.lock); + + /* + * Make sure there are no other waiters and the evd is active. + * Currently this means only the OPEN state is allowed; an EVD flagged catastrophic_overflow is rejected as well. + */ + if (evd_ptr->evd_state != DAPL_EVD_STATE_OPEN || + evd_ptr->catastrophic_overflow) { + dapl_os_unlock(&evd_ptr->header.lock); + dat_status = DAT_ERROR(DAT_INVALID_STATE, 0); + goto bail; + } + + /* + * Try the EVD rbuf first; poll from the CQ only if that's empty. + * This keeps events in order if dat_evd_wait() has copied events + * from CQ to EVD.
+ */ + local_event = + (DAT_EVENT *) dapls_rbuf_remove(&evd_ptr->pending_event_queue); + if (local_event != NULL) { + *event = *local_event; /* copy out, then return the slot to the free queue */ + dat_status = dapls_rbuf_add(&evd_ptr->free_event_queue, + local_event); + DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_FOUND); + + } else if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) { + dat_status = dapls_evd_cq_poll_to_event(evd_ptr, event); + DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_POLL); + } else { + dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0); + DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_NOT_FOUND); + } + +#ifdef DAPL_DBG + if (dat_status == DAT_SUCCESS) /* trace only when an event is actually handed back */ + dapl_dbg_log(DAPL_DBG_TYPE_EVD, + "dapl_evd_dequeue() Event(%p) = 0x%x\n", + event->evd_handle, event->event_number); +#endif + dapl_os_unlock(&evd_ptr->header.lock); + bail: + + return dat_status; +} diff --git a/branches/WOF2-3/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c b/branches/WOF2-3/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c index e843829d..1dc9ab84 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c @@ -1,705 +1,713 @@ - -/* - * Copyright (c) 2005-2007 Intel Corporation. All rights reserved. - * Copyright (c) 2002, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under the terms of the "Common Public - * License" a copy of which is in the file LICENSE.txt in the root - * directory. The license is also available from the Open Source - * Initiative, see http://www.opensource.org/licenses/cpl.php.
- * - */ - -/********************************************************************** - * - * MODULE: dapl_ibal_qp.c - * - * PURPOSE: IB QP routines for access to IBAL APIs - * - * $Id: dapl_ibal_qp.c 33 2005-07-11 19:51:17Z ftillier $ - * - **********************************************************************/ - -#include "dapl.h" -#include "dapl_adapter_util.h" -#include "dapl_evd_util.h" -#include "dapl_ibal_util.h" -#include "dapl_ep_util.h" - -#define DAPL_IBAL_QKEY 0 -#define DAPL_IBAL_START_PSN 0 - -extern DAT_RETURN -dapls_ib_cq_late_alloc ( IN ib_pd_handle_t pd_handle, - IN DAPL_EVD *evd_ptr ); - -static void -dapli_ib_qp_async_error_cb( IN ib_async_event_rec_t* p_err_rec ) -{ - DAPL_EP *ep_ptr = (DAPL_EP *)p_err_rec->context; - DAPL_EVD *evd_ptr; - DAPL_IA *ia_ptr; - dapl_ibal_ca_t *p_ca; - dapl_ibal_evd_cb_t *evd_cb; - - dapl_dbg_log (DAPL_DBG_TYPE_ERR,"--> DiQpAEC QP event %s qp ctx %p\n", - ib_get_async_event_str(p_err_rec->code), p_err_rec->context); - dapl_dbg_log (DAPL_DBG_TYPE_ERR,"--> DiQpAEC qp_handle %p qpn %u\n", - ((DAPL_EP *)p_err_rec->context)->qp_handle, - ((DAPL_EP *)p_err_rec->context)->qpn); - - /* - * Verify handles EP, EVD, and hca_handle - */ - if (DAPL_BAD_HANDLE (ep_ptr, DAPL_MAGIC_EP ) || - DAPL_BAD_HANDLE (ep_ptr->param.connect_evd_handle, DAPL_MAGIC_EVD)) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DiQpAEC: invalid EP %p \n", ep_ptr); - return; - } - ia_ptr = ep_ptr->header.owner_ia; - evd_ptr = ia_ptr->async_error_evd; - - if (DAPL_BAD_HANDLE (evd_ptr, DAPL_MAGIC_EVD) || - ! 
(evd_ptr->evd_flags & DAT_EVD_ASYNC_FLAG)) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DiQpAEC: invalid EVD %p \n", evd_ptr); - return; - } - p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; - if (p_ca == NULL) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DiQpAEC: can't find %s HCA\n", - (ia_ptr->header.provider)->device_name); - return; - } - - /* find QP error callback using ia_ptr for context */ - evd_cb = dapli_find_evd_cb_by_context (ia_ptr, p_ca); - if ((evd_cb == NULL) || (evd_cb->pfn_async_qp_err_cb == NULL)) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DiQpAEC: no ERROR cb on p_ca %p found\n", p_ca); - return; - } - - dapl_os_lock (&ep_ptr->header.lock); - ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECT_PENDING; - dapl_os_unlock (&ep_ptr->header.lock); - - /* force disconnect, QP error state, to insure DTO's get flushed */ - dapls_ib_disconnect ( ep_ptr, DAT_CLOSE_ABRUPT_FLAG ); - - /* maps to dapl_evd_qp_async_error_callback(), context is EP */ - evd_cb->pfn_async_qp_err_cb( (ib_hca_handle_t)p_ca, - ep_ptr->qp_handle, - (ib_error_record_t*)&p_err_rec->code, - ep_ptr ); -} - -/* - * dapls_ib_qp_alloc - * - * Alloc a QP - * - * Input: - * *ia_ptr pointer to DAPL IA - * *ep_ptr pointer to DAPL EP - * *ep_ctx_ptr pointer to DAPL EP context - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * - */ -DAT_RETURN -dapls_ib_qp_alloc ( - IN DAPL_IA *ia_ptr, - IN DAPL_EP *ep_ptr, - IN DAPL_EP *ep_ctx_ptr) -{ - DAT_EP_ATTR *attr; - DAPL_EVD *recv_evd_ptr, *request_evd_ptr; - DAT_RETURN dat_status; - ib_api_status_t ib_status; - ib_qp_create_t qp_create; - ib_pd_handle_t ib_pd_handle; - ib_cq_handle_t cq_recv; - ib_cq_handle_t cq_send; - dapl_ibal_ca_t *p_ca; - dapl_ibal_port_t *p_active_port; - ib_qp_attr_t qp_attr; - dp_ib_cm_handle_t cm_ptr; - - attr = &ep_ptr->param.ep_attr; - - dapl_os_assert ( ep_ptr->param.pz_handle != NULL ); - - ib_pd_handle = ((DAPL_PZ *)ep_ptr->param.pz_handle)->pd_handle; - 
dapl_os_assert(ib_pd_handle); - recv_evd_ptr = (DAPL_EVD *) ep_ptr->param.recv_evd_handle; - request_evd_ptr = (DAPL_EVD *) ep_ptr->param.request_evd_handle; - - cq_recv = IB_INVALID_HANDLE; - cq_send = IB_INVALID_HANDLE; - - dapl_os_assert ( recv_evd_ptr != DAT_HANDLE_NULL ); - { - cq_recv = (ib_cq_handle_t) recv_evd_ptr->ib_cq_handle; - - if ((cq_recv == IB_INVALID_HANDLE) && - ( 0 != (recv_evd_ptr->evd_flags & ~DAT_EVD_SOFTWARE_FLAG) )) - { - dat_status = dapls_ib_cq_late_alloc ( ib_pd_handle, recv_evd_ptr); - if (dat_status != DAT_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> %s: failed to create CQ\n","DsQA"); - return (dat_status); - } - - dat_status = dapls_set_cq_notify (ia_ptr, recv_evd_ptr); - - if (dat_status != DAT_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> %s: failed to enable notify CQ\n","DsQA"); - return (dat_status); - } - - cq_recv = (ib_cq_handle_t) recv_evd_ptr->ib_cq_handle; - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsQA: alloc_recv_CQ = %p\n", cq_recv); - - } - } - - dapl_os_assert ( request_evd_ptr != DAT_HANDLE_NULL ); - { - cq_send = (ib_cq_handle_t) request_evd_ptr->ib_cq_handle; - - if ((cq_send == IB_INVALID_HANDLE) && - ( 0 != (request_evd_ptr->evd_flags & ~DAT_EVD_SOFTWARE_FLAG) )) - { - dat_status = dapls_ib_cq_late_alloc (ib_pd_handle, request_evd_ptr); - if (dat_status != DAT_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> %s: failed to create CQ\n","DsQA"); - return (dat_status); - } - - dat_status = dapls_set_cq_notify (ia_ptr, request_evd_ptr); - - if (dat_status != DAT_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> %s: failed to enable notify CQ\n","DsQA"); - return (dat_status); - } - - cq_send = (ib_cq_handle_t) request_evd_ptr->ib_cq_handle; - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsQA: alloc_send_CQ = %p\n", cq_send); - } - } - - /* - * Get the CA structure - */ - p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; - - dapl_os_memzero (&qp_create, sizeof (qp_create)); - 
qp_create.qp_type = IB_QPT_RELIABLE_CONN; - qp_create.sq_depth = attr->max_request_dtos; - qp_create.rq_depth = attr->max_recv_dtos; - qp_create.sq_sge = attr->max_recv_iov; - qp_create.rq_sge = attr->max_request_iov; - qp_create.h_sq_cq = cq_send; - qp_create.h_rq_cq = cq_recv; - qp_create.sq_signaled = FALSE; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsQA: sqd,iov=%d,%d rqd,iov=%d,%d\n", - attr->max_request_dtos, attr->max_request_iov, - attr->max_recv_dtos, attr->max_recv_iov); - - ib_status = ib_create_qp ( - ib_pd_handle, - &qp_create, - (void *) ep_ctx_ptr /* context */, - dapli_ib_qp_async_error_cb, - &ep_ptr->qp_handle); - - if (ib_status != IB_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsQA: Create QP failed = %s\n", - ib_get_err_str(ib_status)); - return (DAT_INSUFFICIENT_RESOURCES); - } - /* EP-CM linking */ - cm_ptr = ibal_cm_alloc(); - if (!cm_ptr) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsQA: Create CM failed\n"); - return (DAT_INSUFFICIENT_RESOURCES); - } - cm_ptr->ib_cm.h_qp = ep_ptr->qp_handle; - cm_ptr->ep = ep_ptr; - dapl_ep_link_cm(ep_ptr, cm_ptr); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQA: EP=%p, tEVD=%p, rEVD=%p QP=%p\n", - ep_ptr, ep_ptr->param.request_evd_handle, - ep_ptr->param.recv_evd_handle, - ep_ptr->qp_handle ); - - ep_ptr->qp_state = IB_QPS_RESET; - - p_active_port = dapli_ibal_get_port(p_ca,(uint8_t)ia_ptr->hca_ptr->port_num); - - if (NULL == p_active_port) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsQA: Port %d is not available = %d\n", - ia_ptr->hca_ptr->port_num, __LINE__); - return (DAT_INVALID_STATE); - } - - ib_status = dapls_modify_qp_state_to_init ( ep_ptr->qp_handle, - &ep_ptr->param.ep_attr, - p_active_port ); - - if ( ib_status != IB_SUCCESS ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsQA: Change QP state to INIT failed = %s\n", - ib_get_err_str(ib_status)); - return (DAT_INVALID_HANDLE); - } - ib_status = ib_query_qp ( ep_ptr->qp_handle, &qp_attr ); - - ep_ptr->qp_state = qp_attr.state; - 
ep_ptr->qpn = qp_attr.num; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQAQA: EP:%p new_QP %p state %s\n", - ep_ptr, - ep_ptr->qp_handle, - ib_get_port_state_str(ep_ptr->qp_state)); - - return (DAT_SUCCESS); -} - - -/* - * dapls_ib_qp_free - * - * Free a QP - * - * Input: - * *ia_ptr pointer to IA structure - * *ep_ptr pointer to EP structure - * - * Output: - * none - * - * Returns: - * none - * - */ -DAT_RETURN -dapls_ib_qp_free ( - IN DAPL_IA *ia_ptr, - IN DAPL_EP *ep_ptr ) -{ - - UNREFERENCED_PARAMETER(ia_ptr); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQF: free %p, state %s\n", - ep_ptr->qp_handle, - ib_get_port_state_str(ep_ptr->qp_state)); - - dapl_os_lock(&ep_ptr->header.lock); - if (( ep_ptr->qp_handle != IB_INVALID_HANDLE )) - { - ib_destroy_qp ( ep_ptr->qp_handle, ib_sync_destroy ); - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQF: freed QP %p\n", - ep_ptr->qp_handle ); - ep_ptr->qp_handle = IB_INVALID_HANDLE; - } - dapl_os_unlock(&ep_ptr->header.lock); - - return DAT_SUCCESS; -} - - -/* - * dapls_ib_qp_modify - * - * Set the QP to the parameters specified in an EP_PARAM - * - * We can't be sure what state the QP is in so we first obtain the state - * from the driver. The EP_PARAM structure that is provided has been - * sanitized such that only non-zero values are valid. 
- * - * Input: - * *ia_ptr pointer to DAPL IA - * *ep_ptr pointer to DAPL EP - * *ep_attr pointer to DAT EP attribute - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INVALID_PARAMETER - * - */ -DAT_RETURN -dapls_ib_qp_modify ( - IN DAPL_IA *ia_ptr, - IN DAPL_EP *ep_ptr, - IN DAT_EP_ATTR *ep_attr ) -{ - ib_qp_attr_t qp_attr; - ib_api_status_t ib_status; - ib_qp_handle_t qp_handle; - ib_qp_state_t qp_state; - ib_qp_mod_t qp_mod; - ib_av_attr_t *p_av_attr; - ib_qp_opts_t *p_qp_opts; - uint32_t *p_sq_depth, *p_rq_depth; - DAT_BOOLEAN need_modify; - DAT_RETURN dat_status; - - qp_handle = ep_ptr->qp_handle; - need_modify = DAT_FALSE; - dat_status = DAT_SUCCESS; - if ( ia_ptr == NULL || ia_ptr->header.magic != DAPL_MAGIC_IA ) - { - dat_status = DAT_INVALID_HANDLE; - goto bail; - } - /* - * Query the QP to get the current state. - */ - ib_status = ib_query_qp ( qp_handle, &qp_attr ); - - if ( ib_status != IB_SUCCESS ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsIQM: Query QP failed = %s\n", - ib_get_err_str(ib_status)); - dat_status = DAT_INTERNAL_ERROR; - goto bail; - } - - qp_state = qp_attr.state; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM: modify qp state=%d\n",qp_state); - /* - * Check if we have the right qp_state or not - */ - if ( (qp_state != IB_QPS_RTR ) && (qp_state != IB_QPS_RTS ) ) - { - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsIQM: postpone to modify qp to EP values later\n"); - dat_status = DAT_SUCCESS; - goto bail; - } - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - if (qp_state == IB_QPS_RTR) - { - p_av_attr = &qp_mod.state.rtr.primary_av; - p_qp_opts = &qp_mod.state.rtr.opts; - p_sq_depth = &qp_mod.state.rtr.sq_depth; - p_rq_depth = &qp_mod.state.rtr.rq_depth; - } - else - { - /* - * RTS does not have primary_av field - */ - p_av_attr = &qp_mod.state.rts.alternate_av; - p_qp_opts = &qp_mod.state.rts.opts; - p_sq_depth = &qp_mod.state.rts.sq_depth; - p_rq_depth = &qp_mod.state.rts.rq_depth; - 
} - - if ( (ep_attr->max_recv_dtos > 0) && - ((DAT_UINT32)ep_attr->max_recv_dtos != qp_attr.rq_depth) ) - { - dapl_dbg_log (DAPL_DBG_TYPE_EP,"--> DsIQM: rq_depth modified (%d,%d)\n", - qp_attr.rq_depth, ep_attr->max_recv_dtos); - - *p_rq_depth = ep_attr->max_recv_dtos; - *p_qp_opts |= IB_MOD_QP_RQ_DEPTH; - need_modify = DAT_TRUE; - } - - if ( (ep_attr->max_request_dtos > 0) && - ((DAT_UINT32)ep_attr->max_request_dtos != qp_attr.sq_depth) ) - { - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsIQM: sq_depth modified (%d,%d)\n", - qp_attr.sq_depth, ep_attr->max_request_dtos); - - *p_sq_depth = ep_attr->max_request_dtos; - *p_qp_opts |= IB_MOD_QP_SQ_DEPTH; - need_modify = DAT_TRUE; - } - - qp_mod.req_state = qp_state; - - if ( need_modify == DAT_TRUE ) - { - ib_status = ib_modify_qp (qp_handle, &qp_mod); - if ( ib_status != IB_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> %s: ib_status = %d\n", - "DsIQM", ib_status); - dat_status = DAT_INTERNAL_ERROR; - } - } - -bail: - - return dat_status; -} - - -ib_api_status_t -dapls_modify_qp_state_to_error ( ib_qp_handle_t qp_handle ) -{ - ib_qp_mod_t qp_mod; - ib_api_status_t ib_status; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_Err: QP state change --> Err\n"); - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_ERROR; - - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - return (ib_status); -} - - -ib_api_status_t -dapls_modify_qp_state_to_reset ( ib_qp_handle_t qp_handle ) -{ - ib_qp_mod_t qp_mod; - ib_api_status_t ib_status; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RESET: QP state change\n"); - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_RESET; - - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - return (ib_status); -} - - -ib_api_status_t -dapls_modify_qp_state_to_init ( - IN ib_qp_handle_t qp_handle, - IN DAT_EP_ATTR *p_attr, - IN dapl_ibal_port_t *p_port ) -{ - ib_qp_mod_t qp_mod; - ib_api_status_t ib_status; - - dapl_os_memzero (&qp_mod, 
sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_INIT; - qp_mod.state.init.primary_port = p_port->p_attr->port_num; - qp_mod.state.init.qkey = DAPL_IBAL_QKEY; - qp_mod.state.init.pkey_index = 0; - qp_mod.state.init.access_ctrl = IB_AC_LOCAL_WRITE | - IB_AC_RDMA_WRITE | - IB_AC_MW_BIND | - IB_AC_ATOMIC; - if ((p_attr->max_rdma_read_in > 0) || (p_attr->max_rdma_read_out > 0)) - { - qp_mod.state.init.access_ctrl |= IB_AC_RDMA_READ; - } - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_INIT: QP(%p) state change, %s\n", - qp_handle, ib_get_err_str(ib_status)); - - return (ib_status); -} - - -ib_api_status_t -dapls_modify_qp_state_to_rtr ( - ib_qp_handle_t qp_handle, - ib_net32_t dest_qp, - ib_lid_t dest_lid, - dapl_ibal_port_t *p_port) -{ - ib_qp_mod_t qp_mod; - ib_api_status_t ib_status; - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_RTR; - qp_mod.state.rtr.rq_psn = DAPL_IBAL_START_PSN; - qp_mod.state.rtr.dest_qp = dest_qp; - qp_mod.state.rtr.primary_av.port_num = p_port->p_attr->port_num; - qp_mod.state.rtr.primary_av.sl = 0; - qp_mod.state.rtr.primary_av.dlid = dest_lid; - qp_mod.state.rtr.primary_av.grh_valid = 0; /* FALSE */ - qp_mod.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS; - qp_mod.state.rtr.primary_av.path_bits = 0; - qp_mod.state.rtr.primary_av.conn.path_mtu = p_port->p_attr->mtu; - qp_mod.state.rtr.primary_av.conn.local_ack_timeout = 7; - qp_mod.state.rtr.primary_av.conn.seq_err_retry_cnt = 7; - qp_mod.state.rtr.primary_av.conn.rnr_retry_cnt = IB_RNR_RETRY_CNT; - qp_mod.state.rtr.resp_res = 4; // in-flight RDMAs - qp_mod.state.rtr.rnr_nak_timeout = IB_RNR_NAK_TIMEOUT; - - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RTR: QP(%p) state change %s\n", - qp_handle, ib_get_err_str(ib_status)); - - return (ib_status); -} - -ib_api_status_t -dapls_modify_qp_state_to_rts ( ib_qp_handle_t qp_handle ) -{ - ib_qp_mod_t 
qp_mod; - ib_api_status_t ib_status; - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_RTS; - qp_mod.state.rts.sq_psn = DAPL_IBAL_START_PSN; - qp_mod.state.rts.retry_cnt = 7; - qp_mod.state.rts.rnr_retry_cnt = IB_RNR_RETRY_CNT; - qp_mod.state.rtr.rnr_nak_timeout = IB_RNR_NAK_TIMEOUT; - qp_mod.state.rts.local_ack_timeout = 7; - qp_mod.state.rts.init_depth = 4; - - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RTS: QP(%p) state change %s\n", - qp_handle, ib_get_err_str(ib_status)); - - return (ib_status); -} - - -/* - * dapls_ib_reinit_ep - * - * Move the QP to INIT state again. - * - * Input: - * ep_ptr DAPL_EP - * - * Output: - * none - * - * Returns: - * void - * - */ -void -dapls_ib_reinit_ep ( IN DAPL_EP *ep_ptr ) -{ - DAPL_IA *ia_ptr; - ib_api_status_t ib_status; - dapl_ibal_ca_t *p_ca; - dapl_ibal_port_t *p_active_port; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsIQM_REINIT: EP(%p) QP(%p) state change\n", - ep_ptr, ep_ptr->qp_handle ); - - if ( ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsIRE: EP invalid state(%d)\n", - ep_ptr->param.ep_state); - return /*DAT_INVALID_STATE*/; - } - - ia_ptr = ep_ptr->header.owner_ia; - - /* Re-create QP if cleaned up, alloc will return init state */ - if ( ep_ptr->qp_handle == IB_INVALID_HANDLE ) - { - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsIRE: !EP(%p)->qp_handle, re-create QP\n",ep_ptr); - ib_status = dapls_ib_qp_alloc ( ia_ptr, ep_ptr, ep_ptr ); - if ( ib_status != IB_SUCCESS ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsIRE: failed to move qp to RESET status = %s\n", - ib_get_err_str(ib_status)); - } - return /*ib_status*/; - } - - ib_status = dapls_modify_qp_state_to_reset (ep_ptr->qp_handle); - - if ( ib_status != IB_SUCCESS ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsIRE: failed to move qp to RESET status = %s\n", - ib_get_err_str(ib_status)); - return 
/*DAT_INTERNAL_ERROR*/; - } - - ep_ptr->qp_state = IB_QPS_RESET; - - p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; - p_active_port = dapli_ibal_get_port ( p_ca, - (uint8_t)ia_ptr->hca_ptr->port_num ); - if (NULL == p_active_port) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsIRE: Port %d is not available = %d\n", - ia_ptr->hca_ptr->port_num, __LINE__); - return /*DAT_INTERNAL_ERROR*/; - } - - /* May fail if QP still RESET and in timewait, keep in reset state */ - ib_status = dapls_modify_qp_state_to_init ( ep_ptr->qp_handle, - &ep_ptr->param.ep_attr, - p_active_port); - if ( ib_status != IB_SUCCESS ) - { - ep_ptr->qp_state = IB_QPS_RESET; - - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsIRE: failed to move qp to INIT status %s\n", - ib_get_err_str(ib_status)); - return /*DAT_INTERNAL_ERROR*/; - } - ep_ptr->qp_state = IB_QPS_INIT; -} - - -/* - * Local variables: - * c-indent-level: 4 - * c-basic-offset: 4 - * tab-width: 8 - * End: - */ - + +/* + * Copyright (c) 2005-2007 Intel Corporation. All rights reserved. + * Copyright (c) 2002, Network Appliance, Inc. All rights reserved. + * + * This Software is licensed under the terms of the "Common Public + * License" a copy of which is in the file LICENSE.txt in the root + * directory. The license is also available from the Open Source + * Initiative, see http://www.opensource.org/licenses/cpl.php. 
+ * + */ + +/********************************************************************** + * + * MODULE: dapl_ibal_qp.c + * + * PURPOSE: IB QP routines for access to IBAL APIs + * + * $Id: dapl_ibal_qp.c 33 2005-07-11 19:51:17Z ftillier $ + * + **********************************************************************/ + +#include "dapl.h" +#include "dapl_adapter_util.h" +#include "dapl_evd_util.h" +#include "dapl_ibal_util.h" +#include "dapl_ep_util.h" + +#define DAPL_IBAL_QKEY 0 +#define DAPL_IBAL_START_PSN 0 + +extern DAT_RETURN +dapls_ib_cq_late_alloc ( IN ib_pd_handle_t pd_handle, + IN DAPL_EVD *evd_ptr ); + +static void +dapli_ib_qp_async_error_cb( IN ib_async_event_rec_t* p_err_rec ) +{ /* Async QP error handler: checks the EP, async EVD and HCA, marks the EP DISCONNECT_PENDING, forces an abrupt disconnect, then calls the registered QP error callback. */ + DAPL_EP *ep_ptr = (DAPL_EP *)p_err_rec->context; + DAPL_EVD *evd_ptr; + DAPL_IA *ia_ptr; + dapl_ibal_ca_t *p_ca; + dapl_ibal_evd_cb_t *evd_cb; + + dapl_dbg_log (DAPL_DBG_TYPE_ERR,"--> DiQpAEC QP event %s qp ctx %p\n", + ib_get_async_event_str(p_err_rec->code), p_err_rec->context); + dapl_dbg_log (DAPL_DBG_TYPE_ERR,"--> DiQpAEC qp_handle %p qpn %u\n", + ((DAPL_EP *)p_err_rec->context)->qp_handle, + ((DAPL_EP *)p_err_rec->context)->qpn); + + /* + * Verify handles EP, EVD, and hca_handle. NOTE(review): the log call above already dereferences the context as a DAPL_EP before this check - confirm that is safe. + */ + if (DAPL_BAD_HANDLE (ep_ptr, DAPL_MAGIC_EP ) || + DAPL_BAD_HANDLE (ep_ptr->param.connect_evd_handle, DAPL_MAGIC_EVD)) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DiQpAEC: invalid EP %p \n", ep_ptr); + return; + } + ia_ptr = ep_ptr->header.owner_ia; + evd_ptr = ia_ptr->async_error_evd; + + if (DAPL_BAD_HANDLE (evd_ptr, DAPL_MAGIC_EVD) || + !
(evd_ptr->evd_flags & DAT_EVD_ASYNC_FLAG)) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DiQpAEC: invalid EVD %p \n", evd_ptr); + return; + } + p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; + if (p_ca == NULL) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DiQpAEC: can't find %s HCA\n", + (ia_ptr->header.provider)->device_name); + return; + } + + /* find QP error callback using ia_ptr for context */ + evd_cb = dapli_find_evd_cb_by_context (ia_ptr, p_ca); + if ((evd_cb == NULL) || (evd_cb->pfn_async_qp_err_cb == NULL)) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DiQpAEC: no ERROR cb on p_ca %p found\n", p_ca); + return; + } + + dapl_os_lock (&ep_ptr->header.lock); + ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECT_PENDING; + dapl_os_unlock (&ep_ptr->header.lock); + + /* force disconnect, QP error state, to ensure DTO's get flushed */ + dapls_ib_disconnect ( ep_ptr, DAT_CLOSE_ABRUPT_FLAG ); + + /* maps to dapl_evd_qp_async_error_callback(), context is EP */ + evd_cb->pfn_async_qp_err_cb( (ib_hca_handle_t)p_ca, + ep_ptr->qp_handle, + (ib_error_record_t*)&p_err_rec->code, + ep_ptr ); +} + +/* + * dapls_ib_qp_alloc + * + * Alloc a QP + * + * Input: + * *ia_ptr pointer to DAPL IA + * *ep_ptr pointer to DAPL EP + * *ep_ctx_ptr pointer to DAPL EP context + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * + */ +DAT_RETURN +dapls_ib_qp_alloc ( + IN DAPL_IA *ia_ptr, + IN DAPL_EP *ep_ptr, + IN DAPL_EP *ep_ctx_ptr) +{ + DAT_EP_ATTR *attr; + DAPL_EVD *recv_evd_ptr, *request_evd_ptr; + DAT_RETURN dat_status; + ib_api_status_t ib_status; + ib_qp_create_t qp_create; + ib_pd_handle_t ib_pd_handle; + ib_cq_handle_t cq_recv; + ib_cq_handle_t cq_send; + dapl_ibal_ca_t *p_ca; + dapl_ibal_port_t *p_active_port; + ib_qp_attr_t qp_attr; + dp_ib_cm_handle_t cm_ptr; + + attr = &ep_ptr->param.ep_attr; + + dapl_os_assert ( ep_ptr->param.pz_handle != NULL ); + + ib_pd_handle = ((DAPL_PZ *)ep_ptr->param.pz_handle)->pd_handle; +
dapl_os_assert(ib_pd_handle); + recv_evd_ptr = (DAPL_EVD *) ep_ptr->param.recv_evd_handle; + request_evd_ptr = (DAPL_EVD *) ep_ptr->param.request_evd_handle; + + cq_recv = IB_INVALID_HANDLE; + cq_send = IB_INVALID_HANDLE; + + dapl_os_assert ( recv_evd_ptr != DAT_HANDLE_NULL ); + { + cq_recv = (ib_cq_handle_t) recv_evd_ptr->ib_cq_handle; + + if ((cq_recv == IB_INVALID_HANDLE) && + ( 0 != (recv_evd_ptr->evd_flags & ~DAT_EVD_SOFTWARE_FLAG) )) + { + dat_status = dapls_ib_cq_late_alloc ( ib_pd_handle, recv_evd_ptr); + if (dat_status != DAT_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> %s: failed to create CQ\n","DsQA"); + return (dat_status); + } + + dat_status = dapls_set_cq_notify (ia_ptr, recv_evd_ptr); + + if (dat_status != DAT_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> %s: failed to enable notify CQ\n","DsQA"); + return (dat_status); + } + + cq_recv = (ib_cq_handle_t) recv_evd_ptr->ib_cq_handle; + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsQA: alloc_recv_CQ = %p\n", cq_recv); + + } + } + + dapl_os_assert ( request_evd_ptr != DAT_HANDLE_NULL ); + { + cq_send = (ib_cq_handle_t) request_evd_ptr->ib_cq_handle; + + if ((cq_send == IB_INVALID_HANDLE) && + ( 0 != (request_evd_ptr->evd_flags & ~DAT_EVD_SOFTWARE_FLAG) )) + { + dat_status = dapls_ib_cq_late_alloc (ib_pd_handle, request_evd_ptr); + if (dat_status != DAT_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> %s: failed to create CQ\n","DsQA"); + return (dat_status); + } + + dat_status = dapls_set_cq_notify (ia_ptr, request_evd_ptr); + + if (dat_status != DAT_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> %s: failed to enable notify CQ\n","DsQA"); + return (dat_status); + } + + cq_send = (ib_cq_handle_t) request_evd_ptr->ib_cq_handle; + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsQA: alloc_send_CQ = %p\n", cq_send); + } + } + + /* + * Get the CA structure + */ + p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; + + dapl_os_memzero (&qp_create, sizeof (qp_create)); + 
qp_create.qp_type = IB_QPT_RELIABLE_CONN; + qp_create.sq_depth = attr->max_request_dtos; + qp_create.rq_depth = attr->max_recv_dtos; + qp_create.sq_sge = attr->max_recv_iov; + qp_create.rq_sge = attr->max_request_iov; + qp_create.h_sq_cq = cq_send; + qp_create.h_rq_cq = cq_recv; + qp_create.sq_signaled = FALSE; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsQA: sqd,iov=%d,%d rqd,iov=%d,%d\n", + attr->max_request_dtos, attr->max_request_iov, + attr->max_recv_dtos, attr->max_recv_iov); + + ib_status = ib_create_qp ( + ib_pd_handle, + &qp_create, + (void *) ep_ctx_ptr /* context */, + dapli_ib_qp_async_error_cb, + &ep_ptr->qp_handle); + + if (ib_status != IB_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsQA: Create QP failed = %s\n", + ib_get_err_str(ib_status)); + return (DAT_INSUFFICIENT_RESOURCES); + } + /* EP-CM linking */ + cm_ptr = ibal_cm_alloc(); + if (!cm_ptr) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsQA: Create CM failed\n"); + return (DAT_INSUFFICIENT_RESOURCES); + } + cm_ptr->ib_cm.h_qp = ep_ptr->qp_handle; + cm_ptr->ep = ep_ptr; + dapl_ep_link_cm(ep_ptr, cm_ptr); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQA: EP=%p, tEVD=%p, rEVD=%p QP=%p\n", + ep_ptr, ep_ptr->param.request_evd_handle, + ep_ptr->param.recv_evd_handle, + ep_ptr->qp_handle ); + + ep_ptr->qp_state = IB_QPS_RESET; + + p_active_port = dapli_ibal_get_port(p_ca,(uint8_t)ia_ptr->hca_ptr->port_num); + + if (NULL == p_active_port) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsQA: Port %d is not available = %d\n", + ia_ptr->hca_ptr->port_num, __LINE__); + return (DAT_INVALID_STATE); + } + + ib_status = dapls_modify_qp_state_to_init ( ep_ptr->qp_handle, + &ep_ptr->param.ep_attr, + p_active_port ); + + if ( ib_status != IB_SUCCESS ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsQA: Change QP state to INIT failed = %s\n", + ib_get_err_str(ib_status)); + return (DAT_INVALID_HANDLE); + } + ib_status = ib_query_qp ( ep_ptr->qp_handle, &qp_attr ); + + ep_ptr->qp_state = qp_attr.state; + 
ep_ptr->qpn = qp_attr.num; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQAQA: EP:%p new_QP %p state %s\n", + ep_ptr, + ep_ptr->qp_handle, + ib_get_port_state_str(ep_ptr->qp_state)); + + return (DAT_SUCCESS); +} + + +/* + * dapls_ib_qp_free + * + * Free a QP + * + * Input: + * *ia_ptr pointer to IA structure + * *ep_ptr pointer to EP structure + * + * Output: + * none + * + * Returns: + * none + * + */ +DAT_RETURN +dapls_ib_qp_free ( + IN DAPL_IA *ia_ptr, + IN DAPL_EP *ep_ptr ) +{ + ib_qp_handle_t qp; + + UNREFERENCED_PARAMETER(ia_ptr); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQF: free %p, state %s\n", + ep_ptr->qp_handle, + ib_get_port_state_str(ep_ptr->qp_state)); + + dapl_os_lock(&ep_ptr->header.lock); + if (( ep_ptr->qp_handle != IB_INVALID_HANDLE )) + { + qp = ep_ptr->qp_handle; + ep_ptr->qp_handle = IB_INVALID_HANDLE; + dapl_os_unlock(&ep_ptr->header.lock); + + dapls_modify_qp_state_to_error(qp); + dapls_ep_flush_cqs(ep_ptr); + + ib_destroy_qp ( qp, ib_sync_destroy ); + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQF: freed QP %p\n", + ep_ptr->qp_handle ); + } else { + dapl_os_unlock(&ep_ptr->header.lock); + } + + return DAT_SUCCESS; +} + + +/* + * dapls_ib_qp_modify + * + * Set the QP to the parameters specified in an EP_PARAM + * + * We can't be sure what state the QP is in so we first obtain the state + * from the driver. The EP_PARAM structure that is provided has been + * sanitized such that only non-zero values are valid. 
+ * + * Input: + * *ia_ptr pointer to DAPL IA + * *ep_ptr pointer to DAPL EP + * *ep_attr pointer to DAT EP attribute + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INVALID_PARAMETER + * + */ +DAT_RETURN +dapls_ib_qp_modify ( + IN DAPL_IA *ia_ptr, + IN DAPL_EP *ep_ptr, + IN DAT_EP_ATTR *ep_attr ) +{ + ib_qp_attr_t qp_attr; + ib_api_status_t ib_status; + ib_qp_handle_t qp_handle; + ib_qp_state_t qp_state; + ib_qp_mod_t qp_mod; + ib_av_attr_t *p_av_attr; + ib_qp_opts_t *p_qp_opts; + uint32_t *p_sq_depth, *p_rq_depth; + DAT_BOOLEAN need_modify; + DAT_RETURN dat_status; + + qp_handle = ep_ptr->qp_handle; + need_modify = DAT_FALSE; + dat_status = DAT_SUCCESS; + if ( ia_ptr == NULL || ia_ptr->header.magic != DAPL_MAGIC_IA ) + { + dat_status = DAT_INVALID_HANDLE; + goto bail; + } + /* + * Query the QP to get the current state. + */ + ib_status = ib_query_qp ( qp_handle, &qp_attr ); + + if ( ib_status != IB_SUCCESS ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsIQM: Query QP failed = %s\n", + ib_get_err_str(ib_status)); + dat_status = DAT_INTERNAL_ERROR; + goto bail; + } + + qp_state = qp_attr.state; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM: modify qp state=%d\n",qp_state); + /* + * Check if we have the right qp_state or not + */ + if ( (qp_state != IB_QPS_RTR ) && (qp_state != IB_QPS_RTS ) ) + { + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsIQM: postpone to modify qp to EP values later\n"); + dat_status = DAT_SUCCESS; + goto bail; + } + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + if (qp_state == IB_QPS_RTR) + { + p_av_attr = &qp_mod.state.rtr.primary_av; + p_qp_opts = &qp_mod.state.rtr.opts; + p_sq_depth = &qp_mod.state.rtr.sq_depth; + p_rq_depth = &qp_mod.state.rtr.rq_depth; + } + else + { + /* + * RTS does not have primary_av field + */ + p_av_attr = &qp_mod.state.rts.alternate_av; + p_qp_opts = &qp_mod.state.rts.opts; + p_sq_depth = &qp_mod.state.rts.sq_depth; + p_rq_depth = &qp_mod.state.rts.rq_depth; + 
} + + if ( (ep_attr->max_recv_dtos > 0) && + ((DAT_UINT32)ep_attr->max_recv_dtos != qp_attr.rq_depth) ) + { + dapl_dbg_log (DAPL_DBG_TYPE_EP,"--> DsIQM: rq_depth modified (%d,%d)\n", + qp_attr.rq_depth, ep_attr->max_recv_dtos); + + *p_rq_depth = ep_attr->max_recv_dtos; + *p_qp_opts |= IB_MOD_QP_RQ_DEPTH; + need_modify = DAT_TRUE; + } + + if ( (ep_attr->max_request_dtos > 0) && + ((DAT_UINT32)ep_attr->max_request_dtos != qp_attr.sq_depth) ) + { + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsIQM: sq_depth modified (%d,%d)\n", + qp_attr.sq_depth, ep_attr->max_request_dtos); + + *p_sq_depth = ep_attr->max_request_dtos; + *p_qp_opts |= IB_MOD_QP_SQ_DEPTH; + need_modify = DAT_TRUE; + } + + qp_mod.req_state = qp_state; + + if ( need_modify == DAT_TRUE ) + { + ib_status = ib_modify_qp (qp_handle, &qp_mod); + if ( ib_status != IB_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> %s: ib_status = %d\n", + "DsIQM", ib_status); + dat_status = DAT_INTERNAL_ERROR; + } + } + +bail: + + return dat_status; +} + + +ib_api_status_t +dapls_modify_qp_state_to_error ( ib_qp_handle_t qp_handle ) +{ + ib_qp_mod_t qp_mod; + ib_api_status_t ib_status; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_Err: QP state change --> Err\n"); + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_ERROR; + + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + return (ib_status); +} + + +ib_api_status_t +dapls_modify_qp_state_to_reset ( ib_qp_handle_t qp_handle ) +{ + ib_qp_mod_t qp_mod; + ib_api_status_t ib_status; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RESET: QP state change\n"); + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_RESET; + + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + return (ib_status); +} + + +ib_api_status_t +dapls_modify_qp_state_to_init ( + IN ib_qp_handle_t qp_handle, + IN DAT_EP_ATTR *p_attr, + IN dapl_ibal_port_t *p_port ) +{ + ib_qp_mod_t qp_mod; + ib_api_status_t ib_status; + + dapl_os_memzero (&qp_mod, 
sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_INIT; + qp_mod.state.init.primary_port = p_port->p_attr->port_num; + qp_mod.state.init.qkey = DAPL_IBAL_QKEY; + qp_mod.state.init.pkey_index = 0; + qp_mod.state.init.access_ctrl = IB_AC_LOCAL_WRITE | + IB_AC_RDMA_WRITE | + IB_AC_MW_BIND | + IB_AC_ATOMIC; + if ((p_attr->max_rdma_read_in > 0) || (p_attr->max_rdma_read_out > 0)) + { + qp_mod.state.init.access_ctrl |= IB_AC_RDMA_READ; + } + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_INIT: QP(%p) state change, %s\n", + qp_handle, ib_get_err_str(ib_status)); + + return (ib_status); +} + + +ib_api_status_t +dapls_modify_qp_state_to_rtr ( + ib_qp_handle_t qp_handle, + ib_net32_t dest_qp, + ib_lid_t dest_lid, + dapl_ibal_port_t *p_port) +{ + ib_qp_mod_t qp_mod; + ib_api_status_t ib_status; + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_RTR; + qp_mod.state.rtr.rq_psn = DAPL_IBAL_START_PSN; + qp_mod.state.rtr.dest_qp = dest_qp; + qp_mod.state.rtr.primary_av.port_num = p_port->p_attr->port_num; + qp_mod.state.rtr.primary_av.sl = 0; + qp_mod.state.rtr.primary_av.dlid = dest_lid; + qp_mod.state.rtr.primary_av.grh_valid = 0; /* FALSE */ + qp_mod.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS; + qp_mod.state.rtr.primary_av.path_bits = 0; + qp_mod.state.rtr.primary_av.conn.path_mtu = p_port->p_attr->mtu; + qp_mod.state.rtr.primary_av.conn.local_ack_timeout = 7; + qp_mod.state.rtr.primary_av.conn.seq_err_retry_cnt = 7; + qp_mod.state.rtr.primary_av.conn.rnr_retry_cnt = IB_RNR_RETRY_CNT; + qp_mod.state.rtr.resp_res = 4; // in-flight RDMAs + qp_mod.state.rtr.rnr_nak_timeout = IB_RNR_NAK_TIMEOUT; + + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RTR: QP(%p) state change %s\n", + qp_handle, ib_get_err_str(ib_status)); + + return (ib_status); +} + +ib_api_status_t +dapls_modify_qp_state_to_rts ( ib_qp_handle_t qp_handle ) +{ + ib_qp_mod_t 
qp_mod; + ib_api_status_t ib_status; + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_RTS; + qp_mod.state.rts.sq_psn = DAPL_IBAL_START_PSN; + qp_mod.state.rts.retry_cnt = 7; + qp_mod.state.rts.rnr_retry_cnt = IB_RNR_RETRY_CNT; + qp_mod.state.rtr.rnr_nak_timeout = IB_RNR_NAK_TIMEOUT; + qp_mod.state.rts.local_ack_timeout = 7; + qp_mod.state.rts.init_depth = 4; + + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RTS: QP(%p) state change %s\n", + qp_handle, ib_get_err_str(ib_status)); + + return (ib_status); +} + + +/* + * dapls_ib_reinit_ep + * + * Move the QP to INIT state again. + * + * Input: + * ep_ptr DAPL_EP + * + * Output: + * none + * + * Returns: + * void + * + */ +void +dapls_ib_reinit_ep ( IN DAPL_EP *ep_ptr ) +{ + DAPL_IA *ia_ptr; + ib_api_status_t ib_status; + dapl_ibal_ca_t *p_ca; + dapl_ibal_port_t *p_active_port; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsIQM_REINIT: EP(%p) QP(%p) state change\n", + ep_ptr, ep_ptr->qp_handle ); + + if ( ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsIRE: EP invalid state(%d)\n", + ep_ptr->param.ep_state); + return /*DAT_INVALID_STATE*/; + } + + ia_ptr = ep_ptr->header.owner_ia; + + /* Re-create QP if cleaned up, alloc will return init state */ + if ( ep_ptr->qp_handle == IB_INVALID_HANDLE ) + { + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsIRE: !EP(%p)->qp_handle, re-create QP\n",ep_ptr); + ib_status = dapls_ib_qp_alloc ( ia_ptr, ep_ptr, ep_ptr ); + if ( ib_status != IB_SUCCESS ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsIRE: failed to move qp to RESET status = %s\n", + ib_get_err_str(ib_status)); + } + return /*ib_status*/; + } + + ib_status = dapls_modify_qp_state_to_reset (ep_ptr->qp_handle); + + if ( ib_status != IB_SUCCESS ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsIRE: failed to move qp to RESET status = %s\n", + ib_get_err_str(ib_status)); + return 
/*DAT_INTERNAL_ERROR*/; + } + + ep_ptr->qp_state = IB_QPS_RESET; + + p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; + p_active_port = dapli_ibal_get_port ( p_ca, + (uint8_t)ia_ptr->hca_ptr->port_num ); + if (NULL == p_active_port) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsIRE: Port %d is not available = %d\n", + ia_ptr->hca_ptr->port_num, __LINE__); + return /*DAT_INTERNAL_ERROR*/; + } + + /* May fail if QP still RESET and in timewait, keep in reset state */ + ib_status = dapls_modify_qp_state_to_init ( ep_ptr->qp_handle, + &ep_ptr->param.ep_attr, + p_active_port); + if ( ib_status != IB_SUCCESS ) + { + ep_ptr->qp_state = IB_QPS_RESET; + + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsIRE: failed to move qp to INIT status %s\n", + ib_get_err_str(ib_status)); + return /*DAT_INTERNAL_ERROR*/; + } + ep_ptr->qp_state = IB_QPS_INIT; +} + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 8 + * End: + */ + diff --git a/branches/WOF2-3/ulp/dapl2/dapl/openib_cma/device.c b/branches/WOF2-3/ulp/dapl2/dapl/openib_cma/device.c index e4ff22eb..454c394f 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/openib_cma/device.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/openib_cma/device.c @@ -289,7 +289,6 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr) dapl_log(DAPL_DBG_TYPE_ERR, " open_hca: rdma_bind ERR %s." " Is %s configured?\n", strerror(errno), hca_name); - rdma_destroy_id(cm_id); return DAT_INVALID_ADDRESS; } diff --git a/branches/WOF2-3/ulp/dapl2/dapl/openib_common/dapl_ib_common.h b/branches/WOF2-3/ulp/dapl2/dapl/openib_common/dapl_ib_common.h index d3cf2e05..c9423179 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/openib_common/dapl_ib_common.h +++ b/branches/WOF2-3/ulp/dapl2/dapl/openib_common/dapl_ib_common.h @@ -1,403 +1,404 @@ -/* - * Copyright (c) 2009 Intel Corporation. All rights reserved. 
- * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -/* - * Definitions common to all OpenIB providers, cma, scm, ucm - */ - -#ifndef _DAPL_IB_COMMON_H_ -#define _DAPL_IB_COMMON_H_ - -#include - -#ifdef DAT_EXTENSIONS -#include -#endif - -#ifndef __cplusplus -#define false 0 -#define true 1 -#endif /*__cplusplus */ - -/* Typedefs to map common DAPL provider types to IB verbs */ -typedef struct ibv_qp *ib_qp_handle_t; -typedef struct ibv_cq *ib_cq_handle_t; -typedef struct ibv_pd *ib_pd_handle_t; -typedef struct ibv_mr *ib_mr_handle_t; -typedef struct ibv_mw *ib_mw_handle_t; -typedef struct ibv_wc ib_work_completion_t; -typedef struct ibv_ah *ib_ah_handle_t; -typedef union ibv_gid *ib_gid_handle_t; - -/* HCA context type maps to IB verbs */ -typedef struct ibv_context *ib_hca_handle_t; -typedef ib_hca_handle_t dapl_ibal_ca_t; - -/* QP info to exchange, wire protocol version for these CM's */ -#define DCM_VER 6 - -/* CM private data areas, same for all operations */ -#define DCM_MAX_PDATA_SIZE 118 - -/* - * UCM DAPL IB/QP address (lid, qp_num, gid) mapping to - * DAT_IA_ADDRESS_PTR, DAT_SOCK_ADDR2 (28 bytes) - * For applications, like MPI, that exchange IA_ADDRESS - * across the fabric before connecting, it eliminates the - * overhead of name and address resolution to the destination's - * CM services. UCM provider uses the following for - * DAT_IA_ADDRESS. Note: family == AF_INET6 to insure proper - * callee storage for address. 
- */ -union dcm_addr { - DAT_SOCK_ADDR6 so; - struct { - uint16_t family; /* sin6_family */ - uint16_t lid; /* sin6_port */ - uint32_t qpn; /* sin6_flowinfo */ - uint8_t gid[16]; /* sin6_addr */ - uint16_t port; /* sin6_scope_id */ - uint8_t sl; - uint8_t qp_type; - } ib; -}; - -/* 256 bytes total; default max_inline_send, min IB MTU size */ -typedef struct _ib_cm_msg -{ - uint16_t ver; - uint16_t op; - uint16_t sport; /* src cm port */ - uint16_t dport; /* dst cm port */ - uint32_t sqpn; /* src cm qpn */ - uint32_t dqpn; /* dst cm qpn */ - uint16_t p_size; - uint8_t resv[14]; - union dcm_addr saddr; - union dcm_addr daddr; - union dcm_addr saddr_alt; - union dcm_addr daddr_alt; - uint8_t p_data[DCM_MAX_PDATA_SIZE]; - -} ib_cm_msg_t; - -/* CM events */ -typedef enum { - IB_CME_CONNECTED, - IB_CME_DISCONNECTED, - IB_CME_DISCONNECTED_ON_LINK_DOWN, - IB_CME_CONNECTION_REQUEST_PENDING, - IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA, - IB_CME_CONNECTION_REQUEST_ACKED, - IB_CME_DESTINATION_REJECT, - IB_CME_DESTINATION_REJECT_PRIVATE_DATA, - IB_CME_DESTINATION_UNREACHABLE, - IB_CME_TOO_MANY_CONNECTION_REQUESTS, - IB_CME_LOCAL_FAILURE, - IB_CME_BROKEN, - IB_CME_TIMEOUT -} ib_cm_events_t; - -/* Operation and state mappings */ -typedef int ib_send_op_type_t; -typedef struct ibv_sge ib_data_segment_t; -typedef enum ibv_qp_state ib_qp_state_t; -typedef enum ibv_event_type ib_async_event_type; -typedef struct ibv_async_event ib_error_record_t; - -/* CQ notifications */ -typedef enum -{ - IB_NOTIFY_ON_NEXT_COMP, - IB_NOTIFY_ON_SOLIC_COMP - -} ib_notification_type_t; - -/* other mappings */ -typedef int ib_bool_t; -typedef union ibv_gid GID; -typedef char *IB_HCA_NAME; -typedef uint16_t ib_hca_port_t; - -/* Definitions */ -#define IB_INVALID_HANDLE NULL - -/* inline send rdma threshold */ -#define INLINE_SEND_IWARP_DEFAULT 64 -#define INLINE_SEND_IB_DEFAULT 256 - -/* qkey for UD QP's */ -#define DAT_UD_QKEY 0x78654321 - -/* RC timer - retry count defaults */ -#define 
DCM_ACK_TIMER 16 /* 5 bits, 4.096us*2^ack_timer. 16== 268ms */ -#define DCM_ACK_RETRY 7 /* 3 bits, 7 * 268ms = 1.8 seconds */ -#define DCM_RNR_TIMER 12 /* 5 bits, 12 =.64ms, 28 =163ms, 31 =491ms */ -#define DCM_RNR_RETRY 7 /* 3 bits, 7 == infinite */ -#define DCM_IB_MTU 2048 - -/* Global routing defaults */ -#define DCM_GLOBAL 0 /* global routing is disabled */ -#define DCM_HOP_LIMIT 0xff -#define DCM_TCLASS 0 - -/* DAPL uCM timers, default queue sizes */ -#define DCM_RETRY_CNT 15 -#define DCM_REP_TIME 800 /* reply timeout in m_secs */ -#define DCM_RTU_TIME 400 /* rtu timeout in m_secs */ -#define DCM_QP_SIZE 500 /* uCM tx, rx qp size */ -#define DCM_CQ_SIZE 500 /* uCM cq size */ - -/* DTO OPs, ordered for DAPL ENUM definitions */ -#define OP_RDMA_WRITE IBV_WR_RDMA_WRITE -#define OP_RDMA_WRITE_IMM IBV_WR_RDMA_WRITE_WITH_IMM -#define OP_SEND IBV_WR_SEND -#define OP_SEND_IMM IBV_WR_SEND_WITH_IMM -#define OP_RDMA_READ IBV_WR_RDMA_READ -#define OP_COMP_AND_SWAP IBV_WR_ATOMIC_CMP_AND_SWP -#define OP_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD -#define OP_RECEIVE 7 /* internal op */ -#define OP_RECEIVE_IMM 8 /* rdma write with immed, internel op */ -#define OP_RECEIVE_MSG_IMM 9 /* recv msg with immed, internel op */ -#define OP_BIND_MW 10 /* internal op */ -#define OP_SEND_UD 11 /* internal op */ -#define OP_RECV_UD 12 /* internal op */ -#define OP_INVALID 0xff - -/* Definitions to map QP state */ -#define IB_QP_STATE_RESET IBV_QPS_RESET -#define IB_QP_STATE_INIT IBV_QPS_INIT -#define IB_QP_STATE_RTR IBV_QPS_RTR -#define IB_QP_STATE_RTS IBV_QPS_RTS -#define IB_QP_STATE_SQD IBV_QPS_SQD -#define IB_QP_STATE_SQE IBV_QPS_SQE -#define IB_QP_STATE_ERROR IBV_QPS_ERR - -/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */ -/* some are errno and some are -n values */ - -/** - * ibv_get_device_name - Return kernel device name - * ibv_get_device_guid - Return device's node GUID - * ibv_open_device - Return ibv_context or NULL - * ibv_close_device - Return 0, 
(errno?) - * ibv_get_async_event - Return 0, -1 - * ibv_alloc_pd - Return ibv_pd, NULL - * ibv_dealloc_pd - Return 0, errno - * ibv_reg_mr - Return ibv_mr, NULL - * ibv_dereg_mr - Return 0, errno - * ibv_create_cq - Return ibv_cq, NULL - * ibv_destroy_cq - Return 0, errno - * ibv_get_cq_event - Return 0 & ibv_cq/context, int - * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error - * ibv_req_notify_cq - Return 0 (void?) - * ibv_create_qp - Return ibv_qp, NULL - * ibv_modify_qp - Return 0, errno - * ibv_destroy_qp - Return 0, errno - * ibv_post_send - Return 0, -1 & bad_wr - * ibv_post_recv - Return 0, -1 & bad_wr - */ - -/* async handler for DTO, CQ, QP, and unafiliated */ -typedef void (*ib_async_dto_handler_t)( - IN ib_hca_handle_t ib_hca_handle, - IN ib_error_record_t *err_code, - IN void *context); - -typedef void (*ib_async_cq_handler_t)( - IN ib_hca_handle_t ib_hca_handle, - IN ib_cq_handle_t ib_cq_handle, - IN ib_error_record_t *err_code, - IN void *context); - -typedef void (*ib_async_qp_handler_t)( - IN ib_hca_handle_t ib_hca_handle, - IN ib_qp_handle_t ib_qp_handle, - IN ib_error_record_t *err_code, - IN void *context); - -typedef void (*ib_async_handler_t)( - IN ib_hca_handle_t ib_hca_handle, - IN ib_error_record_t *err_code, - IN void *context); - -typedef enum -{ - IB_THREAD_INIT, - IB_THREAD_CREATE, - IB_THREAD_RUN, - IB_THREAD_CANCEL, - IB_THREAD_EXIT - -} ib_thread_state_t; - -typedef enum dapl_cm_op -{ - DCM_REQ = 1, - DCM_REP, - DCM_REJ_USER, /* user reject */ - DCM_REJ_CM, /* cm reject, no SID */ - DCM_RTU, - DCM_DREQ, - DCM_DREP - -} DAPL_CM_OP; - -typedef enum dapl_cm_state -{ - DCM_INIT, - DCM_LISTEN, - DCM_CONN_PENDING, - DCM_REP_PENDING, - DCM_ACCEPTING, - DCM_ACCEPTING_DATA, - DCM_ACCEPTED, - DCM_REJECTING, - DCM_REJECTED, - DCM_CONNECTED, - DCM_RELEASE, - DCM_DISC_PENDING, - DCM_DISCONNECTED, - DCM_DESTROY, - DCM_RTU_PENDING, - DCM_DISC_RECV, - DCM_FREE, - -} DAPL_CM_STATE; - -/* provider specfic fields for shared memory support */ 
-typedef uint32_t ib_shm_transport_t; - -/* prototypes */ -int32_t dapls_ib_init(void); -int32_t dapls_ib_release(void); - -/* util.c */ -enum ibv_mtu dapl_ib_mtu(int mtu); -char *dapl_ib_mtu_str(enum ibv_mtu mtu); -int getipaddr_netdev(char *name, char *addr, int addr_len); -DAT_RETURN getlocalipaddr(char *addr, int addr_len); - -/* qp.c */ -DAT_RETURN dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp); -DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t qp_handle, - IN ib_qp_state_t qp_state, - IN uint32_t qpn, - IN uint16_t lid, - IN ib_gid_handle_t gid); -ib_ah_handle_t dapls_create_ah( IN DAPL_HCA *hca, - IN ib_pd_handle_t pd, - IN ib_qp_handle_t qp, - IN uint16_t lid, - IN ib_gid_handle_t gid); - -/* inline functions */ -STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name) -{ - /* use ascii; name of local device */ - return dapl_os_strdup(name); -} - -STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name) -{ - return; -} - -/* - * Convert errno to DAT_RETURN values - */ -STATIC _INLINE_ DAT_RETURN -dapl_convert_errno( IN int err, IN const char *str ) -{ - if (!err) return DAT_SUCCESS; - -#if DAPL_DBG - if ((err != EAGAIN) && (err != ETIMEDOUT)) - dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err)); -#endif - - switch( err ) - { - case EOVERFLOW : return DAT_LENGTH_ERROR; - case EACCES : return DAT_PRIVILEGES_VIOLATION; - case EPERM : return DAT_PROTECTION_VIOLATION; - case EINVAL : return DAT_INVALID_HANDLE; - case EISCONN : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED; - case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY; - case ETIMEDOUT : return DAT_TIMEOUT_EXPIRED; - case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE; - case EADDRINUSE : return DAT_CONN_QUAL_IN_USE; - case EALREADY : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING; - case ENOMEM : return DAT_INSUFFICIENT_RESOURCES; - case EAGAIN : return DAT_QUEUE_EMPTY; - case EINTR 
: return DAT_INTERRUPTED_CALL; - case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED; - case EFAULT : - default : return DAT_INTERNAL_ERROR; - } - } - -STATIC _INLINE_ char * dapl_cm_state_str(IN int st) -{ - static char *state[] = { - "CM_INIT", - "CM_LISTEN", - "CM_CONN_PENDING", - "CM_REP_PENDING", - "CM_ACCEPTING", - "CM_ACCEPTING_DATA", - "CM_ACCEPTED", - "CM_REJECTING", - "CM_REJECTED", - "CM_CONNECTED", - "CM_RELEASE", - "CM_DISC_PENDING", - "CM_DISCONNECTED", - "CM_DESTROY", - "CM_RTU_PENDING", - "CM_DISC_RECV", - "CM_FREE" - }; - return ((st < 0 || st > 16) ? "Invalid CM state?" : state[st]); -} - -STATIC _INLINE_ char * dapl_cm_op_str(IN int op) -{ - static char *ops[] = { - "INVALID", - "REQ", - "REP", - "REJ_USER", - "REJ_CM", - "RTU", - "DREQ", - "DREP", - }; - return ((op < 1 || op > 7) ? "Invalid OP?" : ops[op]); -} - -#endif /* _DAPL_IB_COMMON_H_ */ +/* + * Copyright (c) 2009 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +/* + * Definitions common to all OpenIB providers, cma, scm, ucm + */ + +#ifndef _DAPL_IB_COMMON_H_ +#define _DAPL_IB_COMMON_H_ + +#include + +#ifdef DAT_EXTENSIONS +#include +#endif + +#ifndef __cplusplus +#define false 0 +#define true 1 +#endif /*__cplusplus */ + +/* Typedefs to map common DAPL provider types to IB verbs */ +typedef struct ibv_qp *ib_qp_handle_t; +typedef struct ibv_cq *ib_cq_handle_t; +typedef struct ibv_pd *ib_pd_handle_t; +typedef struct ibv_mr *ib_mr_handle_t; +typedef struct ibv_mw *ib_mw_handle_t; +typedef struct ibv_wc ib_work_completion_t; +typedef struct ibv_ah *ib_ah_handle_t; +typedef union ibv_gid *ib_gid_handle_t; + +/* HCA context type maps to IB verbs */ +typedef struct ibv_context *ib_hca_handle_t; +typedef ib_hca_handle_t dapl_ibal_ca_t; + +/* QP info to exchange, wire protocol version for these CM's */ +#define DCM_VER 6 + +/* CM private data areas, same for all operations */ +#define DCM_MAX_PDATA_SIZE 118 + +/* + * UCM DAPL IB/QP address (lid, qp_num, gid) mapping to + * DAT_IA_ADDRESS_PTR, DAT_SOCK_ADDR2 (28 bytes) + * For applications, like MPI, that exchange IA_ADDRESS + * across the fabric before connecting, it eliminates the + * overhead of name and address resolution to the destination's + * CM services. UCM provider uses the following for + * DAT_IA_ADDRESS. Note: family == AF_INET6 to insure proper + * callee storage for address. 
+ */ +union dcm_addr { + DAT_SOCK_ADDR6 so; + struct { + uint16_t family; /* sin6_family */ + uint16_t lid; /* sin6_port */ + uint32_t qpn; /* sin6_flowinfo */ + uint8_t gid[16]; /* sin6_addr */ + uint16_t port; /* sin6_scope_id */ + uint8_t sl; + uint8_t qp_type; + } ib; +}; + +/* 256 bytes total; default max_inline_send, min IB MTU size */ +typedef struct _ib_cm_msg +{ + uint16_t ver; + uint16_t op; + uint16_t sport; /* src cm port */ + uint16_t dport; /* dst cm port */ + uint32_t sqpn; /* src cm qpn */ + uint32_t dqpn; /* dst cm qpn */ + uint16_t p_size; + uint8_t resv[14]; + union dcm_addr saddr; + union dcm_addr daddr; + union dcm_addr saddr_alt; + union dcm_addr daddr_alt; + uint8_t p_data[DCM_MAX_PDATA_SIZE]; + +} ib_cm_msg_t; + +/* CM events */ +typedef enum { + IB_CME_CONNECTED, + IB_CME_DISCONNECTED, + IB_CME_DISCONNECTED_ON_LINK_DOWN, + IB_CME_CONNECTION_REQUEST_PENDING, + IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA, + IB_CME_CONNECTION_REQUEST_ACKED, + IB_CME_DESTINATION_REJECT, + IB_CME_DESTINATION_REJECT_PRIVATE_DATA, + IB_CME_DESTINATION_UNREACHABLE, + IB_CME_TOO_MANY_CONNECTION_REQUESTS, + IB_CME_LOCAL_FAILURE, + IB_CME_BROKEN, + IB_CME_TIMEOUT +} ib_cm_events_t; + +/* Operation and state mappings */ +typedef int ib_send_op_type_t; +typedef struct ibv_sge ib_data_segment_t; +typedef enum ibv_qp_state ib_qp_state_t; +typedef enum ibv_event_type ib_async_event_type; +typedef struct ibv_async_event ib_error_record_t; + +/* CQ notifications */ +typedef enum +{ + IB_NOTIFY_ON_NEXT_COMP, + IB_NOTIFY_ON_SOLIC_COMP + +} ib_notification_type_t; + +/* other mappings */ +typedef int ib_bool_t; +typedef union ibv_gid GID; +typedef char *IB_HCA_NAME; +typedef uint16_t ib_hca_port_t; + +/* Definitions */ +#define IB_INVALID_HANDLE NULL + +/* inline send rdma threshold */ +#define INLINE_SEND_IWARP_DEFAULT 64 +#define INLINE_SEND_IB_DEFAULT 256 + +/* qkey for UD QP's */ +#define DAT_UD_QKEY 0x78654321 + +/* RC timer - retry count defaults */ +#define 
DCM_ACK_TIMER 16 /* 5 bits, 4.096us*2^ack_timer. 16== 268ms */ +#define DCM_ACK_RETRY 7 /* 3 bits, 7 * 268ms = 1.8 seconds */ +#define DCM_RNR_TIMER 12 /* 5 bits, 12 =.64ms, 28 =163ms, 31 =491ms */ +#define DCM_RNR_RETRY 7 /* 3 bits, 7 == infinite */ +#define DCM_IB_MTU 2048 + +/* Global routing defaults */ +#define DCM_GLOBAL 0 /* global routing is disabled */ +#define DCM_HOP_LIMIT 0xff +#define DCM_TCLASS 0 + +/* DAPL uCM timers, default queue sizes */ +#define DCM_RETRY_CNT 15 +#define DCM_REP_TIME 800 /* reply timeout in m_secs */ +#define DCM_RTU_TIME 400 /* rtu timeout in m_secs */ +#define DCM_QP_SIZE 500 /* uCM tx, rx qp size */ +#define DCM_CQ_SIZE 500 /* uCM cq size */ +#define DCM_TX_BURST 50 /* uCM signal, every TX burst msgs posted */ + +/* DTO OPs, ordered for DAPL ENUM definitions */ +#define OP_RDMA_WRITE IBV_WR_RDMA_WRITE +#define OP_RDMA_WRITE_IMM IBV_WR_RDMA_WRITE_WITH_IMM +#define OP_SEND IBV_WR_SEND +#define OP_SEND_IMM IBV_WR_SEND_WITH_IMM +#define OP_RDMA_READ IBV_WR_RDMA_READ +#define OP_COMP_AND_SWAP IBV_WR_ATOMIC_CMP_AND_SWP +#define OP_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD +#define OP_RECEIVE 7 /* internal op */ +#define OP_RECEIVE_IMM 8 /* rdma write with immed, internel op */ +#define OP_RECEIVE_MSG_IMM 9 /* recv msg with immed, internel op */ +#define OP_BIND_MW 10 /* internal op */ +#define OP_SEND_UD 11 /* internal op */ +#define OP_RECV_UD 12 /* internal op */ +#define OP_INVALID 0xff + +/* Definitions to map QP state */ +#define IB_QP_STATE_RESET IBV_QPS_RESET +#define IB_QP_STATE_INIT IBV_QPS_INIT +#define IB_QP_STATE_RTR IBV_QPS_RTR +#define IB_QP_STATE_RTS IBV_QPS_RTS +#define IB_QP_STATE_SQD IBV_QPS_SQD +#define IB_QP_STATE_SQE IBV_QPS_SQE +#define IB_QP_STATE_ERROR IBV_QPS_ERR + +/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */ +/* some are errno and some are -n values */ + +/** + * ibv_get_device_name - Return kernel device name + * ibv_get_device_guid - Return device's node GUID + * 
ibv_open_device - Return ibv_context or NULL + * ibv_close_device - Return 0, (errno?) + * ibv_get_async_event - Return 0, -1 + * ibv_alloc_pd - Return ibv_pd, NULL + * ibv_dealloc_pd - Return 0, errno + * ibv_reg_mr - Return ibv_mr, NULL + * ibv_dereg_mr - Return 0, errno + * ibv_create_cq - Return ibv_cq, NULL + * ibv_destroy_cq - Return 0, errno + * ibv_get_cq_event - Return 0 & ibv_cq/context, int + * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error + * ibv_req_notify_cq - Return 0 (void?) + * ibv_create_qp - Return ibv_qp, NULL + * ibv_modify_qp - Return 0, errno + * ibv_destroy_qp - Return 0, errno + * ibv_post_send - Return 0, -1 & bad_wr + * ibv_post_recv - Return 0, -1 & bad_wr + */ + +/* async handler for DTO, CQ, QP, and unafiliated */ +typedef void (*ib_async_dto_handler_t)( + IN ib_hca_handle_t ib_hca_handle, + IN ib_error_record_t *err_code, + IN void *context); + +typedef void (*ib_async_cq_handler_t)( + IN ib_hca_handle_t ib_hca_handle, + IN ib_cq_handle_t ib_cq_handle, + IN ib_error_record_t *err_code, + IN void *context); + +typedef void (*ib_async_qp_handler_t)( + IN ib_hca_handle_t ib_hca_handle, + IN ib_qp_handle_t ib_qp_handle, + IN ib_error_record_t *err_code, + IN void *context); + +typedef void (*ib_async_handler_t)( + IN ib_hca_handle_t ib_hca_handle, + IN ib_error_record_t *err_code, + IN void *context); + +typedef enum +{ + IB_THREAD_INIT, + IB_THREAD_CREATE, + IB_THREAD_RUN, + IB_THREAD_CANCEL, + IB_THREAD_EXIT + +} ib_thread_state_t; + +typedef enum dapl_cm_op +{ + DCM_REQ = 1, + DCM_REP, + DCM_REJ_USER, /* user reject */ + DCM_REJ_CM, /* cm reject, no SID */ + DCM_RTU, + DCM_DREQ, + DCM_DREP + +} DAPL_CM_OP; + +typedef enum dapl_cm_state +{ + DCM_INIT, + DCM_LISTEN, + DCM_CONN_PENDING, + DCM_REP_PENDING, + DCM_ACCEPTING, + DCM_ACCEPTING_DATA, + DCM_ACCEPTED, + DCM_REJECTING, + DCM_REJECTED, + DCM_CONNECTED, + DCM_RELEASE, + DCM_DISC_PENDING, + DCM_DISCONNECTED, + DCM_DESTROY, + DCM_RTU_PENDING, + DCM_DISC_RECV, + DCM_FREE, + 
+} DAPL_CM_STATE; + +/* provider specfic fields for shared memory support */ +typedef uint32_t ib_shm_transport_t; + +/* prototypes */ +int32_t dapls_ib_init(void); +int32_t dapls_ib_release(void); + +/* util.c */ +enum ibv_mtu dapl_ib_mtu(int mtu); +char *dapl_ib_mtu_str(enum ibv_mtu mtu); +int getipaddr_netdev(char *name, char *addr, int addr_len); +DAT_RETURN getlocalipaddr(char *addr, int addr_len); + +/* qp.c */ +DAT_RETURN dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp); +DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t qp_handle, + IN ib_qp_state_t qp_state, + IN uint32_t qpn, + IN uint16_t lid, + IN ib_gid_handle_t gid); +ib_ah_handle_t dapls_create_ah( IN DAPL_HCA *hca, + IN ib_pd_handle_t pd, + IN ib_qp_handle_t qp, + IN uint16_t lid, + IN ib_gid_handle_t gid); + +/* inline functions */ +STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name) +{ + /* use ascii; name of local device */ + return dapl_os_strdup(name); +} + +STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name) +{ + return; +} + +/* + * Convert errno to DAT_RETURN values + */ +STATIC _INLINE_ DAT_RETURN +dapl_convert_errno( IN int err, IN const char *str ) +{ + if (!err) return DAT_SUCCESS; + +#if DAPL_DBG + if ((err != EAGAIN) && (err != ETIMEDOUT)) + dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err)); +#endif + + switch( err ) + { + case EOVERFLOW : return DAT_LENGTH_ERROR; + case EACCES : return DAT_PRIVILEGES_VIOLATION; + case EPERM : return DAT_PROTECTION_VIOLATION; + case EINVAL : return DAT_INVALID_HANDLE; + case EISCONN : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED; + case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY; + case ETIMEDOUT : return DAT_TIMEOUT_EXPIRED; + case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE; + case EADDRINUSE : return DAT_CONN_QUAL_IN_USE; + case EALREADY : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING; + case ENOMEM : return 
DAT_INSUFFICIENT_RESOURCES; + case EAGAIN : return DAT_QUEUE_EMPTY; + case EINTR : return DAT_INTERRUPTED_CALL; + case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED; + case EFAULT : + default : return DAT_INTERNAL_ERROR; + } + } + +STATIC _INLINE_ char * dapl_cm_state_str(IN int st) +{ + static char *state[] = { + "CM_INIT", + "CM_LISTEN", + "CM_CONN_PENDING", + "CM_REP_PENDING", + "CM_ACCEPTING", + "CM_ACCEPTING_DATA", + "CM_ACCEPTED", + "CM_REJECTING", + "CM_REJECTED", + "CM_CONNECTED", + "CM_RELEASE", + "CM_DISC_PENDING", + "CM_DISCONNECTED", + "CM_DESTROY", + "CM_RTU_PENDING", + "CM_DISC_RECV", + "CM_FREE" + }; + return ((st < 0 || st > 16) ? "Invalid CM state?" : state[st]); +} + +STATIC _INLINE_ char * dapl_cm_op_str(IN int op) +{ + static char *ops[] = { + "INVALID", + "REQ", + "REP", + "REJ_USER", + "REJ_CM", + "RTU", + "DREQ", + "DREP", + }; + return ((op < 1 || op > 7) ? "Invalid OP?" : ops[op]); +} + +#endif /* _DAPL_IB_COMMON_H_ */ diff --git a/branches/WOF2-3/ulp/dapl2/dapl/openib_common/qp.c b/branches/WOF2-3/ulp/dapl2/dapl/openib_common/qp.c index 179eef0e..2d326357 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/openib_common/qp.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/openib_common/qp.c @@ -1,614 +1,664 @@ -/* - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. 
- * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. - */ -#include "dapl.h" -#include "dapl_adapter_util.h" -#include "dapl_ep_util.h" - -/* - * dapl_ib_qp_alloc - * - * Alloc a QP - * - * Input: - * *ep_ptr pointer to EP INFO - * ib_hca_handle provider HCA handle - * ib_pd_handle provider protection domain handle - * cq_recv provider recv CQ handle - * cq_send provider send CQ handle - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INTERNAL_ERROR - * - */ -DAT_RETURN -dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, - IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr) -{ - DAT_EP_ATTR *attr; - DAPL_EVD *rcv_evd, *req_evd; - ib_cq_handle_t rcv_cq, req_cq; - ib_pd_handle_t ib_pd_handle; - struct ibv_qp_init_attr qp_create; -#ifdef _OPENIB_CMA_ - dp_ib_cm_handle_t conn; -#endif - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n", - ia_ptr, ep_ptr, ep_ctx_ptr); - - attr = &ep_ptr->param.ep_attr; - ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle; - rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle; - req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle; - - /* - * DAT allows usage model of EP's with no EVD's but IB does not. - * Create a CQ with zero entries under the covers to support and - * catch any invalid posting. 
- */ - if (rcv_evd != DAT_HANDLE_NULL) - rcv_cq = rcv_evd->ib_cq_handle; - else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty) - rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty; - else { - struct ibv_comp_channel *channel; - - channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle); - if (!channel) - return (dapl_convert_errno(ENOMEM, "create_cq")); - - /* Call IB verbs to create CQ */ - rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, - 0, NULL, channel, 0); - - if (rcv_cq == IB_INVALID_HANDLE) { - ibv_destroy_comp_channel(channel); - return (dapl_convert_errno(ENOMEM, "create_cq")); - } - - ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq; - } - if (req_evd != DAT_HANDLE_NULL) - req_cq = req_evd->ib_cq_handle; - else - req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty; - - /* - * IMPLEMENTATION NOTE: - * uDAPL allows consumers to post buffers on the EP after creation - * and before a connect request (outbound and inbound). This forces - * a binding to a device during the hca_open call and requires the - * consumer to predetermine which device to listen on or connect from. - * This restriction eliminates any option of listening or connecting - * over multiple devices. uDAPL should add API's to resolve addresses - * and bind to the device at the approriate time (before connect - * and after CR arrives). Discovery should happen at connection time - * based on addressing and not on static configuration during open. 
- */ - -#ifdef _OPENIB_CMA_ - /* Allocate CM and initialize lock */ - if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL) - return (dapl_convert_errno(ENOMEM, "cm_create")); - - /* open identifies the local device; per DAT specification */ - if (rdma_bind_addr(conn->cm_id, - (struct sockaddr *)&ia_ptr->hca_ptr->hca_address)) { - dapls_cm_free(conn); - return (dapl_convert_errno(EAFNOSUPPORT, "rdma_bind_addr")); - } -#endif - /* Setup attributes and create qp */ - dapl_os_memzero((void *)&qp_create, sizeof(qp_create)); - qp_create.send_cq = req_cq; - qp_create.cap.max_send_wr = attr->max_request_dtos; - qp_create.cap.max_send_sge = attr->max_request_iov; - qp_create.cap.max_inline_data = - ia_ptr->hca_ptr->ib_trans.max_inline_send; - qp_create.qp_type = IBV_QPT_RC; - qp_create.qp_context = (void *)ep_ptr; - -#ifdef DAT_EXTENSIONS - if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) { -#ifdef _OPENIB_CMA_ - return (DAT_NOT_IMPLEMENTED); -#endif - qp_create.qp_type = IBV_QPT_UD; - if (attr->max_message_size > - (128 << ia_ptr->hca_ptr->ib_trans.mtu)) { - return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6); - } - } -#endif - - /* ibv assumes rcv_cq is never NULL, set to req_cq */ - if (rcv_cq == NULL) { - qp_create.recv_cq = req_cq; - qp_create.cap.max_recv_wr = 0; - qp_create.cap.max_recv_sge = 0; - } else { - qp_create.recv_cq = rcv_cq; - qp_create.cap.max_recv_wr = attr->max_recv_dtos; - qp_create.cap.max_recv_sge = attr->max_recv_iov; - } - -#ifdef _OPENIB_CMA_ - if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) { - dapls_cm_free(conn); - return (dapl_convert_errno(errno, "rdma_create_qp")); - } - ep_ptr->qp_handle = conn->cm_id->qp; - ep_ptr->qp_state = IBV_QPS_INIT; - - ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id); -#else - ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create); - if (!ep_ptr->qp_handle) - return (dapl_convert_errno(ENOMEM, "create_qp")); - - /* Setup QP attributes for INIT state on the way out */ - if 
(dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) { - ibv_destroy_qp(ep_ptr->qp_handle); - ep_ptr->qp_handle = IB_INVALID_HANDLE; - return DAT_INTERNAL_ERROR; - } -#endif - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " qp_alloc: qpn %p type %d sq %d,%d rq %d,%d\n", - ep_ptr->qp_handle->qp_num, ep_ptr->qp_handle->qp_type, - qp_create.cap.max_send_wr, qp_create.cap.max_send_sge, - qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge); - - return DAT_SUCCESS; -} - -/* - * dapl_ib_qp_free - * - * Free a QP - * - * Input: - * ia_handle IA handle - * *ep_ptr pointer to EP INFO - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * dapl_convert_errno - * - */ -DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr) -{ -#ifdef _OPENIB_CMA_ - dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr); - - dapl_os_lock(&ep_ptr->header.lock); - if (cm_ptr && cm_ptr->cm_id->qp) { - rdma_destroy_qp(cm_ptr->cm_id); - cm_ptr->cm_id->qp = NULL; - ep_ptr->qp_handle = NULL; - } -#else - dapl_os_lock(&ep_ptr->header.lock); - if (ep_ptr->qp_handle != NULL) { - /* force error state to flush queue, then destroy */ - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0); - - if (ibv_destroy_qp(ep_ptr->qp_handle)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " qp_free: ibv_destroy_qp error - %s\n", - strerror(errno)); - } - ep_ptr->qp_handle = NULL; - } -#endif - dapl_os_unlock(&ep_ptr->header.lock); - return DAT_SUCCESS; -} - -/* - * dapl_ib_qp_modify - * - * Set the QP to the parameters specified in an EP_PARAM - * - * The EP_PARAM structure that is provided has been - * sanitized such that only non-zero values are valid. 
- * - * Input: - * ib_hca_handle HCA handle - * qp_handle QP handle - * ep_attr Sanitized EP Params - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INVALID_PARAMETER - * - */ -DAT_RETURN -dapls_ib_qp_modify(IN DAPL_IA * ia_ptr, - IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr) -{ - struct ibv_qp_attr qp_attr; - - if (ep_ptr->qp_handle == IB_INVALID_HANDLE) - return DAT_INVALID_PARAMETER; - - /* - * EP state, qp_handle state should be an indication - * of current state but the only way to be sure is with - * a user mode ibv_query_qp call which is NOT available - */ - - /* move to error state if necessary */ - if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) && - (ep_ptr->qp_handle->state != IBV_QPS_ERR)) { - return (dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_ERR, 0, 0, 0)); - } - - /* - * Check if we have the right qp_state to modify attributes - */ - if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) && - (ep_ptr->qp_handle->state != IBV_QPS_RTS)) - return DAT_INVALID_STATE; - - /* Adjust to current EP attributes */ - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.cap.max_send_wr = attr->max_request_dtos; - qp_attr.cap.max_recv_wr = attr->max_recv_dtos; - qp_attr.cap.max_send_sge = attr->max_request_iov; - qp_attr.cap.max_recv_sge = attr->max_recv_iov; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - "modify_qp: qp %p sq %d,%d, rq %d,%d\n", - ep_ptr->qp_handle, - qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge, - qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge); - - if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) { - dapl_dbg_log(DAPL_DBG_TYPE_ERR, - "modify_qp: modify ep %p qp %p failed\n", - ep_ptr, ep_ptr->qp_handle); - return (dapl_convert_errno(errno, "modify_qp_state")); - } - - return DAT_SUCCESS; -} - -/* - * dapls_ib_reinit_ep - * - * Move the QP to INIT state again. 
- * - * Input: - * ep_ptr DAPL_EP - * - * Output: - * none - * - * Returns: - * void - * - */ -#if defined(_WIN32) || defined(_WIN64) || defined(_OPENIB_CMA_) -void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr) -{ - dp_ib_cm_handle_t cm_ptr, next_cm_ptr; - - /* work around bug in low level driver - 3/24/09 */ - /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */ - if (ep_ptr->qp_handle != IB_INVALID_HANDLE) { - dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr); - - /* free any CM object's created */ - cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) - ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); - while (cm_ptr != NULL) { - next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head, - &cm_ptr->list_entry); - dapls_cm_free(cm_ptr); - cm_ptr = next_cm_ptr; - } - dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr); - } -} -#else // _WIN32 || _WIN64 -void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr) -{ - if (ep_ptr->qp_handle != IB_INVALID_HANDLE && - ep_ptr->qp_handle->qp_type != IBV_QPT_UD) { - /* move to RESET state and then to INIT */ - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0); - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0); - } -} -#endif // _WIN32 || _WIN64 - -/* - * Generic QP modify for init, reset, error, RTS, RTR - * For UD, create_ah on RTR, qkey on INIT - * CM msg provides QP attributes, info in network order - */ -DAT_RETURN -dapls_modify_qp_state(IN ib_qp_handle_t qp_handle, - IN ib_qp_state_t qp_state, - IN uint32_t qpn, - IN uint16_t lid, - IN ib_gid_handle_t gid) -{ - struct ibv_qp_attr qp_attr; - enum ibv_qp_attr_mask mask = IBV_QP_STATE; - DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context; - DAPL_IA *ia_ptr = ep_ptr->header.owner_ia; - int ret; - - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = qp_state; - - switch (qp_state) { - case IBV_QPS_RTR: - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " QPS_RTR: type %d qpn 0x%x gid %p (%d) lid 0x%x" - " port %d ep %p qp_state %d \n", 
- qp_handle->qp_type, ntohl(qpn), gid, - ia_ptr->hca_ptr->ib_trans.global, - ntohs(lid), ia_ptr->hca_ptr->port_num, - ep_ptr, ep_ptr->qp_state); - - mask |= IBV_QP_AV | - IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | - IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; - - qp_attr.dest_qp_num = ntohl(qpn); - qp_attr.rq_psn = 1; - qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu; - qp_attr.max_dest_rd_atomic = - ep_ptr->param.ep_attr.max_rdma_read_out; - qp_attr.min_rnr_timer = - ia_ptr->hca_ptr->ib_trans.rnr_timer; - - /* address handle. RC and UD */ - qp_attr.ah_attr.dlid = ntohs(lid); - if (gid && ia_ptr->hca_ptr->ib_trans.global) { - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " QPS_RTR: GID Subnet 0x" F64x " ID 0x" F64x "\n", - (unsigned long long)htonll(gid->global.subnet_prefix), - (unsigned long long)htonll(gid->global.interface_id)); - - qp_attr.ah_attr.is_global = 1; - qp_attr.ah_attr.grh.dgid.global.subnet_prefix = - gid->global.subnet_prefix; - qp_attr.ah_attr.grh.dgid.global.interface_id = - gid->global.interface_id; - qp_attr.ah_attr.grh.hop_limit = - ia_ptr->hca_ptr->ib_trans.hop_limit; - qp_attr.ah_attr.grh.traffic_class = - ia_ptr->hca_ptr->ib_trans.tclass; - } - qp_attr.ah_attr.sl = ia_ptr->hca_ptr->ib_trans.sl; - qp_attr.ah_attr.src_path_bits = 0; - qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num; - - /* UD: already in RTR, RTS state */ - if (qp_handle->qp_type == IBV_QPT_UD) { - mask = IBV_QP_STATE; - if (ep_ptr->qp_state == IBV_QPS_RTR || - ep_ptr->qp_state == IBV_QPS_RTS) - return DAT_SUCCESS; - } - break; - case IBV_QPS_RTS: - if (qp_handle->qp_type == IBV_QPT_RC) { - mask |= IBV_QP_SQ_PSN | - IBV_QP_TIMEOUT | - IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC; - qp_attr.timeout = - ia_ptr->hca_ptr->ib_trans.ack_timer; - qp_attr.retry_cnt = - ia_ptr->hca_ptr->ib_trans.ack_retry; - qp_attr.rnr_retry = - ia_ptr->hca_ptr->ib_trans.rnr_retry; - qp_attr.max_rd_atomic = - ep_ptr->param.ep_attr.max_rdma_read_out; - } - /* RC and UD 
*/ - qp_attr.qp_state = IBV_QPS_RTS; - qp_attr.sq_psn = 1; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " QPS_RTS: psn %x rd_atomic %d ack %d " - " retry %d rnr_retry %d ep %p qp_state %d\n", - qp_attr.sq_psn, qp_attr.max_rd_atomic, - qp_attr.timeout, qp_attr.retry_cnt, - qp_attr.rnr_retry, ep_ptr, - ep_ptr->qp_state); - - if (qp_handle->qp_type == IBV_QPT_UD) { - /* already RTS, multi remote AH's on QP */ - if (ep_ptr->qp_state == IBV_QPS_RTS) - return DAT_SUCCESS; - else - mask = IBV_QP_STATE | IBV_QP_SQ_PSN; - } - break; - case IBV_QPS_INIT: - mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT; - if (qp_handle->qp_type == IBV_QPT_RC) { - mask |= IBV_QP_ACCESS_FLAGS; - qp_attr.qp_access_flags = - IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE | - IBV_ACCESS_REMOTE_READ | - IBV_ACCESS_REMOTE_ATOMIC | - IBV_ACCESS_MW_BIND; - } - - if (qp_handle->qp_type == IBV_QPT_UD) { - /* already INIT, multi remote AH's on QP */ - if (ep_ptr->qp_state == IBV_QPS_INIT) - return DAT_SUCCESS; - mask |= IBV_QP_QKEY; - qp_attr.qkey = DAT_UD_QKEY; - } - - qp_attr.pkey_index = ia_ptr->hca_ptr->ib_trans.pkey_idx; - qp_attr.port_num = ia_ptr->hca_ptr->port_num; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n", - qp_attr.pkey_index, qp_attr.port_num, - qp_attr.qp_access_flags, qp_attr.qkey); - break; - default: - break; - } - - ret = ibv_modify_qp(qp_handle, &qp_attr, mask); - if (ret == 0) { - ep_ptr->qp_state = qp_state; - return DAT_SUCCESS; - } else { - return (dapl_convert_errno(errno, "modify_qp_state")); - } -} - -/* Modify UD type QP from init, rtr, rts, info network order */ -DAT_RETURN -dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp) -{ - struct ibv_qp_attr qp_attr; - - /* modify QP, setup and prepost buffers */ - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = IBV_QPS_INIT; - qp_attr.pkey_index = hca->ib_trans.pkey_idx; - qp_attr.port_num = hca->port_num; - qp_attr.qkey = DAT_UD_QKEY; - if (ibv_modify_qp(qp, &qp_attr, - 
IBV_QP_STATE | - IBV_QP_PKEY_INDEX | - IBV_QP_PORT | - IBV_QP_QKEY)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " modify_ud_qp INIT: ERR %s\n", strerror(errno)); - return (dapl_convert_errno(errno, "modify_qp")); - } - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = IBV_QPS_RTR; - if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " modify_ud_qp RTR: ERR %s\n", strerror(errno)); - return (dapl_convert_errno(errno, "modify_qp")); - } - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = IBV_QPS_RTS; - qp_attr.sq_psn = 1; - if (ibv_modify_qp(qp, &qp_attr, - IBV_QP_STATE | IBV_QP_SQ_PSN)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " modify_ud_qp RTS: ERR %s\n", strerror(errno)); - return (dapl_convert_errno(errno, "modify_qp")); - } - return DAT_SUCCESS; -} - -/* Create address handle for remote QP, info in network order */ -ib_ah_handle_t -dapls_create_ah(IN DAPL_HCA *hca, - IN ib_pd_handle_t pd, - IN ib_qp_handle_t qp, - IN uint16_t lid, - IN ib_gid_handle_t gid) -{ - struct ibv_qp_attr qp_attr; - ib_ah_handle_t ah; - - if (qp->qp_type != IBV_QPT_UD) { - dapl_log(DAPL_DBG_TYPE_ERR, - " create_ah ERR: QP_type != UD\n"); - return NULL; - } - - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = IBV_QP_STATE; - - /* address handle. 
RC and UD */ - qp_attr.ah_attr.dlid = ntohs(lid); - if (gid != NULL) { - dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n"); - qp_attr.ah_attr.is_global = 1; - qp_attr.ah_attr.grh.dgid.global.subnet_prefix = - ntohll(gid->global.subnet_prefix); - qp_attr.ah_attr.grh.dgid.global.interface_id = - ntohll(gid->global.interface_id); - qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.hop_limit; - qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.tclass; - } - qp_attr.ah_attr.sl = hca->ib_trans.sl; - qp_attr.ah_attr.src_path_bits = 0; - qp_attr.ah_attr.port_num = hca->port_num; - - dapl_log(DAPL_DBG_TYPE_CM, - " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n", - hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle); - - /* UD: create AH for remote side */ - ah = ibv_create_ah(pd, &qp_attr.ah_attr); - if (!ah) { - dapl_log(DAPL_DBG_TYPE_ERR, - " create_ah: ERR %s\n", strerror(errno)); - return NULL; - } - - dapl_log(DAPL_DBG_TYPE_CM, - " dapls_create_ah: AH %p for lid %x\n", - ah, qp_attr.ah_attr.dlid); - - return ah; -} - -/* - * Local variables: - * c-indent-level: 4 - * c-basic-offset: 4 - * tab-width: 8 - * End: - */ +/* + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. 
+ * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "dapl.h" +#include "dapl_adapter_util.h" +#include "dapl_ep_util.h" +#include +#include + +/* + * dapl_ib_qp_alloc + * + * Alloc a QP + * + * Input: + * *ep_ptr pointer to EP INFO + * ib_hca_handle provider HCA handle + * ib_pd_handle provider protection domain handle + * cq_recv provider recv CQ handle + * cq_send provider send CQ handle + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INTERNAL_ERROR + * + */ +DAT_RETURN +dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, + IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr) +{ + DAT_EP_ATTR *attr; + DAPL_EVD *rcv_evd, *req_evd; + ib_cq_handle_t rcv_cq, req_cq; + ib_pd_handle_t ib_pd_handle; + struct ibv_qp_init_attr qp_create; +#ifdef _OPENIB_CMA_ + dp_ib_cm_handle_t conn; +#endif + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n", + ia_ptr, ep_ptr, ep_ctx_ptr); + + attr = &ep_ptr->param.ep_attr; + ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle; + rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle; + req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle; + + /* + * DAT allows usage model of EP's with no EVD's but IB does not. + * Create a CQ with zero entries under the covers to support and + * catch any invalid posting. 
+ */ + if (rcv_evd != DAT_HANDLE_NULL) + rcv_cq = rcv_evd->ib_cq_handle; + else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty) + rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty; + else { + struct ibv_comp_channel *channel; + + channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle); + if (!channel) + return (dapl_convert_errno(ENOMEM, "create_cq")); + + /* Call IB verbs to create CQ */ + rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, + 0, NULL, channel, 0); + + if (rcv_cq == IB_INVALID_HANDLE) { + ibv_destroy_comp_channel(channel); + return (dapl_convert_errno(ENOMEM, "create_cq")); + } + + ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq; + } + if (req_evd != DAT_HANDLE_NULL) + req_cq = req_evd->ib_cq_handle; + else + req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty; + + /* + * IMPLEMENTATION NOTE: + * uDAPL allows consumers to post buffers on the EP after creation + * and before a connect request (outbound and inbound). This forces + * a binding to a device during the hca_open call and requires the + * consumer to predetermine which device to listen on or connect from. + * This restriction eliminates any option of listening or connecting + * over multiple devices. uDAPL should add API's to resolve addresses + * and bind to the device at the approriate time (before connect + * and after CR arrives). Discovery should happen at connection time + * based on addressing and not on static configuration during open. 
+ */ + +#ifdef _OPENIB_CMA_ + /* Allocate CM and initialize lock */ + if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL) + return (dapl_convert_errno(ENOMEM, "cm_create")); + + /* open identifies the local device; per DAT specification */ + if (rdma_bind_addr(conn->cm_id, + (struct sockaddr *)&ia_ptr->hca_ptr->hca_address)) { + dapls_cm_free(conn); + return (dapl_convert_errno(EAFNOSUPPORT, "rdma_bind_addr")); + } +#endif + /* Setup attributes and create qp */ + dapl_os_memzero((void *)&qp_create, sizeof(qp_create)); + qp_create.send_cq = req_cq; + qp_create.cap.max_send_wr = attr->max_request_dtos; + qp_create.cap.max_send_sge = attr->max_request_iov; + qp_create.cap.max_inline_data = + ia_ptr->hca_ptr->ib_trans.max_inline_send; + qp_create.qp_type = IBV_QPT_RC; + qp_create.qp_context = (void *)ep_ptr; + +#ifdef DAT_EXTENSIONS + if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) { +#ifdef _OPENIB_CMA_ + return (DAT_NOT_IMPLEMENTED); +#endif + qp_create.qp_type = IBV_QPT_UD; + if (attr->max_message_size > + (128 << ia_ptr->hca_ptr->ib_trans.mtu)) { + return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6); + } + } +#endif + + /* ibv assumes rcv_cq is never NULL, set to req_cq */ + if (rcv_cq == NULL) { + qp_create.recv_cq = req_cq; + qp_create.cap.max_recv_wr = 0; + qp_create.cap.max_recv_sge = 0; + } else { + qp_create.recv_cq = rcv_cq; + qp_create.cap.max_recv_wr = attr->max_recv_dtos; + qp_create.cap.max_recv_sge = attr->max_recv_iov; + } + +#ifdef _OPENIB_CMA_ + if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) { + dapls_cm_free(conn); + return (dapl_convert_errno(errno, "rdma_create_qp")); + } + ep_ptr->qp_handle = conn->cm_id->qp; + ep_ptr->qp_state = IBV_QPS_INIT; + + ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id); +#else + ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create); + if (!ep_ptr->qp_handle) + return (dapl_convert_errno(ENOMEM, "create_qp")); + + /* Setup QP attributes for INIT state on the way out */ + if 
(dapls_modify_qp_state(ep_ptr->qp_handle, + IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) { + ibv_destroy_qp(ep_ptr->qp_handle); + ep_ptr->qp_handle = IB_INVALID_HANDLE; + return DAT_INTERNAL_ERROR; + } +#endif + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " qp_alloc: qpn %p type %d sq %d,%d rq %d,%d\n", + ep_ptr->qp_handle->qp_num, ep_ptr->qp_handle->qp_type, + qp_create.cap.max_send_wr, qp_create.cap.max_send_sge, + qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge); + + return DAT_SUCCESS; +} + +/* + * dapl_ib_qp_free + * + * Free a QP + * + * Input: + * ia_handle IA handle + * *ep_ptr pointer to EP INFO + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * dapl_convert_errno + * + */ +DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr) +{ + struct ibv_qp *qp; + struct ibv_qp_attr qp_attr; + +#ifdef _OPENIB_CMA_ + dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr); + if (!cm_ptr) + return DAT_SUCCESS; +#endif + + dapl_os_lock(&ep_ptr->header.lock); + if (ep_ptr->qp_handle != NULL) { + qp = ep_ptr->qp_handle; + dapl_os_unlock(&ep_ptr->header.lock); + + qp_attr.qp_state = IBV_QPS_ERR; + ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); + dapls_ep_flush_cqs(ep_ptr); + + ep_ptr->qp_handle = NULL; +#ifdef _OPENIB_CMA_ + rdma_destroy_qp(cm_ptr->cm_id); + cm_ptr->cm_id->qp = NULL; +#else + if (ibv_destroy_qp(qp)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " qp_free: ibv_destroy_qp error - %s\n", + strerror(errno)); + } +#endif + } else { + dapl_os_unlock(&ep_ptr->header.lock); + } + return DAT_SUCCESS; +} + +/* + * dapl_ib_qp_modify + * + * Set the QP to the parameters specified in an EP_PARAM + * + * The EP_PARAM structure that is provided has been + * sanitized such that only non-zero values are valid. 
+ * + * Input: + * ib_hca_handle HCA handle + * qp_handle QP handle + * ep_attr Sanitized EP Params + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INVALID_PARAMETER + * + */ +DAT_RETURN +dapls_ib_qp_modify(IN DAPL_IA * ia_ptr, + IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr) +{ + struct ibv_qp_attr qp_attr; + + if (ep_ptr->qp_handle == IB_INVALID_HANDLE) + return DAT_INVALID_PARAMETER; + + /* + * EP state, qp_handle state should be an indication + * of current state but the only way to be sure is with + * a user mode ibv_query_qp call which is NOT available + */ + + /* move to error state if necessary */ + if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) && + (ep_ptr->qp_handle->state != IBV_QPS_ERR)) { + return (dapls_modify_qp_state(ep_ptr->qp_handle, + IBV_QPS_ERR, 0, 0, 0)); + } + + /* + * Check if we have the right qp_state to modify attributes + */ + if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) && + (ep_ptr->qp_handle->state != IBV_QPS_RTS)) + return DAT_INVALID_STATE; + + /* Adjust to current EP attributes */ + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.cap.max_send_wr = attr->max_request_dtos; + qp_attr.cap.max_recv_wr = attr->max_recv_dtos; + qp_attr.cap.max_send_sge = attr->max_request_iov; + qp_attr.cap.max_recv_sge = attr->max_recv_iov; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + "modify_qp: qp %p sq %d,%d, rq %d,%d\n", + ep_ptr->qp_handle, + qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge, + qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge); + + if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) { + dapl_dbg_log(DAPL_DBG_TYPE_ERR, + "modify_qp: modify ep %p qp %p failed\n", + ep_ptr, ep_ptr->qp_handle); + return (dapl_convert_errno(errno, "modify_qp_state")); + } + + return DAT_SUCCESS; +} + +/* + * dapls_ib_reinit_ep + * + * Move the QP to INIT state again. 
+ * + * Input: + * ep_ptr DAPL_EP + * + * Output: + * none + * + * Returns: + * void + * + */ +#if defined(_WIN32) || defined(_WIN64) || defined(_OPENIB_CMA_) +void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr) +{ + dp_ib_cm_handle_t cm_ptr, next_cm_ptr; + + /* work around bug in low level driver - 3/24/09 */ + /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */ + if (ep_ptr->qp_handle != IB_INVALID_HANDLE) { + dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr); + + /* free any CM object's created */ + cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) + ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); + while (cm_ptr != NULL) { + next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head, + &cm_ptr->list_entry); + dapls_cm_free(cm_ptr); + cm_ptr = next_cm_ptr; + } + dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr); + } +} +#else // _WIN32 || _WIN64 +void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr) +{ + if (ep_ptr->qp_handle != IB_INVALID_HANDLE && + ep_ptr->qp_handle->qp_type != IBV_QPT_UD) { + /* move to RESET state and then to INIT */ + dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0); + dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0); + } +} +#endif // _WIN32 || _WIN64 + +#if DAPL_USE_IBACM +uint8_t dapls_get_sl(DAPL_HCA *hca_ptr, uint16_t dlid) +{ + struct rdma_addrinfo hint, *res; + struct ibv_path_record path; + uint8_t sl = hca_ptr->ib_trans.sl; + int ret; + + memset(&path, 0, sizeof path); + path.reversible_numpath = IBV_PATH_RECORD_REVERSIBLE | 1; + path.slid = hca_ptr->ib_trans.lid; + path.dlid = dlid; + + memset(&hint, 0, sizeof hint); + hint.ai_route = &path; + hint.ai_route_len = sizeof(path); + + ret = rdma_getaddrinfo(NULL, NULL, &hint, &res); + if (ret) + goto out; + + if (res->ai_route_len) + sl = ntohs(((struct ibv_path_record *) res->ai_route)-> + qosclass_sl) & 0xF; + + rdma_freeaddrinfo(res); +out: + return sl; +} +#else +uint8_t dapls_get_sl(DAPL_HCA *hca_ptr, uint16_t dlid) +{ + return 
hca_ptr->ib_trans.sl; +} +#endif + +/* + * Generic QP modify for init, reset, error, RTS, RTR + * For UD, create_ah on RTR, qkey on INIT + * CM msg provides QP attributes, info in network order + */ +DAT_RETURN +dapls_modify_qp_state(IN ib_qp_handle_t qp_handle, + IN ib_qp_state_t qp_state, + IN uint32_t qpn, + IN uint16_t lid, + IN ib_gid_handle_t gid) +{ + struct ibv_qp_attr qp_attr; + enum ibv_qp_attr_mask mask = IBV_QP_STATE; + DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context; + DAPL_IA *ia_ptr = ep_ptr->header.owner_ia; + int ret; + + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.qp_state = qp_state; + + switch (qp_state) { + case IBV_QPS_RTR: + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " QPS_RTR: type %d qpn 0x%x gid %p (%d) lid 0x%x" + " port %d ep %p qp_state %d \n", + qp_handle->qp_type, ntohl(qpn), gid, + ia_ptr->hca_ptr->ib_trans.global, + ntohs(lid), ia_ptr->hca_ptr->port_num, + ep_ptr, ep_ptr->qp_state); + + mask |= IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; + + qp_attr.dest_qp_num = ntohl(qpn); + qp_attr.rq_psn = 1; + qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu; + qp_attr.max_dest_rd_atomic = + ep_ptr->param.ep_attr.max_rdma_read_out; + qp_attr.min_rnr_timer = + ia_ptr->hca_ptr->ib_trans.rnr_timer; + + /* address handle. 
RC and UD */ + qp_attr.ah_attr.dlid = ntohs(lid); + if (gid && ia_ptr->hca_ptr->ib_trans.global) { + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " QPS_RTR: GID Subnet 0x" F64x " ID 0x" F64x "\n", + (unsigned long long)htonll(gid->global.subnet_prefix), + (unsigned long long)htonll(gid->global.interface_id)); + + qp_attr.ah_attr.is_global = 1; + qp_attr.ah_attr.grh.dgid.global.subnet_prefix = + gid->global.subnet_prefix; + qp_attr.ah_attr.grh.dgid.global.interface_id = + gid->global.interface_id; + qp_attr.ah_attr.grh.hop_limit = + ia_ptr->hca_ptr->ib_trans.hop_limit; + qp_attr.ah_attr.grh.traffic_class = + ia_ptr->hca_ptr->ib_trans.tclass; + } + qp_attr.ah_attr.sl = dapls_get_sl(ia_ptr->hca_ptr, lid); + qp_attr.ah_attr.src_path_bits = 0; + qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num; + + /* UD: already in RTR, RTS state */ + if (qp_handle->qp_type == IBV_QPT_UD) { + mask = IBV_QP_STATE; + if (ep_ptr->qp_state == IBV_QPS_RTR || + ep_ptr->qp_state == IBV_QPS_RTS) + return DAT_SUCCESS; + } + break; + case IBV_QPS_RTS: + if (qp_handle->qp_type == IBV_QPT_RC) { + mask |= IBV_QP_SQ_PSN | + IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC; + qp_attr.timeout = + ia_ptr->hca_ptr->ib_trans.ack_timer; + qp_attr.retry_cnt = + ia_ptr->hca_ptr->ib_trans.ack_retry; + qp_attr.rnr_retry = + ia_ptr->hca_ptr->ib_trans.rnr_retry; + qp_attr.max_rd_atomic = + ep_ptr->param.ep_attr.max_rdma_read_out; + } + /* RC and UD */ + qp_attr.qp_state = IBV_QPS_RTS; + qp_attr.sq_psn = 1; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " QPS_RTS: psn %x rd_atomic %d ack %d " + " retry %d rnr_retry %d ep %p qp_state %d\n", + qp_attr.sq_psn, qp_attr.max_rd_atomic, + qp_attr.timeout, qp_attr.retry_cnt, + qp_attr.rnr_retry, ep_ptr, + ep_ptr->qp_state); + + if (qp_handle->qp_type == IBV_QPT_UD) { + /* already RTS, multi remote AH's on QP */ + if (ep_ptr->qp_state == IBV_QPS_RTS) + return DAT_SUCCESS; + else + mask = IBV_QP_STATE | IBV_QP_SQ_PSN; + } + break; + case IBV_QPS_INIT: + 
mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT; + if (qp_handle->qp_type == IBV_QPT_RC) { + mask |= IBV_QP_ACCESS_FLAGS; + qp_attr.qp_access_flags = + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_ATOMIC | + IBV_ACCESS_MW_BIND; + } + + if (qp_handle->qp_type == IBV_QPT_UD) { + /* already INIT, multi remote AH's on QP */ + if (ep_ptr->qp_state == IBV_QPS_INIT) + return DAT_SUCCESS; + mask |= IBV_QP_QKEY; + qp_attr.qkey = DAT_UD_QKEY; + } + + qp_attr.pkey_index = ia_ptr->hca_ptr->ib_trans.pkey_idx; + qp_attr.port_num = ia_ptr->hca_ptr->port_num; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n", + qp_attr.pkey_index, qp_attr.port_num, + qp_attr.qp_access_flags, qp_attr.qkey); + break; + default: + break; + } + + ret = ibv_modify_qp(qp_handle, &qp_attr, mask); + if (ret == 0) { + ep_ptr->qp_state = qp_state; + return DAT_SUCCESS; + } else { + return (dapl_convert_errno(errno, "modify_qp_state")); + } +} + +/* Modify UD type QP from init, rtr, rts, info network order */ +DAT_RETURN +dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp) +{ + struct ibv_qp_attr qp_attr; + + /* modify QP, setup and prepost buffers */ + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.qp_state = IBV_QPS_INIT; + qp_attr.pkey_index = hca->ib_trans.pkey_idx; + qp_attr.port_num = hca->port_num; + qp_attr.qkey = DAT_UD_QKEY; + if (ibv_modify_qp(qp, &qp_attr, + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_QKEY)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " modify_ud_qp INIT: ERR %s\n", strerror(errno)); + return (dapl_convert_errno(errno, "modify_qp")); + } + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.qp_state = IBV_QPS_RTR; + if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " modify_ud_qp RTR: ERR %s\n", strerror(errno)); + return (dapl_convert_errno(errno, "modify_qp")); + } + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + 
qp_attr.qp_state = IBV_QPS_RTS; + qp_attr.sq_psn = 1; + if (ibv_modify_qp(qp, &qp_attr, + IBV_QP_STATE | IBV_QP_SQ_PSN)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " modify_ud_qp RTS: ERR %s\n", strerror(errno)); + return (dapl_convert_errno(errno, "modify_qp")); + } + return DAT_SUCCESS; +} + +/* Create address handle for remote QP, info in network order */ +ib_ah_handle_t +dapls_create_ah(IN DAPL_HCA *hca, + IN ib_pd_handle_t pd, + IN ib_qp_handle_t qp, + IN uint16_t lid, + IN ib_gid_handle_t gid) +{ + struct ibv_qp_attr qp_attr; + ib_ah_handle_t ah; + + if (qp->qp_type != IBV_QPT_UD) { + dapl_log(DAPL_DBG_TYPE_ERR, + " create_ah ERR: QP_type != UD\n"); + return NULL; + } + + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.qp_state = IBV_QP_STATE; + + /* address handle. RC and UD */ + qp_attr.ah_attr.dlid = ntohs(lid); + if (gid != NULL) { + dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n"); + qp_attr.ah_attr.is_global = 1; + qp_attr.ah_attr.grh.dgid.global.subnet_prefix = + ntohll(gid->global.subnet_prefix); + qp_attr.ah_attr.grh.dgid.global.interface_id = + ntohll(gid->global.interface_id); + qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.hop_limit; + qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.tclass; + } + qp_attr.ah_attr.sl = dapls_get_sl(hca, lid); + qp_attr.ah_attr.src_path_bits = 0; + qp_attr.ah_attr.port_num = hca->port_num; + + dapl_log(DAPL_DBG_TYPE_CM, + " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n", + hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle); + + /* UD: create AH for remote side */ + ah = ibv_create_ah(pd, &qp_attr.ah_attr); + if (!ah) { + dapl_log(DAPL_DBG_TYPE_ERR, + " create_ah: ERR %s\n", strerror(errno)); + return NULL; + } + + dapl_log(DAPL_DBG_TYPE_CM, + " dapls_create_ah: AH %p for lid %x\n", + ah, qp_attr.ah_attr.dlid); + + return ah; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 8 + * End: + */ diff --git 
a/branches/WOF2-3/ulp/dapl2/dapl/openib_scm/SOURCES b/branches/WOF2-3/ulp/dapl2/dapl/openib_scm/SOURCES index d4470dee..109f9afa 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/openib_scm/SOURCES +++ b/branches/WOF2-3/ulp/dapl2/dapl/openib_scm/SOURCES @@ -26,7 +26,8 @@ SOURCES = \ INCLUDES = ..\include;..\openib_common\;..\common;windows;..\..\dat\include;\ ..\..\dat\udat\windows;..\udapl\windows;..\..\..\..\inc\user\linux;\ - ..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include + ..\..\..\..\inc;..\..\..\..\inc\user; ..\..\..\libibverbs\include;\ + ..\..\..\librdmacm\include; DAPL_OPTS = -DEXPORT_DAPL_SYMBOLS -DDAT_EXTENSIONS -DSOCK_CM -DOPENIB -DCQ_WAIT_OBJECT diff --git a/branches/WOF2-3/ulp/dapl2/dapl/openib_scm/cm.c b/branches/WOF2-3/ulp/dapl2/dapl/openib_scm/cm.c index 24065cef..5e60b540 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/openib_scm/cm.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/openib_scm/cm.c @@ -463,10 +463,8 @@ DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr) return DAT_SUCCESS; } cm_ptr->state = DCM_DISCONNECTED; - dapl_os_unlock(&cm_ptr->lock); - - /* send disc date, close socket, schedule destroy */ send(cm_ptr->socket, (char *)&disc_data, sizeof(disc_data), 0); + dapl_os_unlock(&cm_ptr->lock); /* disconnect events for RC's only */ if (cm_ptr->ep->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) { @@ -1812,7 +1810,13 @@ void cr_thread(void *arg) dapl_os_unlock(&cr->lock); dapli_socket_disconnect(cr); break; + case DCM_DISCONNECTED: + cr->state = DCM_FREE; + dapl_os_unlock(&cr->lock); + break; default: + if (ret == DAPL_FD_ERROR) + cr->state = DCM_FREE; dapl_os_unlock(&cr->lock); break; } diff --git a/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/SOURCES b/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/SOURCES index 381afa23..1f9d9ebf 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/SOURCES +++ b/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/SOURCES @@ -22,7 +22,7 @@ SOURCES = udapl.rc ..\dapl_common_src.c ..\dapl_udapl_src.c 
..\openib_common.c \ INCLUDES = ..\include;..\openib_common\;..\common;windows;..\..\dat\include;\ ..\..\dat\udat\windows;..\udapl\windows;\ ..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include;\ - ..\..\..\..\inc\user\linux; + ..\..\..\librdmacm\include;..\..\..\..\inc\user\linux; DAPL_OPTS = -DEXPORT_DAPL_SYMBOLS -DDAT_EXTENSIONS -DOPENIB -DCQ_WAIT_OBJECT diff --git a/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/cm.c b/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/cm.c index c5c125a4..ed962d57 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/cm.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/cm.c @@ -116,8 +116,6 @@ static void ucm_disconnect_final(dp_ib_cm_handle_t cm); DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm); DAT_RETURN dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm); -#define UCM_SND_BURST 50 - /* Service ids - port space */ static uint16_t ucm_get_port(ib_hca_transport_t *tp, uint16_t port) { @@ -242,10 +240,10 @@ static ib_cm_msg_t *ucm_get_smsg(ib_hca_transport_t *tp) int ret, polled = 0, hd = tp->s_hd; hd++; -retry: + if (hd == tp->qpe) hd = 0; - +retry: if (hd == tp->s_tl) msg = NULL; else { @@ -257,7 +255,7 @@ retry: if ((msg == NULL) && (!polled)) { struct ibv_wc wc; - /* process completions, based on UCM_SND_BURST */ + /* process completions, based on UCM_TX_BURST */ ret = ibv_poll_cq(tp->scq, 1, &wc); if (ret < 0) { dapl_log(DAPL_DBG_TYPE_WARN, @@ -546,8 +544,9 @@ retry: msg = (ib_cm_msg_t*) (uintptr_t) wc[i].wr_id; dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ucm_recv: wc status=%d, ln=%d id=%p sqp=%x\n", - wc[i].status, wc[i].byte_len, + " ucm_recv: stat=%d op=%s ln=%d id=%p sqp=%x\n", + wc[i].status, dapl_cm_op_str(ntohs(msg->op)), + wc[i].byte_len, (void*)wc[i].wr_id, wc[i].src_qp); /* validate CM message, version */ @@ -583,8 +582,12 @@ static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, /* Get message from send queue, copy data, and send */ dapl_os_lock(&tp->slock); - if ((smsg = ucm_get_smsg(tp)) 
== NULL) + if ((smsg = ucm_get_smsg(tp)) == NULL) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ucm_send ERR: get_smsg(hd=%d,tl=%d) \n", + tp->s_hd, tp->s_tl); goto bail; + } len = (sizeof(*msg) - DCM_MAX_PDATA_SIZE); dapl_os_memcpy(smsg, msg, len); @@ -598,7 +601,7 @@ static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, wr.num_sge = 1; wr.opcode = IBV_WR_SEND; wr.wr_id = (unsigned long)tp->s_hd; - wr.send_flags = (wr.wr_id % UCM_SND_BURST) ? 0 : IBV_SEND_SIGNALED; + wr.send_flags = (wr.wr_id % tp->burst) ? 0 : IBV_SEND_SIGNALED; if (len <= tp->max_inline_send) wr.send_flags |= IBV_SEND_INLINE; @@ -607,7 +610,7 @@ static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, sge.addr = (uintptr_t)smsg; dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ucm_send: op %s ln %d lid %x c_qpn %x rport %s\n", + " ucm_send: op %s ln %d lid %x c_qpn %x rport %x\n", dapl_cm_op_str(ntohs(smsg->op)), sge.length, htons(smsg->daddr.ib.lid), htonl(smsg->dqpn), htons(smsg->dport)); @@ -626,6 +629,12 @@ static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, wr.wr.ud.remote_qkey = DAT_UD_QKEY; ret = ibv_post_send(tp->qp, &wr, &bad_wr); + if (ret) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ucm_send ERR: post_send() %s\n", + strerror(errno) ); + } + bail: dapl_os_unlock(&tp->slock); return ret; @@ -810,7 +819,7 @@ static void ucm_disconnect_final(dp_ib_cm_handle_t cm) return; dapl_os_lock(&cm->lock); - if (cm->state == DCM_DISCONNECTED) { + if ((cm->state == DCM_DISCONNECTED) || (cm->state == DCM_FREE)) { dapl_os_unlock(&cm->lock); return; } @@ -1413,9 +1422,10 @@ static int ucm_reply(dp_ib_cm_handle_t cm) } dapl_os_get_time(&cm->timer); /* RTU expected */ dapl_os_unlock(&cm->lock); - if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) + if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) { + dapl_log(DAPL_DBG_TYPE_ERR," accept ERR: ucm reply send()\n"); return -1; - + } return 0; } diff --git 
a/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h b/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h index 25ce963e..920b6c45 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h +++ b/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h @@ -1,136 +1,138 @@ -/* - * Copyright (c) 2009 Intel Corporation. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -#ifndef _DAPL_IB_UTIL_H_ -#define _DAPL_IB_UTIL_H_ -#define _OPENIB_SCM_ - -#include -#include "openib_osd.h" -#include "dapl_ib_common.h" - -/* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */ -struct ib_cm_handle -{ - struct dapl_llist_entry list_entry; - struct dapl_llist_entry local_entry; - DAPL_OS_WAIT_OBJECT event; - DAPL_OS_LOCK lock; - DAPL_OS_TIMEVAL timer; - int ref_count; - int state; - int retries; - struct dapl_hca *hca; - struct dapl_sp *sp; - struct dapl_ep *ep; - struct ibv_ah *ah; - uint16_t p_size; /* accept p_data, for retries */ - uint8_t p_data[DCM_MAX_PDATA_SIZE]; - ib_cm_msg_t msg; -}; - -typedef struct ib_cm_handle *dp_ib_cm_handle_t; -typedef dp_ib_cm_handle_t ib_cm_srvc_handle_t; - -/* Definitions */ -#define IB_INVALID_HANDLE NULL - -/* ib_hca_transport_t, specific to this implementation */ -typedef struct _ib_hca_transport -{ - struct ibv_device *ib_dev; - struct dapl_hca *hca; - struct ibv_context *ib_ctx; - struct ibv_comp_channel *ib_cq; - ib_cq_handle_t ib_cq_empty; - int destroy; - int cm_state; - DAPL_OS_THREAD thread; - DAPL_OS_LOCK lock; /* connect list */ - struct dapl_llist_entry *list; - DAPL_OS_LOCK llock; /* listen list */ - struct dapl_llist_entry *llist; - ib_async_handler_t async_unafiliated; - void *async_un_ctx; - ib_async_cq_handler_t async_cq_error; - ib_async_dto_handler_t async_cq; - ib_async_qp_handler_t async_qp_error; - union dcm_addr addr; /* lid, port, qp_num, gid */ - int max_inline_send; - int rd_atom_in; - int rd_atom_out; - uint8_t ack_timer; - uint8_t ack_retry; - uint8_t rnr_timer; - uint8_t rnr_retry; - uint8_t global; - uint8_t hop_limit; - uint8_t tclass; - uint8_t mtu; - DAT_NAMED_ATTR named_attr; - struct dapl_thread_signal signal; - int cqe; - int qpe; - int retries; - int cm_timer; - int rep_time; - int rtu_time; - DAPL_OS_LOCK slock; - int s_hd; - int s_tl; - struct ibv_pd *pd; - struct ibv_cq *scq; - struct ibv_cq *rcq; - struct ibv_qp *qp; - struct ibv_mr 
*mr_rbuf; - struct ibv_mr *mr_sbuf; - ib_cm_msg_t *sbuf; - ib_cm_msg_t *rbuf; - struct ibv_comp_channel *rch; - struct ibv_ah **ah; - DAPL_OS_LOCK plock; - uint8_t *sid; /* Sevice IDs, port space, bitarray? */ - uint8_t sl; - uint16_t pkey; - int pkey_idx; - -} ib_hca_transport_t; - -/* prototypes */ -void cm_thread(void *arg); -void ucm_async_event(struct dapl_hca *hca); -void dapli_cq_event_cb(struct _ib_hca_transport *tp); -void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr); -void dapls_cm_release(dp_ib_cm_handle_t cm_ptr); -void dapls_cm_free(dp_ib_cm_handle_t cm_ptr); - -#ifdef DAPL_COUNTERS -void dapls_print_cm_list(IN DAPL_IA *ia_ptr); -#endif - -#endif /* _DAPL_IB_UTIL_H_ */ - +/* + * Copyright (c) 2009 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +#ifndef _DAPL_IB_UTIL_H_ +#define _DAPL_IB_UTIL_H_ +#define _OPENIB_SCM_ + +#include +#include "openib_osd.h" +#include "dapl_ib_common.h" + +/* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */ +struct ib_cm_handle +{ + struct dapl_llist_entry list_entry; + struct dapl_llist_entry local_entry; + DAPL_OS_WAIT_OBJECT event; + DAPL_OS_LOCK lock; + DAPL_OS_TIMEVAL timer; + int ref_count; + int state; + int retries; + struct dapl_hca *hca; + struct dapl_sp *sp; + struct dapl_ep *ep; + struct ibv_ah *ah; + uint16_t p_size; /* accept p_data, for retries */ + uint8_t p_data[DCM_MAX_PDATA_SIZE]; + ib_cm_msg_t msg; +}; + +typedef struct ib_cm_handle *dp_ib_cm_handle_t; +typedef dp_ib_cm_handle_t ib_cm_srvc_handle_t; + +/* Definitions */ +#define IB_INVALID_HANDLE NULL + +/* ib_hca_transport_t, specific to this implementation */ +typedef struct _ib_hca_transport +{ + struct ibv_device *ib_dev; + struct dapl_hca *hca; + struct ibv_context *ib_ctx; + struct ibv_comp_channel *ib_cq; + ib_cq_handle_t ib_cq_empty; + int destroy; + int cm_state; + DAPL_OS_THREAD thread; + DAPL_OS_LOCK lock; /* connect list */ + struct dapl_llist_entry *list; + DAPL_OS_LOCK llock; /* listen list */ + struct dapl_llist_entry *llist; + ib_async_handler_t async_unafiliated; + void *async_un_ctx; + ib_async_cq_handler_t async_cq_error; + ib_async_dto_handler_t async_cq; + ib_async_qp_handler_t async_qp_error; + union dcm_addr addr; /* lid, port, qp_num, gid */ + int max_inline_send; + int rd_atom_in; + int rd_atom_out; + uint8_t ack_timer; + uint8_t ack_retry; + uint8_t rnr_timer; + uint8_t rnr_retry; + uint8_t global; + uint8_t hop_limit; + uint8_t tclass; + uint8_t mtu; + DAT_NAMED_ATTR named_attr; + struct dapl_thread_signal signal; + int cqe; + int qpe; + int burst; + int retries; + int cm_timer; + int rep_time; + int rtu_time; + DAPL_OS_LOCK slock; + int s_hd; + int s_tl; + struct ibv_pd *pd; + struct ibv_cq *scq; + struct ibv_cq *rcq; + struct ibv_qp *qp; + 
struct ibv_mr *mr_rbuf; + struct ibv_mr *mr_sbuf; + ib_cm_msg_t *sbuf; + ib_cm_msg_t *rbuf; + struct ibv_comp_channel *rch; + struct ibv_ah **ah; + DAPL_OS_LOCK plock; + uint16_t lid; + uint8_t *sid; /* Sevice IDs, port space, bitarray? */ + uint8_t sl; + uint16_t pkey; + int pkey_idx; + +} ib_hca_transport_t; + +/* prototypes */ +void cm_thread(void *arg); +void ucm_async_event(struct dapl_hca *hca); +void dapli_cq_event_cb(struct _ib_hca_transport *tp); +void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr); +void dapls_cm_release(dp_ib_cm_handle_t cm_ptr); +void dapls_cm_free(dp_ib_cm_handle_t cm_ptr); + +#ifdef DAPL_COUNTERS +void dapls_print_cm_list(IN DAPL_IA *ia_ptr); +#endif + +#endif /* _DAPL_IB_UTIL_H_ */ + diff --git a/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/device.c b/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/device.c index b7d9efd3..ff038c6a 100644 --- a/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/device.c +++ b/branches/WOF2-3/ulp/dapl2/dapl/openib_ucm/device.c @@ -1,670 +1,673 @@ -/* - * Copyright (c) 2009 Intel Corporation. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. 
- * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. - */ - -#include "openib_osd.h" -#include "dapl.h" -#include "dapl_adapter_util.h" -#include "dapl_ib_util.h" -#include "dapl_osd.h" - -#include - -static void ucm_service_destroy(IN DAPL_HCA *hca); -static int ucm_service_create(IN DAPL_HCA *hca); - -#if defined (_WIN32) -#include - -static int32_t create_os_signal(IN DAPL_HCA * hca_ptr) -{ - return CompSetInit(&hca_ptr->ib_trans.signal.set); -} - -static void destroy_os_signal(IN DAPL_HCA * hca_ptr) -{ - CompSetCleanup(&hca_ptr->ib_trans.signal.set); -} - -static int dapls_config_verbs(struct ibv_context *verbs) -{ - verbs->channel.Milliseconds = 0; - return 0; -} - -static int dapls_config_comp_channel(struct ibv_comp_channel *channel) -{ - channel->comp_channel.Milliseconds = 0; - return 0; -} - -#else // _WIN32 - -static int32_t create_os_signal(IN DAPL_HCA * hca_ptr) -{ - DAPL_SOCKET listen_socket; - struct sockaddr_in addr; - socklen_t addrlen = sizeof(addr); - int ret; - - listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - if (listen_socket == DAPL_INVALID_SOCKET) - return 1; - - memset(&addr, 0, sizeof addr); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(0x7f000001); - ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr); - if (ret) - goto err1; - - ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen); - if (ret) - goto err1; - - ret = listen(listen_socket, 0); - if (ret) - goto err1; - - hca_ptr->ib_trans.signal.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - if (hca_ptr->ib_trans.signal.scm[1] == DAPL_INVALID_SOCKET) - goto err1; - - ret = connect(hca_ptr->ib_trans.signal.scm[1], - (struct sockaddr *)&addr, sizeof(addr)); - if (ret) - goto err2; - - hca_ptr->ib_trans.signal.scm[0] = accept(listen_socket, NULL, NULL); - if 
(hca_ptr->ib_trans.signal.scm[0] == DAPL_INVALID_SOCKET) - goto err2; - - closesocket(listen_socket); - return 0; - - err2: - closesocket(hca_ptr->ib_trans.signal.scm[1]); - err1: - closesocket(listen_socket); - return 1; -} - -static void destroy_os_signal(IN DAPL_HCA * hca_ptr) -{ - closesocket(hca_ptr->ib_trans.signal.scm[0]); - closesocket(hca_ptr->ib_trans.signal.scm[1]); -} - -static int dapls_config_fd(int fd) -{ - int opts; - - opts = fcntl(fd, F_GETFL); - if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) { - dapl_log(DAPL_DBG_TYPE_ERR, - " dapls_config_fd: fcntl on fd %d ERR %d %s\n", - fd, opts, strerror(errno)); - return errno; - } - - return 0; -} - -static int dapls_config_verbs(struct ibv_context *verbs) -{ - return dapls_config_fd(verbs->async_fd); -} - -static int dapls_config_comp_channel(struct ibv_comp_channel *channel) -{ - return dapls_config_fd(channel->fd); -} - -#endif - -/* - * dapls_ib_init, dapls_ib_release - * - * Initialize Verb related items for device open - * - * Input: - * none - * - * Output: - * none - * - * Returns: - * 0 success, -1 error - * - */ -int32_t dapls_ib_init(void) -{ - return 0; -} - -int32_t dapls_ib_release(void) -{ - return 0; -} - -/* - * dapls_ib_open_hca - * - * Open HCA - * - * Input: - * *hca_name pointer to provider device name - * *ib_hca_handle_p pointer to provide HCA handle - * - * Output: - * none - * - * Return: - * DAT_SUCCESS - * dapl_convert_errno - * - */ -DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr) -{ - struct ibv_device **dev_list; - struct ibv_port_attr port_attr; - int i; - DAT_RETURN dat_status; - - /* Get list of all IB devices, find match, open */ - dev_list = ibv_get_device_list(NULL); - if (!dev_list) { - dapl_dbg_log(DAPL_DBG_TYPE_ERR, - " open_hca: ibv_get_device_list() failed\n", - hca_name); - return DAT_INTERNAL_ERROR; - } - - for (i = 0; dev_list[i]; ++i) { - hca_ptr->ib_trans.ib_dev = dev_list[i]; - if 
(!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - hca_name)) - goto found; - } - - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: device %s not found\n", hca_name); - goto err; - -found: - - hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev); - if (!hca_ptr->ib_hca_handle) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: dev open failed for %s, err=%s\n", - ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - strerror(errno)); - goto err; - } - hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle; - dapls_config_verbs(hca_ptr->ib_hca_handle); - - /* get lid for this hca-port, network order */ - if (ibv_query_port(hca_ptr->ib_hca_handle, - (uint8_t)hca_ptr->port_num, &port_attr)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: get lid ERR for %s, err=%s\n", - ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - strerror(errno)); - goto err; - } else { - hca_ptr->ib_trans.addr.ib.lid = htons(port_attr.lid); - } - - /* get gid for this hca-port, network order */ - if (ibv_query_gid(hca_ptr->ib_hca_handle, - (uint8_t) hca_ptr->port_num, 0, - (union ibv_gid *)&hca_ptr->ib_trans.addr.ib.gid)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: query GID ERR for %s, err=%s\n", - ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - strerror(errno)); - goto err; - } - - /* set RC tunables via enviroment or default */ - hca_ptr->ib_trans.max_inline_send = - dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_IB_DEFAULT); - hca_ptr->ib_trans.ack_retry = - dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY); - hca_ptr->ib_trans.ack_timer = - dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER); - hca_ptr->ib_trans.rnr_retry = - dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY); - hca_ptr->ib_trans.rnr_timer = - dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER); - hca_ptr->ib_trans.global = - dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL); - hca_ptr->ib_trans.hop_limit = - dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT); - hca_ptr->ib_trans.tclass = - 
dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS); - hca_ptr->ib_trans.mtu = - dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU)); - - /* initialize CM list, LISTEN, SND queue, PSP array, locks */ - if ((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS) - goto err; - - if ((dapl_os_lock_init(&hca_ptr->ib_trans.llock)) != DAT_SUCCESS) - goto err; - - if ((dapl_os_lock_init(&hca_ptr->ib_trans.slock)) != DAT_SUCCESS) - goto err; - - if ((dapl_os_lock_init(&hca_ptr->ib_trans.plock)) != DAT_SUCCESS) - goto err; - - /* EVD events without direct CQ channels, CNO support */ - hca_ptr->ib_trans.ib_cq = - ibv_create_comp_channel(hca_ptr->ib_hca_handle); - if (hca_ptr->ib_trans.ib_cq == NULL) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: ibv_create_comp_channel ERR %s\n", - strerror(errno)); - goto bail; - } - dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq); - - /* initialize CM and listen lists on this HCA uCM QP */ - dapl_llist_init_head(&hca_ptr->ib_trans.list); - dapl_llist_init_head(&hca_ptr->ib_trans.llist); - - /* create uCM qp services */ - if (ucm_service_create(hca_ptr)) - goto bail; - - if (create_os_signal(hca_ptr)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: failed to init cr pipe - %s\n", - strerror(errno)); - goto bail; - } - - /* create thread to process inbound connect request */ - hca_ptr->ib_trans.cm_state = IB_THREAD_INIT; - dat_status = dapl_os_thread_create(cm_thread, - (void *)hca_ptr, - &hca_ptr->ib_trans.thread); - if (dat_status != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: failed to create thread\n"); - goto bail; - } - - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, - " open_hca: devname %s, ctx %p port %d, hostname_IP %s\n", - ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - hca_ptr->ib_hca_handle, - hca_ptr->port_num, - inet_ntoa(((struct sockaddr_in *) - &hca_ptr->hca_address)->sin_addr)); - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, - " open_hca: QPN 0x%x LID 0x%x GID Subnet 0x" F64x "" - " ID 0x" F64x "\n", - 
ntohl(hca_ptr->ib_trans.addr.ib.qpn), - ntohs(hca_ptr->ib_trans.addr.ib.lid), - (unsigned long long) - ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[0]), - (unsigned long long) - ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[8])); - - /* save LID, GID, QPN, PORT address information, for ia_queries */ - /* Set AF_INET6 to insure callee address storage of 28 bytes */ - hca_ptr->ib_trans.hca = hca_ptr; - hca_ptr->ib_trans.addr.ib.family = AF_INET6; - hca_ptr->ib_trans.addr.ib.qp_type = IBV_QPT_UD; - memcpy(&hca_ptr->hca_address, - &hca_ptr->ib_trans.addr, - sizeof(union dcm_addr)); - - ibv_free_device_list(dev_list); - - /* wait for cm_thread */ - while (hca_ptr->ib_trans.cm_state != IB_THREAD_RUN) - dapl_os_sleep_usec(1000); - - return dat_status; - -bail: - ucm_service_destroy(hca_ptr); - ibv_close_device(hca_ptr->ib_hca_handle); - hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; - -err: - ibv_free_device_list(dev_list); - return DAT_INTERNAL_ERROR; -} - -/* - * dapls_ib_close_hca - * - * Open HCA - * - * Input: - * DAPL_HCA provide CA handle - * - * Output: - * none - * - * Return: - * DAT_SUCCESS - * dapl_convert_errno - * - */ -DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr) -{ - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr); - - if (hca_ptr->ib_trans.cm_state == IB_THREAD_RUN) { - hca_ptr->ib_trans.cm_state = IB_THREAD_CANCEL; - dapls_thread_signal(&hca_ptr->ib_trans.signal); - while (hca_ptr->ib_trans.cm_state != IB_THREAD_EXIT) { - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, - " close_hca: waiting for cr_thread\n"); - dapls_thread_signal(&hca_ptr->ib_trans.signal); - dapl_os_sleep_usec(1000); - } - } - - dapl_os_lock_destroy(&hca_ptr->ib_trans.lock); - dapl_os_lock_destroy(&hca_ptr->ib_trans.llock); - destroy_os_signal(hca_ptr); - ucm_service_destroy(hca_ptr); - - if (hca_ptr->ib_trans.ib_cq) - ibv_destroy_comp_channel(hca_ptr->ib_trans.ib_cq); - - if (hca_ptr->ib_trans.ib_cq_empty) { - struct ibv_comp_channel *channel; - channel = 
hca_ptr->ib_trans.ib_cq_empty->channel; - ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty); - ibv_destroy_comp_channel(channel); - } - - if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) { - if (ibv_close_device(hca_ptr->ib_hca_handle)) - return (dapl_convert_errno(errno, "ib_close_device")); - hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; - } - - return (DAT_SUCCESS); -} - -/* Create uCM endpoint services, allocate remote_ah's array */ -static void ucm_service_destroy(IN DAPL_HCA *hca) -{ - ib_hca_transport_t *tp = &hca->ib_trans; - int msg_size = sizeof(ib_cm_msg_t); - - if (tp->mr_sbuf) - ibv_dereg_mr(tp->mr_sbuf); - - if (tp->mr_rbuf) - ibv_dereg_mr(tp->mr_rbuf); - - if (tp->qp) - ibv_destroy_qp(tp->qp); - - if (tp->scq) - ibv_destroy_cq(tp->scq); - - if (tp->rcq) - ibv_destroy_cq(tp->rcq); - - if (tp->rch) - ibv_destroy_comp_channel(tp->rch); - - if (tp->ah) { - int i; - - for (i = 0;i < 0xffff; i++) { - if (tp->ah[i]) - ibv_destroy_ah(tp->ah[i]); - } - dapl_os_free(tp->ah, (sizeof(*tp->ah) * 0xffff)); - } - - if (tp->pd) - ibv_dealloc_pd(tp->pd); - - if (tp->sid) - dapl_os_free(tp->sid, (sizeof(*tp->sid) * 0xffff)); - - if (tp->rbuf) - dapl_os_free(tp->rbuf, (msg_size * tp->qpe)); - - if (tp->sbuf) - dapl_os_free(tp->sbuf, (msg_size * tp->qpe)); -} - -static int ucm_service_create(IN DAPL_HCA *hca) -{ - struct ibv_qp_init_attr qp_create; - ib_hca_transport_t *tp = &hca->ib_trans; - struct ibv_recv_wr recv_wr, *recv_err; - struct ibv_sge sge; - int i, mlen = sizeof(ib_cm_msg_t); - int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */ - - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ucm_create: \n"); - - /* setup CM timers and queue sizes */ - tp->retries = dapl_os_get_env_val("DAPL_UCM_RETRY", DCM_RETRY_CNT); - tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME); - tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME); - tp->cm_timer = DAPL_MIN(tp->rep_time,tp->rtu_time); - tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", 
DCM_QP_SIZE); - tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE); - tp->pd = ibv_alloc_pd(hca->ib_hca_handle); - if (!tp->pd) - goto bail; - - dapl_log(DAPL_DBG_TYPE_UTIL, - " create_service: pd %p ctx %p handle 0x%x\n", - tp->pd, tp->pd->context, tp->pd->handle); - - tp->rch = ibv_create_comp_channel(hca->ib_hca_handle); - if (!tp->rch) - goto bail; - - tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0); - if (!tp->scq) - goto bail; - - tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0); - if (!tp->rcq) - goto bail; - - if(ibv_req_notify_cq(tp->rcq, 0)) - goto bail; - - dapl_os_memzero((void *)&qp_create, sizeof(qp_create)); - qp_create.qp_type = IBV_QPT_UD; - qp_create.send_cq = tp->scq; - qp_create.recv_cq = tp->rcq; - qp_create.cap.max_send_wr = qp_create.cap.max_recv_wr = tp->qpe; - qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1; - qp_create.cap.max_inline_data = tp->max_inline_send; - qp_create.qp_context = (void *)hca; - - tp->qp = ibv_create_qp(tp->pd, &qp_create); - if (!tp->qp) - goto bail; - - tp->ah = (ib_ah_handle_t*) dapl_os_alloc(sizeof(ib_ah_handle_t) * 0xffff); - tp->sid = (uint8_t*) dapl_os_alloc(sizeof(uint8_t) * 0xffff); - tp->rbuf = (void*) dapl_os_alloc((mlen + hlen) * tp->qpe); - tp->sbuf = (void*) dapl_os_alloc(mlen * tp->qpe); - - if (!tp->ah || !tp->rbuf || !tp->sbuf || !tp->sid) - goto bail; - - (void)dapl_os_memzero(tp->ah, (sizeof(ib_ah_handle_t) * 0xffff)); - (void)dapl_os_memzero(tp->sid, (sizeof(uint8_t) * 0xffff)); - tp->sid[0] = 1; /* resv slot 0, 0 == no ports available */ - (void)dapl_os_memzero(tp->rbuf, ((mlen + hlen) * tp->qpe)); - (void)dapl_os_memzero(tp->sbuf, (mlen * tp->qpe)); - - tp->mr_sbuf = ibv_reg_mr(tp->pd, tp->sbuf, - (mlen * tp->qpe), - IBV_ACCESS_LOCAL_WRITE); - if (!tp->mr_sbuf) - goto bail; - - tp->mr_rbuf = ibv_reg_mr(tp->pd, tp->rbuf, - ((mlen + hlen) * tp->qpe), - IBV_ACCESS_LOCAL_WRITE); - if (!tp->mr_rbuf) - goto bail; - - /* modify UD QP: init, 
rtr, rts */ - if ((dapls_modify_qp_ud(hca, tp->qp)) != DAT_SUCCESS) - goto bail; - - /* post receive buffers, setup head, tail pointers */ - recv_wr.next = NULL; - recv_wr.sg_list = &sge; - recv_wr.num_sge = 1; - sge.length = mlen + hlen; - sge.lkey = tp->mr_rbuf->lkey; - - for (i = 0; i < tp->qpe; i++) { - recv_wr.wr_id = - (uintptr_t)((char *)&tp->rbuf[i] + - sizeof(struct ibv_grh)); - sge.addr = (uintptr_t) &tp->rbuf[i]; - if (ibv_post_recv(tp->qp, &recv_wr, &recv_err)) - goto bail; - } - - /* save qp_num as part of ia_address, network order */ - tp->addr.ib.qpn = htonl(tp->qp->qp_num); - return 0; -bail: - dapl_log(DAPL_DBG_TYPE_ERR, - " ucm_create_services: ERR %s\n", strerror(errno)); - ucm_service_destroy(hca); - return -1; -} - -void ucm_async_event(struct dapl_hca *hca) -{ - struct ibv_async_event event; - struct _ib_hca_transport *tp = &hca->ib_trans; - - if (!ibv_get_async_event(hca->ib_hca_handle, &event)) { - - switch (event.event_type) { - case IBV_EVENT_CQ_ERR: - { - struct dapl_ep *evd_ptr = - event.element.cq->cq_context; - - dapl_log(DAPL_DBG_TYPE_ERR, - "dapl async_event CQ (%p) ERR %d\n", - evd_ptr, event.event_type); - - /* report up if async callback still setup */ - if (tp->async_cq_error) - tp->async_cq_error(hca->ib_hca_handle, - event.element.cq, - &event, (void *)evd_ptr); - break; - } - case IBV_EVENT_COMM_EST: - { - /* Received msgs on connected QP before RTU */ - dapl_log(DAPL_DBG_TYPE_UTIL, - " async_event COMM_EST(%p) rdata beat RTU\n", - event.element.qp); - - break; - } - case IBV_EVENT_QP_FATAL: - case IBV_EVENT_QP_REQ_ERR: - case IBV_EVENT_QP_ACCESS_ERR: - case IBV_EVENT_QP_LAST_WQE_REACHED: - case IBV_EVENT_SRQ_ERR: - case IBV_EVENT_SRQ_LIMIT_REACHED: - case IBV_EVENT_SQ_DRAINED: - { - struct dapl_ep *ep_ptr = - event.element.qp->qp_context; - - dapl_log(DAPL_DBG_TYPE_ERR, - "dapl async_event QP (%p) ERR %d\n", - ep_ptr, event.event_type); - - /* report up if async callback still setup */ - if (tp->async_qp_error) - 
tp->async_qp_error(hca->ib_hca_handle, - ep_ptr->qp_handle, - &event, (void *)ep_ptr); - break; - } - case IBV_EVENT_PATH_MIG: - case IBV_EVENT_PATH_MIG_ERR: - case IBV_EVENT_DEVICE_FATAL: - case IBV_EVENT_PORT_ACTIVE: - case IBV_EVENT_PORT_ERR: - case IBV_EVENT_LID_CHANGE: - case IBV_EVENT_PKEY_CHANGE: - case IBV_EVENT_SM_CHANGE: - { - dapl_log(DAPL_DBG_TYPE_WARN, - "dapl async_event: DEV ERR %d\n", - event.event_type); - - /* report up if async callback still setup */ - if (tp->async_unafiliated) - tp->async_unafiliated(hca->ib_hca_handle, - &event, - tp->async_un_ctx); - break; - } - case IBV_EVENT_CLIENT_REREGISTER: - /* no need to report this event this time */ - dapl_log(DAPL_DBG_TYPE_UTIL, - " async_event: IBV_CLIENT_REREGISTER\n"); - break; - - default: - dapl_log(DAPL_DBG_TYPE_WARN, - "dapl async_event: %d UNKNOWN\n", - event.event_type); - break; - - } - ibv_ack_async_event(&event); - } -} - +/* + * Copyright (c) 2009 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +#include "openib_osd.h" +#include "dapl.h" +#include "dapl_adapter_util.h" +#include "dapl_ib_util.h" +#include "dapl_osd.h" + +#include + +static void ucm_service_destroy(IN DAPL_HCA *hca); +static int ucm_service_create(IN DAPL_HCA *hca); + +#if defined (_WIN32) +#include + +static int32_t create_os_signal(IN DAPL_HCA * hca_ptr) +{ + return CompSetInit(&hca_ptr->ib_trans.signal.set); +} + +static void destroy_os_signal(IN DAPL_HCA * hca_ptr) +{ + CompSetCleanup(&hca_ptr->ib_trans.signal.set); +} + +static int dapls_config_verbs(struct ibv_context *verbs) +{ + verbs->channel.Milliseconds = 0; + return 0; +} + +static int dapls_config_comp_channel(struct ibv_comp_channel *channel) +{ + channel->comp_channel.Milliseconds = 0; + return 0; +} + +#else // _WIN32 + +static int32_t create_os_signal(IN DAPL_HCA * hca_ptr) +{ + DAPL_SOCKET listen_socket; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + int ret; + + listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (listen_socket == DAPL_INVALID_SOCKET) + return 1; + + memset(&addr, 0, sizeof addr); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(0x7f000001); + ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr); + if (ret) + goto err1; + + ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen); + if (ret) + goto err1; + + ret = listen(listen_socket, 0); + if (ret) + goto err1; + + hca_ptr->ib_trans.signal.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (hca_ptr->ib_trans.signal.scm[1] == DAPL_INVALID_SOCKET) + goto err1; + + ret = connect(hca_ptr->ib_trans.signal.scm[1], + (struct sockaddr *)&addr, sizeof(addr)); + if (ret) + goto err2; + + hca_ptr->ib_trans.signal.scm[0] = accept(listen_socket, NULL, NULL); + if (hca_ptr->ib_trans.signal.scm[0] == DAPL_INVALID_SOCKET) + goto err2; + + closesocket(listen_socket); + return 0; + + err2: + closesocket(hca_ptr->ib_trans.signal.scm[1]); + err1: + closesocket(listen_socket); + return 1; +} + 
+static void destroy_os_signal(IN DAPL_HCA * hca_ptr) +{ + closesocket(hca_ptr->ib_trans.signal.scm[0]); + closesocket(hca_ptr->ib_trans.signal.scm[1]); +} + +static int dapls_config_fd(int fd) +{ + int opts; + + opts = fcntl(fd, F_GETFL); + if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) { + dapl_log(DAPL_DBG_TYPE_ERR, + " dapls_config_fd: fcntl on fd %d ERR %d %s\n", + fd, opts, strerror(errno)); + return errno; + } + + return 0; +} + +static int dapls_config_verbs(struct ibv_context *verbs) +{ + return dapls_config_fd(verbs->async_fd); +} + +static int dapls_config_comp_channel(struct ibv_comp_channel *channel) +{ + return dapls_config_fd(channel->fd); +} + +#endif + +/* + * dapls_ib_init, dapls_ib_release + * + * Initialize Verb related items for device open + * + * Input: + * none + * + * Output: + * none + * + * Returns: + * 0 success, -1 error + * + */ +int32_t dapls_ib_init(void) +{ + return 0; +} + +int32_t dapls_ib_release(void) +{ + return 0; +} + +/* + * dapls_ib_open_hca + * + * Open HCA + * + * Input: + * *hca_name pointer to provider device name + * *ib_hca_handle_p pointer to provide HCA handle + * + * Output: + * none + * + * Return: + * DAT_SUCCESS + * dapl_convert_errno + * + */ +DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr) +{ + struct ibv_device **dev_list; + struct ibv_port_attr port_attr; + int i; + DAT_RETURN dat_status; + + /* Get list of all IB devices, find match, open */ + dev_list = ibv_get_device_list(NULL); + if (!dev_list) { + dapl_dbg_log(DAPL_DBG_TYPE_ERR, + " open_hca: ibv_get_device_list() failed\n", + hca_name); + return DAT_INTERNAL_ERROR; + } + + for (i = 0; dev_list[i]; ++i) { + hca_ptr->ib_trans.ib_dev = dev_list[i]; + if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + hca_name)) + goto found; + } + + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: device %s not found\n", hca_name); + goto err; + +found: + + hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev); + if 
(!hca_ptr->ib_hca_handle) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: dev open failed for %s, err=%s\n", + ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + strerror(errno)); + goto err; + } + hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle; + dapls_config_verbs(hca_ptr->ib_hca_handle); + + /* get lid for this hca-port, network order */ + if (ibv_query_port(hca_ptr->ib_hca_handle, + (uint8_t)hca_ptr->port_num, &port_attr)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: get lid ERR for %s, err=%s\n", + ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + strerror(errno)); + goto err; + } else { + hca_ptr->ib_trans.addr.ib.lid = htons(port_attr.lid); + hca_ptr->ib_trans.lid = htons(port_attr.lid); + } + + /* get gid for this hca-port, network order */ + if (ibv_query_gid(hca_ptr->ib_hca_handle, + (uint8_t) hca_ptr->port_num, 0, + (union ibv_gid *)&hca_ptr->ib_trans.addr.ib.gid)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: query GID ERR for %s, err=%s\n", + ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + strerror(errno)); + goto err; + } + + /* set RC tunables via enviroment or default */ + hca_ptr->ib_trans.max_inline_send = + dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_IB_DEFAULT); + hca_ptr->ib_trans.ack_retry = + dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY); + hca_ptr->ib_trans.ack_timer = + dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER); + hca_ptr->ib_trans.rnr_retry = + dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY); + hca_ptr->ib_trans.rnr_timer = + dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER); + hca_ptr->ib_trans.global = + dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL); + hca_ptr->ib_trans.hop_limit = + dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT); + hca_ptr->ib_trans.tclass = + dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS); + hca_ptr->ib_trans.mtu = + dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU)); + + /* initialize CM list, LISTEN, SND queue, PSP array, locks */ + if 
((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS) + goto err; + + if ((dapl_os_lock_init(&hca_ptr->ib_trans.llock)) != DAT_SUCCESS) + goto err; + + if ((dapl_os_lock_init(&hca_ptr->ib_trans.slock)) != DAT_SUCCESS) + goto err; + + if ((dapl_os_lock_init(&hca_ptr->ib_trans.plock)) != DAT_SUCCESS) + goto err; + + /* EVD events without direct CQ channels, CNO support */ + hca_ptr->ib_trans.ib_cq = + ibv_create_comp_channel(hca_ptr->ib_hca_handle); + if (hca_ptr->ib_trans.ib_cq == NULL) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: ibv_create_comp_channel ERR %s\n", + strerror(errno)); + goto bail; + } + dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq); + + /* initialize CM and listen lists on this HCA uCM QP */ + dapl_llist_init_head(&hca_ptr->ib_trans.list); + dapl_llist_init_head(&hca_ptr->ib_trans.llist); + + /* create uCM qp services */ + if (ucm_service_create(hca_ptr)) + goto bail; + + if (create_os_signal(hca_ptr)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: failed to init cr pipe - %s\n", + strerror(errno)); + goto bail; + } + + /* create thread to process inbound connect request */ + hca_ptr->ib_trans.cm_state = IB_THREAD_INIT; + dat_status = dapl_os_thread_create(cm_thread, + (void *)hca_ptr, + &hca_ptr->ib_trans.thread); + if (dat_status != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: failed to create thread\n"); + goto bail; + } + + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, + " open_hca: devname %s, ctx %p port %d, hostname_IP %s\n", + ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + hca_ptr->ib_hca_handle, + hca_ptr->port_num, + inet_ntoa(((struct sockaddr_in *) + &hca_ptr->hca_address)->sin_addr)); + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, + " open_hca: QPN 0x%x LID 0x%x GID Subnet 0x" F64x "" + " ID 0x" F64x "\n", + ntohl(hca_ptr->ib_trans.addr.ib.qpn), + ntohs(hca_ptr->ib_trans.addr.ib.lid), + (unsigned long long) + ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[0]), + (unsigned long long) + 
ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[8])); + + /* save LID, GID, QPN, PORT address information, for ia_queries */ + /* Set AF_INET6 to insure callee address storage of 28 bytes */ + hca_ptr->ib_trans.hca = hca_ptr; + hca_ptr->ib_trans.addr.ib.family = AF_INET6; + hca_ptr->ib_trans.addr.ib.qp_type = IBV_QPT_UD; + memcpy(&hca_ptr->hca_address, + &hca_ptr->ib_trans.addr, + sizeof(union dcm_addr)); + + ibv_free_device_list(dev_list); + + /* wait for cm_thread */ + while (hca_ptr->ib_trans.cm_state != IB_THREAD_RUN) + dapl_os_sleep_usec(1000); + + return dat_status; + +bail: + ucm_service_destroy(hca_ptr); + ibv_close_device(hca_ptr->ib_hca_handle); + hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; + +err: + ibv_free_device_list(dev_list); + return DAT_INTERNAL_ERROR; +} + +/* + * dapls_ib_close_hca + * + * Open HCA + * + * Input: + * DAPL_HCA provide CA handle + * + * Output: + * none + * + * Return: + * DAT_SUCCESS + * dapl_convert_errno + * + */ +DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr) +{ + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr); + + if (hca_ptr->ib_trans.cm_state == IB_THREAD_RUN) { + hca_ptr->ib_trans.cm_state = IB_THREAD_CANCEL; + dapls_thread_signal(&hca_ptr->ib_trans.signal); + while (hca_ptr->ib_trans.cm_state != IB_THREAD_EXIT) { + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, + " close_hca: waiting for cr_thread\n"); + dapls_thread_signal(&hca_ptr->ib_trans.signal); + dapl_os_sleep_usec(1000); + } + } + + dapl_os_lock_destroy(&hca_ptr->ib_trans.lock); + dapl_os_lock_destroy(&hca_ptr->ib_trans.llock); + destroy_os_signal(hca_ptr); + ucm_service_destroy(hca_ptr); + + if (hca_ptr->ib_trans.ib_cq) + ibv_destroy_comp_channel(hca_ptr->ib_trans.ib_cq); + + if (hca_ptr->ib_trans.ib_cq_empty) { + struct ibv_comp_channel *channel; + channel = hca_ptr->ib_trans.ib_cq_empty->channel; + ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty); + ibv_destroy_comp_channel(channel); + } + + if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) { + if 
(ibv_close_device(hca_ptr->ib_hca_handle)) + return (dapl_convert_errno(errno, "ib_close_device")); + hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; + } + + return (DAT_SUCCESS); +} + +/* Create uCM endpoint services, allocate remote_ah's array */ +static void ucm_service_destroy(IN DAPL_HCA *hca) +{ + ib_hca_transport_t *tp = &hca->ib_trans; + int msg_size = sizeof(ib_cm_msg_t); + + if (tp->mr_sbuf) + ibv_dereg_mr(tp->mr_sbuf); + + if (tp->mr_rbuf) + ibv_dereg_mr(tp->mr_rbuf); + + if (tp->qp) + ibv_destroy_qp(tp->qp); + + if (tp->scq) + ibv_destroy_cq(tp->scq); + + if (tp->rcq) + ibv_destroy_cq(tp->rcq); + + if (tp->rch) + ibv_destroy_comp_channel(tp->rch); + + if (tp->ah) { + int i; + + for (i = 0;i < 0xffff; i++) { + if (tp->ah[i]) + ibv_destroy_ah(tp->ah[i]); + } + dapl_os_free(tp->ah, (sizeof(*tp->ah) * 0xffff)); + } + + if (tp->pd) + ibv_dealloc_pd(tp->pd); + + if (tp->sid) + dapl_os_free(tp->sid, (sizeof(*tp->sid) * 0xffff)); + + if (tp->rbuf) + dapl_os_free(tp->rbuf, (msg_size * tp->qpe)); + + if (tp->sbuf) + dapl_os_free(tp->sbuf, (msg_size * tp->qpe)); +} + +static int ucm_service_create(IN DAPL_HCA *hca) +{ + struct ibv_qp_init_attr qp_create; + ib_hca_transport_t *tp = &hca->ib_trans; + struct ibv_recv_wr recv_wr, *recv_err; + struct ibv_sge sge; + int i, mlen = sizeof(ib_cm_msg_t); + int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */ + + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ucm_create: \n"); + + /* setup CM timers and queue sizes */ + tp->retries = dapl_os_get_env_val("DAPL_UCM_RETRY", DCM_RETRY_CNT); + tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME); + tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME); + tp->cm_timer = DAPL_MIN(tp->rep_time,tp->rtu_time); + tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", DCM_QP_SIZE); + tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE); + tp->burst = dapl_os_get_env_val("DAPL_UCM_TX_BURST", DCM_TX_BURST); + tp->pd = ibv_alloc_pd(hca->ib_hca_handle); + 
if (!tp->pd) + goto bail; + + dapl_log(DAPL_DBG_TYPE_UTIL, + " create_service: pd %p ctx %p handle 0x%x\n", + tp->pd, tp->pd->context, tp->pd->handle); + + tp->rch = ibv_create_comp_channel(hca->ib_hca_handle); + if (!tp->rch) + goto bail; + + tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0); + if (!tp->scq) + goto bail; + + tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0); + if (!tp->rcq) + goto bail; + + if(ibv_req_notify_cq(tp->rcq, 0)) + goto bail; + + dapl_os_memzero((void *)&qp_create, sizeof(qp_create)); + qp_create.qp_type = IBV_QPT_UD; + qp_create.send_cq = tp->scq; + qp_create.recv_cq = tp->rcq; + qp_create.cap.max_send_wr = qp_create.cap.max_recv_wr = tp->qpe; + qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1; + qp_create.cap.max_inline_data = tp->max_inline_send; + qp_create.qp_context = (void *)hca; + + tp->qp = ibv_create_qp(tp->pd, &qp_create); + if (!tp->qp) + goto bail; + + tp->ah = (ib_ah_handle_t*) dapl_os_alloc(sizeof(ib_ah_handle_t) * 0xffff); + tp->sid = (uint8_t*) dapl_os_alloc(sizeof(uint8_t) * 0xffff); + tp->rbuf = (void*) dapl_os_alloc((mlen + hlen) * tp->qpe); + tp->sbuf = (void*) dapl_os_alloc(mlen * tp->qpe); + tp->s_hd = tp->s_tl = 0; + + if (!tp->ah || !tp->rbuf || !tp->sbuf || !tp->sid) + goto bail; + + (void)dapl_os_memzero(tp->ah, (sizeof(ib_ah_handle_t) * 0xffff)); + (void)dapl_os_memzero(tp->sid, (sizeof(uint8_t) * 0xffff)); + tp->sid[0] = 1; /* resv slot 0, 0 == no ports available */ + (void)dapl_os_memzero(tp->rbuf, ((mlen + hlen) * tp->qpe)); + (void)dapl_os_memzero(tp->sbuf, (mlen * tp->qpe)); + + tp->mr_sbuf = ibv_reg_mr(tp->pd, tp->sbuf, + (mlen * tp->qpe), + IBV_ACCESS_LOCAL_WRITE); + if (!tp->mr_sbuf) + goto bail; + + tp->mr_rbuf = ibv_reg_mr(tp->pd, tp->rbuf, + ((mlen + hlen) * tp->qpe), + IBV_ACCESS_LOCAL_WRITE); + if (!tp->mr_rbuf) + goto bail; + + /* modify UD QP: init, rtr, rts */ + if ((dapls_modify_qp_ud(hca, tp->qp)) != DAT_SUCCESS) + goto bail; + + /* post 
receive buffers, setup head, tail pointers */ + recv_wr.next = NULL; + recv_wr.sg_list = &sge; + recv_wr.num_sge = 1; + sge.length = mlen + hlen; + sge.lkey = tp->mr_rbuf->lkey; + + for (i = 0; i < tp->qpe; i++) { + recv_wr.wr_id = + (uintptr_t)((char *)&tp->rbuf[i] + + sizeof(struct ibv_grh)); + sge.addr = (uintptr_t) &tp->rbuf[i]; + if (ibv_post_recv(tp->qp, &recv_wr, &recv_err)) + goto bail; + } + + /* save qp_num as part of ia_address, network order */ + tp->addr.ib.qpn = htonl(tp->qp->qp_num); + return 0; +bail: + dapl_log(DAPL_DBG_TYPE_ERR, + " ucm_create_services: ERR %s\n", strerror(errno)); + ucm_service_destroy(hca); + return -1; +} + +void ucm_async_event(struct dapl_hca *hca) +{ + struct ibv_async_event event; + struct _ib_hca_transport *tp = &hca->ib_trans; + + if (!ibv_get_async_event(hca->ib_hca_handle, &event)) { + + switch (event.event_type) { + case IBV_EVENT_CQ_ERR: + { + struct dapl_ep *evd_ptr = + event.element.cq->cq_context; + + dapl_log(DAPL_DBG_TYPE_ERR, + "dapl async_event CQ (%p) ERR %d\n", + evd_ptr, event.event_type); + + /* report up if async callback still setup */ + if (tp->async_cq_error) + tp->async_cq_error(hca->ib_hca_handle, + event.element.cq, + &event, (void *)evd_ptr); + break; + } + case IBV_EVENT_COMM_EST: + { + /* Received msgs on connected QP before RTU */ + dapl_log(DAPL_DBG_TYPE_UTIL, + " async_event COMM_EST(%p) rdata beat RTU\n", + event.element.qp); + + break; + } + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_QP_LAST_WQE_REACHED: + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: + case IBV_EVENT_SQ_DRAINED: + { + struct dapl_ep *ep_ptr = + event.element.qp->qp_context; + + dapl_log(DAPL_DBG_TYPE_ERR, + "dapl async_event QP (%p) ERR %d\n", + ep_ptr, event.event_type); + + /* report up if async callback still setup */ + if (tp->async_qp_error) + tp->async_qp_error(hca->ib_hca_handle, + ep_ptr->qp_handle, + &event, (void *)ep_ptr); + break; + } + 
case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_DEVICE_FATAL: + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_PORT_ERR: + case IBV_EVENT_LID_CHANGE: + case IBV_EVENT_PKEY_CHANGE: + case IBV_EVENT_SM_CHANGE: + { + dapl_log(DAPL_DBG_TYPE_WARN, + "dapl async_event: DEV ERR %d\n", + event.event_type); + + /* report up if async callback still setup */ + if (tp->async_unafiliated) + tp->async_unafiliated(hca->ib_hca_handle, + &event, + tp->async_un_ctx); + break; + } + case IBV_EVENT_CLIENT_REREGISTER: + /* no need to report this event this time */ + dapl_log(DAPL_DBG_TYPE_UTIL, + " async_event: IBV_CLIENT_REREGISTER\n"); + break; + + default: + dapl_log(DAPL_DBG_TYPE_WARN, + "dapl async_event: %d UNKNOWN\n", + event.event_type); + break; + + } + ibv_ack_async_event(&event); + } +} + diff --git a/trunk/ulp/dapl2/Makefile.am b/trunk/ulp/dapl2/Makefile.am index 0be62980..553fea0f 100644 --- a/trunk/ulp/dapl2/Makefile.am +++ b/trunk/ulp/dapl2/Makefile.am @@ -1,581 +1,581 @@ -# $Id: $ - -OSFLAGS = -DOS_RELEASE=$(shell expr `uname -r | cut -f1 -d.` \* 65536 + `uname -r | cut -f2 -d.`) -# Check for RedHat, needed for ia64 udapl atomic operations (IA64_FETCHADD syntax) -# and built-in atomics for RedHat EL5 -if OS_RHEL4 -OSFLAGS += -DREDHAT_EL4 -endif - -if OS_RHEL5 -OSFLAGS += -DREDHAT_EL5 -endif - -if OS_SUSE11 -OSFLAGS += -DSUSE_11 -endif - -if EXT_TYPE_IB -XFLAGS = -DDAT_EXTENSIONS -XPROGRAMS = dapl/openib_common/ib_extensions.c -else -XFLAGS = -XPROGRAMS = -endif - -if DEFINE_ATTR_LINK_LAYER -XFLAGS += -DDEFINE_ATTR_LINK_LAYER -endif - -if DEBUG -AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAPL_DBG -DDAT_CONF="\"$(sysconfdir)/dat.conf\"" -else -AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAT_CONF="\"$(sysconfdir)/dat.conf\"" -endif - -datlibdir = $(libdir) -dapllibofadir = $(libdir) -daplliboscmdir = $(libdir) -daplliboucmdir = $(libdir) - -datlib_LTLIBRARIES = dat/udat/libdat2.la -dapllibofa_LTLIBRARIES = dapl/udapl/libdaplofa.la -daplliboscm_LTLIBRARIES = 
dapl/udapl/libdaploscm.la -daplliboucm_LTLIBRARIES = dapl/udapl/libdaploucm.la - -dat_udat_libdat2_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ - -I$(srcdir)/dat/include/ -I$(srcdir)/dat/udat/ \ - -I$(srcdir)/dat/udat/linux -I$(srcdir)/dat/common/ - -dapl_udapl_libdaplofa_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ - -DOPENIB -DCQ_WAIT_OBJECT \ - -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ - -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ - -I$(srcdir)/dapl/openib_common \ - -I$(srcdir)/dapl/openib_cma \ - -I$(srcdir)/dapl/openib_cma/linux - -dapl_udapl_libdaploscm_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ - -DOPENIB -DCQ_WAIT_OBJECT \ - -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ - -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ - -I$(srcdir)/dapl/openib_common \ - -I$(srcdir)/dapl/openib_scm \ - -I$(srcdir)/dapl/openib_scm/linux - -dapl_udapl_libdaploucm_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ - -DOPENIB -DCQ_WAIT_OBJECT \ - -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ - -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ - -I$(srcdir)/dapl/openib_common \ - -I$(srcdir)/dapl/openib_ucm \ - -I$(srcdir)/dapl/openib_ucm/linux - -if HAVE_LD_VERSION_SCRIPT - dat_version_script = -Wl,--version-script=$(srcdir)/dat/udat/libdat2.map - daplofa_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaplofa.map - daploscm_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaploscm.map - daploucm_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaploucm.map -else - dat_version_script = - daplofa_version_script = - daploscm_version_script = - daploucm_version_script = -endif - -# -# uDAT: libdat2.so -# -dat_udat_libdat2_la_SOURCES = dat/udat/udat.c \ - dat/udat/udat_api.c \ - dat/udat/udat_sr_parser.c \ - dat/udat/linux/dat_osd.c \ - dat/common/dat_api.c \ - dat/common/dat_dictionary.c \ - dat/common/dat_strerror.c \ - 
dat/common/dat_init.c \ - dat/common/dat_dr.c \ - dat/common/dat_sr.c -dat_udat_libdat2_la_LDFLAGS = -version-info 2:0:0 $(dat_version_script) -ldl - -# -# uDAPL OpenFabrics rdma_cm version: libdaplofa.so -# -dapl_udapl_libdaplofa_la_SOURCES = dapl/udapl/dapl_init.c \ - dapl/udapl/dapl_evd_create.c \ - dapl/udapl/dapl_evd_query.c \ - dapl/udapl/dapl_cno_create.c \ - dapl/udapl/dapl_cno_modify_agent.c \ - dapl/udapl/dapl_cno_free.c \ - dapl/udapl/dapl_cno_wait.c \ - dapl/udapl/dapl_cno_query.c \ - dapl/udapl/dapl_lmr_create.c \ - dapl/udapl/dapl_evd_wait.c \ - dapl/udapl/dapl_evd_disable.c \ - dapl/udapl/dapl_evd_enable.c \ - dapl/udapl/dapl_evd_modify_cno.c \ - dapl/udapl/dapl_evd_set_unwaitable.c \ - dapl/udapl/dapl_evd_clear_unwaitable.c \ - dapl/udapl/linux/dapl_osd.c \ - dapl/common/dapl_cookie.c \ - dapl/common/dapl_cr_accept.c \ - dapl/common/dapl_cr_query.c \ - dapl/common/dapl_cr_reject.c \ - dapl/common/dapl_cr_util.c \ - dapl/common/dapl_cr_callback.c \ - dapl/common/dapl_cr_handoff.c \ - dapl/common/dapl_ep_connect.c \ - dapl/common/dapl_ep_create.c \ - dapl/common/dapl_ep_disconnect.c \ - dapl/common/dapl_ep_dup_connect.c \ - dapl/common/dapl_ep_free.c \ - dapl/common/dapl_ep_reset.c \ - dapl/common/dapl_ep_get_status.c \ - dapl/common/dapl_ep_modify.c \ - dapl/common/dapl_ep_post_rdma_read.c \ - dapl/common/dapl_ep_post_rdma_write.c \ - dapl/common/dapl_ep_post_recv.c \ - dapl/common/dapl_ep_post_send.c \ - dapl/common/dapl_ep_query.c \ - dapl/common/dapl_ep_util.c \ - dapl/common/dapl_evd_dequeue.c \ - dapl/common/dapl_evd_free.c \ - dapl/common/dapl_evd_post_se.c \ - dapl/common/dapl_evd_resize.c \ - dapl/common/dapl_evd_util.c \ - dapl/common/dapl_evd_cq_async_error_callb.c \ - dapl/common/dapl_evd_qp_async_error_callb.c \ - dapl/common/dapl_evd_un_async_error_callb.c \ - dapl/common/dapl_evd_connection_callb.c \ - dapl/common/dapl_evd_dto_callb.c \ - dapl/common/dapl_get_consumer_context.c \ - dapl/common/dapl_get_handle_type.c \ - 
dapl/common/dapl_hash.c \ - dapl/common/dapl_hca_util.c \ - dapl/common/dapl_ia_close.c \ - dapl/common/dapl_ia_open.c \ - dapl/common/dapl_ia_query.c \ - dapl/common/dapl_ia_util.c \ - dapl/common/dapl_llist.c \ - dapl/common/dapl_lmr_free.c \ - dapl/common/dapl_lmr_query.c \ - dapl/common/dapl_lmr_util.c \ - dapl/common/dapl_lmr_sync_rdma_read.c \ - dapl/common/dapl_lmr_sync_rdma_write.c \ - dapl/common/dapl_mr_util.c \ - dapl/common/dapl_provider.c \ - dapl/common/dapl_sp_util.c \ - dapl/common/dapl_psp_create.c \ - dapl/common/dapl_psp_create_any.c \ - dapl/common/dapl_psp_free.c \ - dapl/common/dapl_psp_query.c \ - dapl/common/dapl_pz_create.c \ - dapl/common/dapl_pz_free.c \ - dapl/common/dapl_pz_query.c \ - dapl/common/dapl_pz_util.c \ - dapl/common/dapl_rmr_create.c \ - dapl/common/dapl_rmr_free.c \ - dapl/common/dapl_rmr_bind.c \ - dapl/common/dapl_rmr_query.c \ - dapl/common/dapl_rmr_util.c \ - dapl/common/dapl_rsp_create.c \ - dapl/common/dapl_rsp_free.c \ - dapl/common/dapl_rsp_query.c \ - dapl/common/dapl_cno_util.c \ - dapl/common/dapl_set_consumer_context.c \ - dapl/common/dapl_ring_buffer_util.c \ - dapl/common/dapl_name_service.c \ - dapl/common/dapl_timer_util.c \ - dapl/common/dapl_ep_create_with_srq.c \ - dapl/common/dapl_ep_recv_query.c \ - dapl/common/dapl_ep_set_watermark.c \ - dapl/common/dapl_srq_create.c \ - dapl/common/dapl_srq_free.c \ - dapl/common/dapl_srq_query.c \ - dapl/common/dapl_srq_resize.c \ - dapl/common/dapl_srq_post_recv.c \ - dapl/common/dapl_srq_set_lw.c \ - dapl/common/dapl_srq_util.c \ - dapl/common/dapl_debug.c \ - dapl/common/dapl_ia_ha.c \ - dapl/common/dapl_csp.c \ - dapl/common/dapl_ep_post_send_invalidate.c \ - dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ - dapl/openib_common/mem.c \ - dapl/openib_common/cq.c \ - dapl/openib_common/qp.c \ - dapl/openib_common/util.c \ - dapl/openib_cma/cm.c \ - dapl/openib_cma/device.c $(XPROGRAMS) - -dapl_udapl_libdaplofa_la_LDFLAGS = -version-info 2:0:0 $(daplofa_version_script) 
\ - -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ - -lpthread -libverbs -lrdmacm - -# -# uDAPL OpenFabrics Socket CM version for IB: libdaplscm.so -# -dapl_udapl_libdaploscm_la_SOURCES = dapl/udapl/dapl_init.c \ - dapl/udapl/dapl_evd_create.c \ - dapl/udapl/dapl_evd_query.c \ - dapl/udapl/dapl_cno_create.c \ - dapl/udapl/dapl_cno_modify_agent.c \ - dapl/udapl/dapl_cno_free.c \ - dapl/udapl/dapl_cno_wait.c \ - dapl/udapl/dapl_cno_query.c \ - dapl/udapl/dapl_lmr_create.c \ - dapl/udapl/dapl_evd_wait.c \ - dapl/udapl/dapl_evd_disable.c \ - dapl/udapl/dapl_evd_enable.c \ - dapl/udapl/dapl_evd_modify_cno.c \ - dapl/udapl/dapl_evd_set_unwaitable.c \ - dapl/udapl/dapl_evd_clear_unwaitable.c \ - dapl/udapl/linux/dapl_osd.c \ - dapl/common/dapl_cookie.c \ - dapl/common/dapl_cr_accept.c \ - dapl/common/dapl_cr_query.c \ - dapl/common/dapl_cr_reject.c \ - dapl/common/dapl_cr_util.c \ - dapl/common/dapl_cr_callback.c \ - dapl/common/dapl_cr_handoff.c \ - dapl/common/dapl_ep_connect.c \ - dapl/common/dapl_ep_create.c \ - dapl/common/dapl_ep_disconnect.c \ - dapl/common/dapl_ep_dup_connect.c \ - dapl/common/dapl_ep_free.c \ - dapl/common/dapl_ep_reset.c \ - dapl/common/dapl_ep_get_status.c \ - dapl/common/dapl_ep_modify.c \ - dapl/common/dapl_ep_post_rdma_read.c \ - dapl/common/dapl_ep_post_rdma_write.c \ - dapl/common/dapl_ep_post_recv.c \ - dapl/common/dapl_ep_post_send.c \ - dapl/common/dapl_ep_query.c \ - dapl/common/dapl_ep_util.c \ - dapl/common/dapl_evd_dequeue.c \ - dapl/common/dapl_evd_free.c \ - dapl/common/dapl_evd_post_se.c \ - dapl/common/dapl_evd_resize.c \ - dapl/common/dapl_evd_util.c \ - dapl/common/dapl_evd_cq_async_error_callb.c \ - dapl/common/dapl_evd_qp_async_error_callb.c \ - dapl/common/dapl_evd_un_async_error_callb.c \ - dapl/common/dapl_evd_connection_callb.c \ - dapl/common/dapl_evd_dto_callb.c \ - dapl/common/dapl_get_consumer_context.c \ - dapl/common/dapl_get_handle_type.c \ - dapl/common/dapl_hash.c \ - dapl/common/dapl_hca_util.c \ - 
dapl/common/dapl_ia_close.c \ - dapl/common/dapl_ia_open.c \ - dapl/common/dapl_ia_query.c \ - dapl/common/dapl_ia_util.c \ - dapl/common/dapl_llist.c \ - dapl/common/dapl_lmr_free.c \ - dapl/common/dapl_lmr_query.c \ - dapl/common/dapl_lmr_util.c \ - dapl/common/dapl_lmr_sync_rdma_read.c \ - dapl/common/dapl_lmr_sync_rdma_write.c \ - dapl/common/dapl_mr_util.c \ - dapl/common/dapl_provider.c \ - dapl/common/dapl_sp_util.c \ - dapl/common/dapl_psp_create.c \ - dapl/common/dapl_psp_create_any.c \ - dapl/common/dapl_psp_free.c \ - dapl/common/dapl_psp_query.c \ - dapl/common/dapl_pz_create.c \ - dapl/common/dapl_pz_free.c \ - dapl/common/dapl_pz_query.c \ - dapl/common/dapl_pz_util.c \ - dapl/common/dapl_rmr_create.c \ - dapl/common/dapl_rmr_free.c \ - dapl/common/dapl_rmr_bind.c \ - dapl/common/dapl_rmr_query.c \ - dapl/common/dapl_rmr_util.c \ - dapl/common/dapl_rsp_create.c \ - dapl/common/dapl_rsp_free.c \ - dapl/common/dapl_rsp_query.c \ - dapl/common/dapl_cno_util.c \ - dapl/common/dapl_set_consumer_context.c \ - dapl/common/dapl_ring_buffer_util.c \ - dapl/common/dapl_name_service.c \ - dapl/common/dapl_timer_util.c \ - dapl/common/dapl_ep_create_with_srq.c \ - dapl/common/dapl_ep_recv_query.c \ - dapl/common/dapl_ep_set_watermark.c \ - dapl/common/dapl_srq_create.c \ - dapl/common/dapl_srq_free.c \ - dapl/common/dapl_srq_query.c \ - dapl/common/dapl_srq_resize.c \ - dapl/common/dapl_srq_post_recv.c \ - dapl/common/dapl_srq_set_lw.c \ - dapl/common/dapl_srq_util.c \ - dapl/common/dapl_debug.c \ - dapl/common/dapl_ia_ha.c \ - dapl/common/dapl_csp.c \ - dapl/common/dapl_ep_post_send_invalidate.c \ - dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ - dapl/openib_common/mem.c \ - dapl/openib_common/cq.c \ - dapl/openib_common/qp.c \ - dapl/openib_common/util.c \ - dapl/openib_scm/cm.c \ - dapl/openib_scm/device.c $(XPROGRAMS) - -dapl_udapl_libdaploscm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \ - -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ - -lpthread 
-libverbs - -# -# uDAPL OpenFabrics UD CM version for IB: libdaplucm.so -# -dapl_udapl_libdaploucm_la_SOURCES = dapl/udapl/dapl_init.c \ - dapl/udapl/dapl_evd_create.c \ - dapl/udapl/dapl_evd_query.c \ - dapl/udapl/dapl_cno_create.c \ - dapl/udapl/dapl_cno_modify_agent.c \ - dapl/udapl/dapl_cno_free.c \ - dapl/udapl/dapl_cno_wait.c \ - dapl/udapl/dapl_cno_query.c \ - dapl/udapl/dapl_lmr_create.c \ - dapl/udapl/dapl_evd_wait.c \ - dapl/udapl/dapl_evd_disable.c \ - dapl/udapl/dapl_evd_enable.c \ - dapl/udapl/dapl_evd_modify_cno.c \ - dapl/udapl/dapl_evd_set_unwaitable.c \ - dapl/udapl/dapl_evd_clear_unwaitable.c \ - dapl/udapl/linux/dapl_osd.c \ - dapl/common/dapl_cookie.c \ - dapl/common/dapl_cr_accept.c \ - dapl/common/dapl_cr_query.c \ - dapl/common/dapl_cr_reject.c \ - dapl/common/dapl_cr_util.c \ - dapl/common/dapl_cr_callback.c \ - dapl/common/dapl_cr_handoff.c \ - dapl/common/dapl_ep_connect.c \ - dapl/common/dapl_ep_create.c \ - dapl/common/dapl_ep_disconnect.c \ - dapl/common/dapl_ep_dup_connect.c \ - dapl/common/dapl_ep_free.c \ - dapl/common/dapl_ep_reset.c \ - dapl/common/dapl_ep_get_status.c \ - dapl/common/dapl_ep_modify.c \ - dapl/common/dapl_ep_post_rdma_read.c \ - dapl/common/dapl_ep_post_rdma_write.c \ - dapl/common/dapl_ep_post_recv.c \ - dapl/common/dapl_ep_post_send.c \ - dapl/common/dapl_ep_query.c \ - dapl/common/dapl_ep_util.c \ - dapl/common/dapl_evd_dequeue.c \ - dapl/common/dapl_evd_free.c \ - dapl/common/dapl_evd_post_se.c \ - dapl/common/dapl_evd_resize.c \ - dapl/common/dapl_evd_util.c \ - dapl/common/dapl_evd_cq_async_error_callb.c \ - dapl/common/dapl_evd_qp_async_error_callb.c \ - dapl/common/dapl_evd_un_async_error_callb.c \ - dapl/common/dapl_evd_connection_callb.c \ - dapl/common/dapl_evd_dto_callb.c \ - dapl/common/dapl_get_consumer_context.c \ - dapl/common/dapl_get_handle_type.c \ - dapl/common/dapl_hash.c \ - dapl/common/dapl_hca_util.c \ - dapl/common/dapl_ia_close.c \ - dapl/common/dapl_ia_open.c \ - 
dapl/common/dapl_ia_query.c \ - dapl/common/dapl_ia_util.c \ - dapl/common/dapl_llist.c \ - dapl/common/dapl_lmr_free.c \ - dapl/common/dapl_lmr_query.c \ - dapl/common/dapl_lmr_util.c \ - dapl/common/dapl_lmr_sync_rdma_read.c \ - dapl/common/dapl_lmr_sync_rdma_write.c \ - dapl/common/dapl_mr_util.c \ - dapl/common/dapl_provider.c \ - dapl/common/dapl_sp_util.c \ - dapl/common/dapl_psp_create.c \ - dapl/common/dapl_psp_create_any.c \ - dapl/common/dapl_psp_free.c \ - dapl/common/dapl_psp_query.c \ - dapl/common/dapl_pz_create.c \ - dapl/common/dapl_pz_free.c \ - dapl/common/dapl_pz_query.c \ - dapl/common/dapl_pz_util.c \ - dapl/common/dapl_rmr_create.c \ - dapl/common/dapl_rmr_free.c \ - dapl/common/dapl_rmr_bind.c \ - dapl/common/dapl_rmr_query.c \ - dapl/common/dapl_rmr_util.c \ - dapl/common/dapl_rsp_create.c \ - dapl/common/dapl_rsp_free.c \ - dapl/common/dapl_rsp_query.c \ - dapl/common/dapl_cno_util.c \ - dapl/common/dapl_set_consumer_context.c \ - dapl/common/dapl_ring_buffer_util.c \ - dapl/common/dapl_name_service.c \ - dapl/common/dapl_timer_util.c \ - dapl/common/dapl_ep_create_with_srq.c \ - dapl/common/dapl_ep_recv_query.c \ - dapl/common/dapl_ep_set_watermark.c \ - dapl/common/dapl_srq_create.c \ - dapl/common/dapl_srq_free.c \ - dapl/common/dapl_srq_query.c \ - dapl/common/dapl_srq_resize.c \ - dapl/common/dapl_srq_post_recv.c \ - dapl/common/dapl_srq_set_lw.c \ - dapl/common/dapl_srq_util.c \ - dapl/common/dapl_debug.c \ - dapl/common/dapl_ia_ha.c \ - dapl/common/dapl_csp.c \ - dapl/common/dapl_ep_post_send_invalidate.c \ - dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ - dapl/openib_common/mem.c \ - dapl/openib_common/cq.c \ - dapl/openib_common/qp.c \ - dapl/openib_common/util.c \ - dapl/openib_ucm/cm.c \ - dapl/openib_ucm/device.c $(XPROGRAMS) - -dapl_udapl_libdaploucm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \ - -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ - -lpthread -libverbs - -libdatincludedir = $(includedir)/dat2 - 
-libdatinclude_HEADERS = dat/include/dat2/dat.h \ - dat/include/dat2/dat_error.h \ - dat/include/dat2/dat_platform_specific.h \ - dat/include/dat2/dat_redirection.h \ - dat/include/dat2/dat_registry.h \ - dat/include/dat2/dat_vendor_specific.h \ - dat/include/dat2/udat_config.h \ - dat/include/dat2/udat.h \ - dat/include/dat2/udat_redirection.h \ - dat/include/dat2/udat_vendor_specific.h \ - dat/include/dat2/dat_ib_extensions.h - -man_MANS = man/dtest.1 man/dapltest.1 man/dat.conf.5 - -EXTRA_DIST = dat/common/dat_dictionary.h \ - dat/common/dat_dr.h \ - dat/common/dat_init.h \ - dat/common/dat_sr.h \ - dat/udat/udat_sr_parser.h \ - dat/udat/linux/dat_osd.h \ - dat/include/dat2/dat.h \ - dat/include/dat2/dat_error.h \ - dat/include/dat2/dat_platform_specific.h \ - dat/include/dat2/dat_redirection.h \ - dat/include/dat2/dat_registry.h \ - dat/include/dat2/dat_vendor_specific.h \ - dat/include/dat2/udat_config.h \ - dat/include/dat2/udat.h \ - dat/include/dat2/udat_redirection.h \ - dat/include/dat2/udat_vendor_specific.h \ - dapl/common/dapl_adapter_util.h \ - dapl/common/dapl_cno_util.h \ - dapl/common/dapl_cookie.h \ - dapl/common/dapl_cr_util.h \ - dapl/common/dapl_ep_util.h \ - dapl/common/dapl_evd_util.h \ - dapl/common/dapl_hash.h \ - dapl/common/dapl_hca_util.h \ - dapl/common/dapl_ia_util.h \ - dapl/common/dapl_init.h \ - dapl/common/dapl_lmr_util.h \ - dapl/common/dapl_mr_util.h \ - dapl/common/dapl_name_service.h \ - dapl/common/dapl_provider.h \ - dapl/common/dapl_pz_util.h \ - dapl/common/dapl_ring_buffer_util.h \ - dapl/common/dapl_rmr_util.h \ - dapl/common/dapl_sp_util.h \ - dapl/common/dapl_srq_util.h \ - dapl/common/dapl_timer_util.h \ - dapl/udapl/linux/dapl_osd.h \ - dapl/include/dapl.h \ - dapl/include/dapl_debug.h \ - dapl/include/dapl_ipoib_names.h \ - dapl/include/dapl_vendor.h \ - dapl/openib_common/dapl_ib_dto.h \ - dapl/openib_common/dapl_ib_common.h \ - dapl/openib_cma/dapl_ib_util.h \ - dapl/openib_cma/linux/openib_osd.h \ - 
dapl/openib_scm/dapl_ib_util.h \ - dapl/openib_scm/linux/openib_osd.h \ - dapl/openib_ucm/dapl_ib_util.h \ - dapl/openib_ucm/linux/openib_osd.h \ - dat/udat/libdat2.map \ - dapl/udapl/libdaplofa.map \ - dapl/udapl/libdaploscm.map \ - dapl/udapl/libdaploucm.map \ - LICENSE.txt \ - LICENSE2.txt \ - LICENSE3.txt \ - dapl.spec.in \ - $(man_MANS) \ - test/dapltest/include/dapl_bpool.h \ - test/dapltest/include/dapl_client_info.h \ - test/dapltest/include/dapl_common.h \ - test/dapltest/include/dapl_execute.h \ - test/dapltest/include/dapl_fft_cmd.h \ - test/dapltest/include/dapl_fft_util.h \ - test/dapltest/include/dapl_getopt.h \ - test/dapltest/include/dapl_global.h \ - test/dapltest/include/dapl_limit_cmd.h \ - test/dapltest/include/dapl_mdep.h \ - test/dapltest/include/dapl_memlist.h \ - test/dapltest/include/dapl_params.h \ - test/dapltest/include/dapl_performance_cmd.h \ - test/dapltest/include/dapl_performance_stats.h \ - test/dapltest/include/dapl_performance_test.h \ - test/dapltest/include/dapl_proto.h \ - test/dapltest/include/dapl_quit_cmd.h \ - test/dapltest/include/dapl_server_cmd.h \ - test/dapltest/include/dapl_server_info.h \ - test/dapltest/include/dapl_tdep.h \ - test/dapltest/include/dapl_tdep_print.h \ - test/dapltest/include/dapl_test_data.h \ - test/dapltest/include/dapl_transaction_cmd.h \ - test/dapltest/include/dapl_transaction_stats.h \ - test/dapltest/include/dapl_transaction_test.h \ - test/dapltest/include/dapl_version.h \ - test/dapltest/mdep/linux/dapl_mdep_user.h - -dist-hook: dapl.spec - cp dapl.spec $(distdir) - -install-exec-hook: - if ! 
test -d $(DESTDIR)$(sysconfdir); then \ - mkdir -p $(DESTDIR)$(sysconfdir); \ - fi; \ - if test -e $(DESTDIR)$(sysconfdir)/dat.conf; then \ - sed -e '/ofa-v2-.* u2/d' < $(DESTDIR)$(sysconfdir)/dat.conf > /tmp/$$$$ofadapl; \ - cp /tmp/$$$$ofadapl $(DESTDIR)$(sysconfdir)/dat.conf; \ - fi; \ - echo ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"ib0 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ib1 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"ib1 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mthca0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mthca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mthca0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ipath0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ipath0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ipath0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ipath0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-ehca0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ehca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-iwarp u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth2 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mlx4_0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mlx4_0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mthca0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mthca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-mthca0-2u u2.0 
nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-cma-roe-eth2 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth2 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-cma-roe-eth3 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth3 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-scm-roe-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ - echo ofa-v2-scm-roe-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; - -uninstall-hook: - if test -e $(DESTDIR)$(sysconfdir)/dat.conf; then \ - sed -e '/ofa-v2-.* u2/d' < $(DESTDIR)$(sysconfdir)/dat.conf > /tmp/$$$$ofadapl; \ - cp /tmp/$$$$ofadapl $(DESTDIR)$(sysconfdir)/dat.conf; \ - fi; - -SUBDIRS = . test/dtest test/dapltest +# $Id: $ + +OSFLAGS = -DOS_RELEASE=$(shell expr `uname -r | cut -f1 -d.` \* 65536 + `uname -r | cut -f2 -d.`) +# Check for RedHat, needed for ia64 udapl atomic operations (IA64_FETCHADD syntax) +# and built-in atomics for RedHat EL5 +if OS_RHEL4 +OSFLAGS += -DREDHAT_EL4 +endif + +if OS_RHEL5 +OSFLAGS += -DREDHAT_EL5 +endif + +if OS_SUSE11 +OSFLAGS += -DSUSE_11 +endif + +if EXT_TYPE_IB +XFLAGS = -DDAT_EXTENSIONS +XPROGRAMS = dapl/openib_common/ib_extensions.c +else +XFLAGS = +XPROGRAMS = +endif + +if DEFINE_ATTR_LINK_LAYER +XFLAGS += -DDEFINE_ATTR_LINK_LAYER +endif + +if DEBUG +AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAPL_DBG -DDAT_CONF="\"$(sysconfdir)/dat.conf\"" +else +AM_CFLAGS = -g -Wall -D_GNU_SOURCE -DDAT_CONF="\"$(sysconfdir)/dat.conf\"" +endif + +datlibdir = $(libdir) +dapllibofadir = $(libdir) +daplliboscmdir = $(libdir) +daplliboucmdir = $(libdir) + +datlib_LTLIBRARIES = dat/udat/libdat2.la +dapllibofa_LTLIBRARIES = dapl/udapl/libdaplofa.la +daplliboscm_LTLIBRARIES = dapl/udapl/libdaploscm.la +daplliboucm_LTLIBRARIES = dapl/udapl/libdaploucm.la + 
+dat_udat_libdat2_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ + -I$(srcdir)/dat/include/ -I$(srcdir)/dat/udat/ \ + -I$(srcdir)/dat/udat/linux -I$(srcdir)/dat/common/ + +dapl_udapl_libdaplofa_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ + -DOPENIB -DCQ_WAIT_OBJECT \ + -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ + -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ + -I$(srcdir)/dapl/openib_common \ + -I$(srcdir)/dapl/openib_cma \ + -I$(srcdir)/dapl/openib_cma/linux + +dapl_udapl_libdaploscm_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ + -DOPENIB -DCQ_WAIT_OBJECT \ + -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ + -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ + -I$(srcdir)/dapl/openib_common \ + -I$(srcdir)/dapl/openib_scm \ + -I$(srcdir)/dapl/openib_scm/linux + +dapl_udapl_libdaploucm_la_CFLAGS = $(AM_CFLAGS) -D_GNU_SOURCE $(OSFLAGS) $(XFLAGS) \ + -DOPENIB -DCQ_WAIT_OBJECT \ + -I$(srcdir)/dat/include/ -I$(srcdir)/dapl/include/ \ + -I$(srcdir)/dapl/common -I$(srcdir)/dapl/udapl/linux \ + -I$(srcdir)/dapl/openib_common \ + -I$(srcdir)/dapl/openib_ucm \ + -I$(srcdir)/dapl/openib_ucm/linux + +if HAVE_LD_VERSION_SCRIPT + dat_version_script = -Wl,--version-script=$(srcdir)/dat/udat/libdat2.map + daplofa_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaplofa.map + daploscm_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaploscm.map + daploucm_version_script = -Wl,--version-script=$(srcdir)/dapl/udapl/libdaploucm.map +else + dat_version_script = + daplofa_version_script = + daploscm_version_script = + daploucm_version_script = +endif + +# +# uDAT: libdat2.so +# +dat_udat_libdat2_la_SOURCES = dat/udat/udat.c \ + dat/udat/udat_api.c \ + dat/udat/udat_sr_parser.c \ + dat/udat/linux/dat_osd.c \ + dat/common/dat_api.c \ + dat/common/dat_dictionary.c \ + dat/common/dat_strerror.c \ + dat/common/dat_init.c \ + dat/common/dat_dr.c \ + dat/common/dat_sr.c 
+dat_udat_libdat2_la_LDFLAGS = -version-info 2:0:0 $(dat_version_script) -ldl + +# +# uDAPL OpenFabrics rdma_cm version: libdaplofa.so +# +dapl_udapl_libdaplofa_la_SOURCES = dapl/udapl/dapl_init.c \ + dapl/udapl/dapl_evd_create.c \ + dapl/udapl/dapl_evd_query.c \ + dapl/udapl/dapl_cno_create.c \ + dapl/udapl/dapl_cno_modify_agent.c \ + dapl/udapl/dapl_cno_free.c \ + dapl/udapl/dapl_cno_wait.c \ + dapl/udapl/dapl_cno_query.c \ + dapl/udapl/dapl_lmr_create.c \ + dapl/udapl/dapl_evd_wait.c \ + dapl/udapl/dapl_evd_disable.c \ + dapl/udapl/dapl_evd_enable.c \ + dapl/udapl/dapl_evd_modify_cno.c \ + dapl/udapl/dapl_evd_set_unwaitable.c \ + dapl/udapl/dapl_evd_clear_unwaitable.c \ + dapl/udapl/linux/dapl_osd.c \ + dapl/common/dapl_cookie.c \ + dapl/common/dapl_cr_accept.c \ + dapl/common/dapl_cr_query.c \ + dapl/common/dapl_cr_reject.c \ + dapl/common/dapl_cr_util.c \ + dapl/common/dapl_cr_callback.c \ + dapl/common/dapl_cr_handoff.c \ + dapl/common/dapl_ep_connect.c \ + dapl/common/dapl_ep_create.c \ + dapl/common/dapl_ep_disconnect.c \ + dapl/common/dapl_ep_dup_connect.c \ + dapl/common/dapl_ep_free.c \ + dapl/common/dapl_ep_reset.c \ + dapl/common/dapl_ep_get_status.c \ + dapl/common/dapl_ep_modify.c \ + dapl/common/dapl_ep_post_rdma_read.c \ + dapl/common/dapl_ep_post_rdma_write.c \ + dapl/common/dapl_ep_post_recv.c \ + dapl/common/dapl_ep_post_send.c \ + dapl/common/dapl_ep_query.c \ + dapl/common/dapl_ep_util.c \ + dapl/common/dapl_evd_dequeue.c \ + dapl/common/dapl_evd_free.c \ + dapl/common/dapl_evd_post_se.c \ + dapl/common/dapl_evd_resize.c \ + dapl/common/dapl_evd_util.c \ + dapl/common/dapl_evd_cq_async_error_callb.c \ + dapl/common/dapl_evd_qp_async_error_callb.c \ + dapl/common/dapl_evd_un_async_error_callb.c \ + dapl/common/dapl_evd_connection_callb.c \ + dapl/common/dapl_evd_dto_callb.c \ + dapl/common/dapl_get_consumer_context.c \ + dapl/common/dapl_get_handle_type.c \ + dapl/common/dapl_hash.c \ + dapl/common/dapl_hca_util.c \ + 
dapl/common/dapl_ia_close.c \ + dapl/common/dapl_ia_open.c \ + dapl/common/dapl_ia_query.c \ + dapl/common/dapl_ia_util.c \ + dapl/common/dapl_llist.c \ + dapl/common/dapl_lmr_free.c \ + dapl/common/dapl_lmr_query.c \ + dapl/common/dapl_lmr_util.c \ + dapl/common/dapl_lmr_sync_rdma_read.c \ + dapl/common/dapl_lmr_sync_rdma_write.c \ + dapl/common/dapl_mr_util.c \ + dapl/common/dapl_provider.c \ + dapl/common/dapl_sp_util.c \ + dapl/common/dapl_psp_create.c \ + dapl/common/dapl_psp_create_any.c \ + dapl/common/dapl_psp_free.c \ + dapl/common/dapl_psp_query.c \ + dapl/common/dapl_pz_create.c \ + dapl/common/dapl_pz_free.c \ + dapl/common/dapl_pz_query.c \ + dapl/common/dapl_pz_util.c \ + dapl/common/dapl_rmr_create.c \ + dapl/common/dapl_rmr_free.c \ + dapl/common/dapl_rmr_bind.c \ + dapl/common/dapl_rmr_query.c \ + dapl/common/dapl_rmr_util.c \ + dapl/common/dapl_rsp_create.c \ + dapl/common/dapl_rsp_free.c \ + dapl/common/dapl_rsp_query.c \ + dapl/common/dapl_cno_util.c \ + dapl/common/dapl_set_consumer_context.c \ + dapl/common/dapl_ring_buffer_util.c \ + dapl/common/dapl_name_service.c \ + dapl/common/dapl_timer_util.c \ + dapl/common/dapl_ep_create_with_srq.c \ + dapl/common/dapl_ep_recv_query.c \ + dapl/common/dapl_ep_set_watermark.c \ + dapl/common/dapl_srq_create.c \ + dapl/common/dapl_srq_free.c \ + dapl/common/dapl_srq_query.c \ + dapl/common/dapl_srq_resize.c \ + dapl/common/dapl_srq_post_recv.c \ + dapl/common/dapl_srq_set_lw.c \ + dapl/common/dapl_srq_util.c \ + dapl/common/dapl_debug.c \ + dapl/common/dapl_ia_ha.c \ + dapl/common/dapl_csp.c \ + dapl/common/dapl_ep_post_send_invalidate.c \ + dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ + dapl/openib_common/mem.c \ + dapl/openib_common/cq.c \ + dapl/openib_common/qp.c \ + dapl/openib_common/util.c \ + dapl/openib_cma/cm.c \ + dapl/openib_cma/device.c $(XPROGRAMS) + +dapl_udapl_libdaplofa_la_LDFLAGS = -version-info 2:0:0 $(daplofa_version_script) \ + -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ + -lpthread 
-libverbs -lrdmacm + +# +# uDAPL OpenFabrics Socket CM version for IB: libdaplscm.so +# +dapl_udapl_libdaploscm_la_SOURCES = dapl/udapl/dapl_init.c \ + dapl/udapl/dapl_evd_create.c \ + dapl/udapl/dapl_evd_query.c \ + dapl/udapl/dapl_cno_create.c \ + dapl/udapl/dapl_cno_modify_agent.c \ + dapl/udapl/dapl_cno_free.c \ + dapl/udapl/dapl_cno_wait.c \ + dapl/udapl/dapl_cno_query.c \ + dapl/udapl/dapl_lmr_create.c \ + dapl/udapl/dapl_evd_wait.c \ + dapl/udapl/dapl_evd_disable.c \ + dapl/udapl/dapl_evd_enable.c \ + dapl/udapl/dapl_evd_modify_cno.c \ + dapl/udapl/dapl_evd_set_unwaitable.c \ + dapl/udapl/dapl_evd_clear_unwaitable.c \ + dapl/udapl/linux/dapl_osd.c \ + dapl/common/dapl_cookie.c \ + dapl/common/dapl_cr_accept.c \ + dapl/common/dapl_cr_query.c \ + dapl/common/dapl_cr_reject.c \ + dapl/common/dapl_cr_util.c \ + dapl/common/dapl_cr_callback.c \ + dapl/common/dapl_cr_handoff.c \ + dapl/common/dapl_ep_connect.c \ + dapl/common/dapl_ep_create.c \ + dapl/common/dapl_ep_disconnect.c \ + dapl/common/dapl_ep_dup_connect.c \ + dapl/common/dapl_ep_free.c \ + dapl/common/dapl_ep_reset.c \ + dapl/common/dapl_ep_get_status.c \ + dapl/common/dapl_ep_modify.c \ + dapl/common/dapl_ep_post_rdma_read.c \ + dapl/common/dapl_ep_post_rdma_write.c \ + dapl/common/dapl_ep_post_recv.c \ + dapl/common/dapl_ep_post_send.c \ + dapl/common/dapl_ep_query.c \ + dapl/common/dapl_ep_util.c \ + dapl/common/dapl_evd_dequeue.c \ + dapl/common/dapl_evd_free.c \ + dapl/common/dapl_evd_post_se.c \ + dapl/common/dapl_evd_resize.c \ + dapl/common/dapl_evd_util.c \ + dapl/common/dapl_evd_cq_async_error_callb.c \ + dapl/common/dapl_evd_qp_async_error_callb.c \ + dapl/common/dapl_evd_un_async_error_callb.c \ + dapl/common/dapl_evd_connection_callb.c \ + dapl/common/dapl_evd_dto_callb.c \ + dapl/common/dapl_get_consumer_context.c \ + dapl/common/dapl_get_handle_type.c \ + dapl/common/dapl_hash.c \ + dapl/common/dapl_hca_util.c \ + dapl/common/dapl_ia_close.c \ + dapl/common/dapl_ia_open.c \ + 
dapl/common/dapl_ia_query.c \ + dapl/common/dapl_ia_util.c \ + dapl/common/dapl_llist.c \ + dapl/common/dapl_lmr_free.c \ + dapl/common/dapl_lmr_query.c \ + dapl/common/dapl_lmr_util.c \ + dapl/common/dapl_lmr_sync_rdma_read.c \ + dapl/common/dapl_lmr_sync_rdma_write.c \ + dapl/common/dapl_mr_util.c \ + dapl/common/dapl_provider.c \ + dapl/common/dapl_sp_util.c \ + dapl/common/dapl_psp_create.c \ + dapl/common/dapl_psp_create_any.c \ + dapl/common/dapl_psp_free.c \ + dapl/common/dapl_psp_query.c \ + dapl/common/dapl_pz_create.c \ + dapl/common/dapl_pz_free.c \ + dapl/common/dapl_pz_query.c \ + dapl/common/dapl_pz_util.c \ + dapl/common/dapl_rmr_create.c \ + dapl/common/dapl_rmr_free.c \ + dapl/common/dapl_rmr_bind.c \ + dapl/common/dapl_rmr_query.c \ + dapl/common/dapl_rmr_util.c \ + dapl/common/dapl_rsp_create.c \ + dapl/common/dapl_rsp_free.c \ + dapl/common/dapl_rsp_query.c \ + dapl/common/dapl_cno_util.c \ + dapl/common/dapl_set_consumer_context.c \ + dapl/common/dapl_ring_buffer_util.c \ + dapl/common/dapl_name_service.c \ + dapl/common/dapl_timer_util.c \ + dapl/common/dapl_ep_create_with_srq.c \ + dapl/common/dapl_ep_recv_query.c \ + dapl/common/dapl_ep_set_watermark.c \ + dapl/common/dapl_srq_create.c \ + dapl/common/dapl_srq_free.c \ + dapl/common/dapl_srq_query.c \ + dapl/common/dapl_srq_resize.c \ + dapl/common/dapl_srq_post_recv.c \ + dapl/common/dapl_srq_set_lw.c \ + dapl/common/dapl_srq_util.c \ + dapl/common/dapl_debug.c \ + dapl/common/dapl_ia_ha.c \ + dapl/common/dapl_csp.c \ + dapl/common/dapl_ep_post_send_invalidate.c \ + dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ + dapl/openib_common/mem.c \ + dapl/openib_common/cq.c \ + dapl/openib_common/qp.c \ + dapl/openib_common/util.c \ + dapl/openib_scm/cm.c \ + dapl/openib_scm/device.c $(XPROGRAMS) + +dapl_udapl_libdaploscm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \ + -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ + -lpthread -libverbs -lrdmacm + +# +# uDAPL OpenFabrics UD CM version for 
IB: libdaplucm.so +# +dapl_udapl_libdaploucm_la_SOURCES = dapl/udapl/dapl_init.c \ + dapl/udapl/dapl_evd_create.c \ + dapl/udapl/dapl_evd_query.c \ + dapl/udapl/dapl_cno_create.c \ + dapl/udapl/dapl_cno_modify_agent.c \ + dapl/udapl/dapl_cno_free.c \ + dapl/udapl/dapl_cno_wait.c \ + dapl/udapl/dapl_cno_query.c \ + dapl/udapl/dapl_lmr_create.c \ + dapl/udapl/dapl_evd_wait.c \ + dapl/udapl/dapl_evd_disable.c \ + dapl/udapl/dapl_evd_enable.c \ + dapl/udapl/dapl_evd_modify_cno.c \ + dapl/udapl/dapl_evd_set_unwaitable.c \ + dapl/udapl/dapl_evd_clear_unwaitable.c \ + dapl/udapl/linux/dapl_osd.c \ + dapl/common/dapl_cookie.c \ + dapl/common/dapl_cr_accept.c \ + dapl/common/dapl_cr_query.c \ + dapl/common/dapl_cr_reject.c \ + dapl/common/dapl_cr_util.c \ + dapl/common/dapl_cr_callback.c \ + dapl/common/dapl_cr_handoff.c \ + dapl/common/dapl_ep_connect.c \ + dapl/common/dapl_ep_create.c \ + dapl/common/dapl_ep_disconnect.c \ + dapl/common/dapl_ep_dup_connect.c \ + dapl/common/dapl_ep_free.c \ + dapl/common/dapl_ep_reset.c \ + dapl/common/dapl_ep_get_status.c \ + dapl/common/dapl_ep_modify.c \ + dapl/common/dapl_ep_post_rdma_read.c \ + dapl/common/dapl_ep_post_rdma_write.c \ + dapl/common/dapl_ep_post_recv.c \ + dapl/common/dapl_ep_post_send.c \ + dapl/common/dapl_ep_query.c \ + dapl/common/dapl_ep_util.c \ + dapl/common/dapl_evd_dequeue.c \ + dapl/common/dapl_evd_free.c \ + dapl/common/dapl_evd_post_se.c \ + dapl/common/dapl_evd_resize.c \ + dapl/common/dapl_evd_util.c \ + dapl/common/dapl_evd_cq_async_error_callb.c \ + dapl/common/dapl_evd_qp_async_error_callb.c \ + dapl/common/dapl_evd_un_async_error_callb.c \ + dapl/common/dapl_evd_connection_callb.c \ + dapl/common/dapl_evd_dto_callb.c \ + dapl/common/dapl_get_consumer_context.c \ + dapl/common/dapl_get_handle_type.c \ + dapl/common/dapl_hash.c \ + dapl/common/dapl_hca_util.c \ + dapl/common/dapl_ia_close.c \ + dapl/common/dapl_ia_open.c \ + dapl/common/dapl_ia_query.c \ + dapl/common/dapl_ia_util.c \ + 
dapl/common/dapl_llist.c \ + dapl/common/dapl_lmr_free.c \ + dapl/common/dapl_lmr_query.c \ + dapl/common/dapl_lmr_util.c \ + dapl/common/dapl_lmr_sync_rdma_read.c \ + dapl/common/dapl_lmr_sync_rdma_write.c \ + dapl/common/dapl_mr_util.c \ + dapl/common/dapl_provider.c \ + dapl/common/dapl_sp_util.c \ + dapl/common/dapl_psp_create.c \ + dapl/common/dapl_psp_create_any.c \ + dapl/common/dapl_psp_free.c \ + dapl/common/dapl_psp_query.c \ + dapl/common/dapl_pz_create.c \ + dapl/common/dapl_pz_free.c \ + dapl/common/dapl_pz_query.c \ + dapl/common/dapl_pz_util.c \ + dapl/common/dapl_rmr_create.c \ + dapl/common/dapl_rmr_free.c \ + dapl/common/dapl_rmr_bind.c \ + dapl/common/dapl_rmr_query.c \ + dapl/common/dapl_rmr_util.c \ + dapl/common/dapl_rsp_create.c \ + dapl/common/dapl_rsp_free.c \ + dapl/common/dapl_rsp_query.c \ + dapl/common/dapl_cno_util.c \ + dapl/common/dapl_set_consumer_context.c \ + dapl/common/dapl_ring_buffer_util.c \ + dapl/common/dapl_name_service.c \ + dapl/common/dapl_timer_util.c \ + dapl/common/dapl_ep_create_with_srq.c \ + dapl/common/dapl_ep_recv_query.c \ + dapl/common/dapl_ep_set_watermark.c \ + dapl/common/dapl_srq_create.c \ + dapl/common/dapl_srq_free.c \ + dapl/common/dapl_srq_query.c \ + dapl/common/dapl_srq_resize.c \ + dapl/common/dapl_srq_post_recv.c \ + dapl/common/dapl_srq_set_lw.c \ + dapl/common/dapl_srq_util.c \ + dapl/common/dapl_debug.c \ + dapl/common/dapl_ia_ha.c \ + dapl/common/dapl_csp.c \ + dapl/common/dapl_ep_post_send_invalidate.c \ + dapl/common/dapl_ep_post_rdma_read_to_rmr.c \ + dapl/openib_common/mem.c \ + dapl/openib_common/cq.c \ + dapl/openib_common/qp.c \ + dapl/openib_common/util.c \ + dapl/openib_ucm/cm.c \ + dapl/openib_ucm/device.c $(XPROGRAMS) + +dapl_udapl_libdaploucm_la_LDFLAGS = -version-info 2:0:0 $(daploscm_version_script) \ + -Wl,-init,dapl_init -Wl,-fini,dapl_fini \ + -lpthread -libverbs -lrdmacm + +libdatincludedir = $(includedir)/dat2 + +libdatinclude_HEADERS = dat/include/dat2/dat.h \ + 
dat/include/dat2/dat_error.h \ + dat/include/dat2/dat_platform_specific.h \ + dat/include/dat2/dat_redirection.h \ + dat/include/dat2/dat_registry.h \ + dat/include/dat2/dat_vendor_specific.h \ + dat/include/dat2/udat_config.h \ + dat/include/dat2/udat.h \ + dat/include/dat2/udat_redirection.h \ + dat/include/dat2/udat_vendor_specific.h \ + dat/include/dat2/dat_ib_extensions.h + +man_MANS = man/dtest.1 man/dapltest.1 man/dat.conf.5 + +EXTRA_DIST = dat/common/dat_dictionary.h \ + dat/common/dat_dr.h \ + dat/common/dat_init.h \ + dat/common/dat_sr.h \ + dat/udat/udat_sr_parser.h \ + dat/udat/linux/dat_osd.h \ + dat/include/dat2/dat.h \ + dat/include/dat2/dat_error.h \ + dat/include/dat2/dat_platform_specific.h \ + dat/include/dat2/dat_redirection.h \ + dat/include/dat2/dat_registry.h \ + dat/include/dat2/dat_vendor_specific.h \ + dat/include/dat2/udat_config.h \ + dat/include/dat2/udat.h \ + dat/include/dat2/udat_redirection.h \ + dat/include/dat2/udat_vendor_specific.h \ + dapl/common/dapl_adapter_util.h \ + dapl/common/dapl_cno_util.h \ + dapl/common/dapl_cookie.h \ + dapl/common/dapl_cr_util.h \ + dapl/common/dapl_ep_util.h \ + dapl/common/dapl_evd_util.h \ + dapl/common/dapl_hash.h \ + dapl/common/dapl_hca_util.h \ + dapl/common/dapl_ia_util.h \ + dapl/common/dapl_init.h \ + dapl/common/dapl_lmr_util.h \ + dapl/common/dapl_mr_util.h \ + dapl/common/dapl_name_service.h \ + dapl/common/dapl_provider.h \ + dapl/common/dapl_pz_util.h \ + dapl/common/dapl_ring_buffer_util.h \ + dapl/common/dapl_rmr_util.h \ + dapl/common/dapl_sp_util.h \ + dapl/common/dapl_srq_util.h \ + dapl/common/dapl_timer_util.h \ + dapl/udapl/linux/dapl_osd.h \ + dapl/include/dapl.h \ + dapl/include/dapl_debug.h \ + dapl/include/dapl_ipoib_names.h \ + dapl/include/dapl_vendor.h \ + dapl/openib_common/dapl_ib_dto.h \ + dapl/openib_common/dapl_ib_common.h \ + dapl/openib_cma/dapl_ib_util.h \ + dapl/openib_cma/linux/openib_osd.h \ + dapl/openib_scm/dapl_ib_util.h \ + 
dapl/openib_scm/linux/openib_osd.h \ + dapl/openib_ucm/dapl_ib_util.h \ + dapl/openib_ucm/linux/openib_osd.h \ + dat/udat/libdat2.map \ + dapl/udapl/libdaplofa.map \ + dapl/udapl/libdaploscm.map \ + dapl/udapl/libdaploucm.map \ + LICENSE.txt \ + LICENSE2.txt \ + LICENSE3.txt \ + dapl.spec.in \ + $(man_MANS) \ + test/dapltest/include/dapl_bpool.h \ + test/dapltest/include/dapl_client_info.h \ + test/dapltest/include/dapl_common.h \ + test/dapltest/include/dapl_execute.h \ + test/dapltest/include/dapl_fft_cmd.h \ + test/dapltest/include/dapl_fft_util.h \ + test/dapltest/include/dapl_getopt.h \ + test/dapltest/include/dapl_global.h \ + test/dapltest/include/dapl_limit_cmd.h \ + test/dapltest/include/dapl_mdep.h \ + test/dapltest/include/dapl_memlist.h \ + test/dapltest/include/dapl_params.h \ + test/dapltest/include/dapl_performance_cmd.h \ + test/dapltest/include/dapl_performance_stats.h \ + test/dapltest/include/dapl_performance_test.h \ + test/dapltest/include/dapl_proto.h \ + test/dapltest/include/dapl_quit_cmd.h \ + test/dapltest/include/dapl_server_cmd.h \ + test/dapltest/include/dapl_server_info.h \ + test/dapltest/include/dapl_tdep.h \ + test/dapltest/include/dapl_tdep_print.h \ + test/dapltest/include/dapl_test_data.h \ + test/dapltest/include/dapl_transaction_cmd.h \ + test/dapltest/include/dapl_transaction_stats.h \ + test/dapltest/include/dapl_transaction_test.h \ + test/dapltest/include/dapl_version.h \ + test/dapltest/mdep/linux/dapl_mdep_user.h + +dist-hook: dapl.spec + cp dapl.spec $(distdir) + +install-exec-hook: + if ! 
test -d $(DESTDIR)$(sysconfdir); then \ + mkdir -p $(DESTDIR)$(sysconfdir); \ + fi; \ + if test -e $(DESTDIR)$(sysconfdir)/dat.conf; then \ + sed -e '/ofa-v2-.* u2/d' < $(DESTDIR)$(sysconfdir)/dat.conf > /tmp/$$$$ofadapl; \ + cp /tmp/$$$$ofadapl $(DESTDIR)$(sysconfdir)/dat.conf; \ + fi; \ + echo ofa-v2-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ib0 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"ib0 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ib1 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"ib1 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mthca0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mthca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mthca0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ipath0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ipath0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ipath0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ipath0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-ehca0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"ehca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-iwarp u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth2 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mlx4_0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mlx4_0-2u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mthca0-1u u2.0 nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mthca0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-mthca0-2u u2.0 
nonthreadsafe default libdaploucm.so.2 dapl.2.0 '"mthca0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-cma-roe-eth2 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth2 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-cma-roe-eth3 u2.0 nonthreadsafe default libdaplofa.so.2 dapl.2.0 '"eth3 0" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-scm-roe-mlx4_0-1 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 1" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; \ + echo ofa-v2-scm-roe-mlx4_0-2 u2.0 nonthreadsafe default libdaploscm.so.2 dapl.2.0 '"mlx4_0 2" ""' >> $(DESTDIR)$(sysconfdir)/dat.conf; + +uninstall-hook: + if test -e $(DESTDIR)$(sysconfdir)/dat.conf; then \ + sed -e '/ofa-v2-.* u2/d' < $(DESTDIR)$(sysconfdir)/dat.conf > /tmp/$$$$ofadapl; \ + cp /tmp/$$$$ofadapl $(DESTDIR)$(sysconfdir)/dat.conf; \ + fi; + +SUBDIRS = . test/dtest test/dapltest diff --git a/trunk/ulp/dapl2/configure.in b/trunk/ulp/dapl2/configure.in index b67314c1..33783274 100644 --- a/trunk/ulp/dapl2/configure.in +++ b/trunk/ulp/dapl2/configure.in @@ -1,104 +1,118 @@ -dnl Process this file with autoconf to produce a configure script. - -AC_PREREQ(2.57) -AC_INIT(dapl, 2.0.30, linux-rdma@vger.kernel.org) -AC_CONFIG_SRCDIR([dat/udat/udat.c]) -AC_CONFIG_AUX_DIR(config) -AM_CONFIG_HEADER(config.h) -AM_INIT_AUTOMAKE(dapl, 2.0.30) - -AM_PROG_LIBTOOL - -AC_ARG_ENABLE(libcheck, [ --disable-libcheck do not test for presence of ib libraries], -[ if test x$enableval = xno ; then - disable_libcheck=yes - fi -]) - -dnl Checks for programs -AC_PROG_CC - -dnl Checks for libraries -if test "$disable_libcheck" != "yes" -then -AC_CHECK_LIB(ibverbs, ibv_get_device_list, [], - AC_MSG_ERROR([ibv_get_device_list() not found. libdapl requires libibverbs.])) - -AC_CHECK_HEADER(infiniband/verbs.h, [], - AC_MSG_ERROR([ not found. 
Is libibverbs installed?])) - -AC_CHECK_MEMBER(struct ibv_port_attr.link_layer, - AM_CONDITIONAL(DEFINE_ATTR_LINK_LAYER, test "yes" = "yes"), - AM_CONDITIONAL(DEFINE_ATTR_LINK_LAYER, test "yes" = "no"), - [#include ]) -else - AM_CONDITIONAL(DEFINE_ATTR_LINK_LAYER, test "yes" = "no") -fi - -AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, - if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then - ac_cv_version_script=yes - else - ac_cv_version_script=no - fi) -AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes") - -dnl Support debug mode build - if enable-debug provided the DEBUG variable is set -AC_ARG_ENABLE(debug, -[ --enable-debug Turn on debug mode, default=off], -[case "${enableval}" in - yes) debug=true ;; - no) debug=false ;; - *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;; -esac],[debug=false]) -AM_CONDITIONAL(DEBUG, test x$debug = xtrue) - -dnl Support ib_extension build - if enable-ext-type == ib -AC_ARG_ENABLE(ext-type, -[ --enable-ext-type Enable extensions support for library: ib, none, default=ib], - [ if test "x$enableval" = "xib" ; then - ext_type=ib - elif test "x$enableval" = "xnone" ; then - ext_type=none - else - echo - echo "Error!" 
- echo "Unknown extension type' type" - exit -1 - fi - ],[ext_type=ib]) -AM_CONDITIONAL(EXT_TYPE_IB, test "$ext_type" = "ib") - -dnl Check for Redhat EL release 4 -AC_CACHE_CHECK(Check for RHEL4 system, ac_cv_rhel4, - if test -f /etc/redhat-release && - test -n "`grep -e "release 4" /etc/redhat-release`"; then - ac_cv_rhel4=yes - else - ac_cv_rhel4=no - fi) -AM_CONDITIONAL(OS_RHEL4, test "$ac_cv_rhel4" = "yes") - -dnl Check for Redhat EL release 5 -AC_CACHE_CHECK(Check for RHEL5 system, ac_cv_rhel5, - if test -f /etc/redhat-release && - test -n "`grep -e "release 5" /etc/redhat-release`"; then - ac_cv_rhel5=yes - else - ac_cv_rhel5=no - fi) -AM_CONDITIONAL(OS_RHEL5, test "$ac_cv_rhel5" = "yes") - -dnl Check for SuSE release 11 -AC_CACHE_CHECK(Check for SUSE_11 system, ac_cv_suse11, - if test -f /etc/SuSE-release && - test -n "`grep -e "VERSION = 11" /etc/SuSE-release`"; then - ac_cv_suse11=yes - else - ac_cv_suse11=no - fi) -AM_CONDITIONAL(OS_SUSE11, test "$ac_cv_suse11" = "yes") - -AC_CONFIG_FILES([Makefile test/dtest/Makefile test/dapltest/Makefile dapl.spec]) - -AC_OUTPUT +dnl Process this file with autoconf to produce a configure script. + +AC_PREREQ(2.57) +AC_INIT(dapl, 2.0.30, linux-rdma@vger.kernel.org) +AC_CONFIG_SRCDIR([dat/udat/udat.c]) +AC_CONFIG_AUX_DIR(config) +AM_CONFIG_HEADER(config.h) +AM_INIT_AUTOMAKE(dapl, 2.0.30) + +AM_PROG_LIBTOOL + +AC_ARG_ENABLE(libcheck, [ --disable-libcheck do not test for presence of ib libraries], +[ if test x$enableval = xno ; then + disable_libcheck=yes + fi +]) + +dnl Checks for programs +AC_PROG_CC + +dnl Checks for libraries +if test "$disable_libcheck" != "yes" +then +AC_CHECK_LIB(ibverbs, ibv_get_device_list, [], + AC_MSG_ERROR([ibv_get_device_list() not found. libdapl requires libibverbs.])) + +AC_CHECK_HEADER(infiniband/verbs.h, [], + AC_MSG_ERROR([ not found. 
Is libibverbs installed?])) + +AC_CHECK_MEMBER(struct ibv_port_attr.link_layer, + AM_CONDITIONAL(DEFINE_ATTR_LINK_LAYER, test "yes" = "yes"), + AM_CONDITIONAL(DEFINE_ATTR_LINK_LAYER, test "yes" = "no"), + [#include ]) + +if test "$with_ib_acm" != "" && test "$with_ib_acm" != "no"; then +AC_CHECK_MEMBER(struct ibv_path_record.service_id, [], + AC_MSG_ERROR([IB ACM support requires libibverbs 1.1.4 or greater.]), + [#include ]) +AC_CHECK_HEADER(infiniband/acm.h, [], + AC_MSG_ERROR([IB ACM requested but not found.])) +fi + +else + AM_CONDITIONAL(DEFINE_ATTR_LINK_LAYER, test "yes" = "no") +fi +dnl End check for libraries + +if test "$with_ib_acm" != "" && test "$with_ib_acm" != "no"; then + AC_DEFINE(DAPL_USE_IBACM, 1, [set to 1 to use IB ACM services]) +fi + +AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, + if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then + ac_cv_version_script=yes + else + ac_cv_version_script=no + fi) +AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes") + +dnl Support debug mode build - if enable-debug provided the DEBUG variable is set +AC_ARG_ENABLE(debug, +[ --enable-debug Turn on debug mode, default=off], +[case "${enableval}" in + yes) debug=true ;; + no) debug=false ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;; +esac],[debug=false]) +AM_CONDITIONAL(DEBUG, test x$debug = xtrue) + +dnl Support ib_extension build - if enable-ext-type == ib +AC_ARG_ENABLE(ext-type, +[ --enable-ext-type Enable extensions support for library: ib, none, default=ib], + [ if test "x$enableval" = "xib" ; then + ext_type=ib + elif test "x$enableval" = "xnone" ; then + ext_type=none + else + echo + echo "Error!" 
+ echo "Unknown extension type' type" + exit -1 + fi + ],[ext_type=ib]) +AM_CONDITIONAL(EXT_TYPE_IB, test "$ext_type" = "ib") + +dnl Check for Redhat EL release 4 +AC_CACHE_CHECK(Check for RHEL4 system, ac_cv_rhel4, + if test -f /etc/redhat-release && + test -n "`grep -e "release 4" /etc/redhat-release`"; then + ac_cv_rhel4=yes + else + ac_cv_rhel4=no + fi) +AM_CONDITIONAL(OS_RHEL4, test "$ac_cv_rhel4" = "yes") + +dnl Check for Redhat EL release 5 +AC_CACHE_CHECK(Check for RHEL5 system, ac_cv_rhel5, + if test -f /etc/redhat-release && + test -n "`grep -e "release 5" /etc/redhat-release`"; then + ac_cv_rhel5=yes + else + ac_cv_rhel5=no + fi) +AM_CONDITIONAL(OS_RHEL5, test "$ac_cv_rhel5" = "yes") + +dnl Check for SuSE release 11 +AC_CACHE_CHECK(Check for SUSE_11 system, ac_cv_suse11, + if test -f /etc/SuSE-release && + test -n "`grep -e "VERSION = 11" /etc/SuSE-release`"; then + ac_cv_suse11=yes + else + ac_cv_suse11=no + fi) +AM_CONDITIONAL(OS_SUSE11, test "$ac_cv_suse11" = "yes") + +AC_CONFIG_FILES([Makefile test/dtest/Makefile test/dapltest/Makefile dapl.spec]) + +AC_OUTPUT diff --git a/trunk/ulp/dapl2/dapl/common/dapl_debug.c b/trunk/ulp/dapl2/dapl/common/dapl_debug.c index 904d0752..f311a7a3 100644 --- a/trunk/ulp/dapl2/dapl/common/dapl_debug.c +++ b/trunk/ulp/dapl2/dapl/common/dapl_debug.c @@ -1,243 +1,252 @@ -/* - * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. 
- * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. - */ - -#include "dapl.h" -#if !defined(__KDAPL__) -#include -#include -#endif /* __KDAPL__ */ - -DAPL_DBG_TYPE g_dapl_dbg_type; /* initialized in dapl_init.c */ -DAPL_DBG_DEST g_dapl_dbg_dest; /* initialized in dapl_init.c */ - -static char *_ptr_host_ = NULL; -static char _hostname_[128]; - -void dapl_internal_dbg_log(DAPL_DBG_TYPE type, const char *fmt, ...) -{ - va_list args; - - if (_ptr_host_ == NULL) { - gethostname(_hostname_, sizeof(_hostname_)); - _ptr_host_ = _hostname_; - } - - if (type & g_dapl_dbg_type) { - if (DAPL_DBG_DEST_STDOUT & g_dapl_dbg_dest) { - va_start(args, fmt); - fprintf(stdout, "%s:%x: ", _ptr_host_, - dapl_os_getpid()); - dapl_os_vprintf(fmt, args); - va_end(args); - } - - if (DAPL_DBG_DEST_SYSLOG & g_dapl_dbg_dest) { - va_start(args, fmt); - dapl_os_syslog(fmt, args); - va_end(args); - } - } -} - -#ifdef DAPL_COUNTERS - -/* - * The order of this list must match the DAT counter definitions - */ -static char *ia_cntr_names[] = { - "DCNT_IA_PZ_CREATE", - "DCNT_IA_PZ_FREE", - "DCNT_IA_LMR_CREATE", - "DCNT_IA_LMR_FREE", - "DCNT_IA_RMR_CREATE", - "DCNT_IA_RMR_FREE", - "DCNT_IA_PSP_CREATE", - "DCNT_IA_PSP_CREATE_ANY", - "DCNT_IA_PSP_FREE", - "DCNT_IA_RSP_CREATE", - "DCNT_IA_RSP_FREE", - "DCNT_IA_EVD_CREATE", - "DCNT_IA_EVD_FREE", - "DCNT_IA_EP_CREATE", - "DCNT_IA_EP_FREE", - "DCNT_IA_SRQ_CREATE", - "DCNT_IA_SRQ_FREE", - "DCNT_IA_SP_CR", - "DCNT_IA_SP_CR_ACCEPTED", - 
"DCNT_IA_SP_CR_REJECTED", - "DCNT_IA_MEM_ALLOC", - "DCNT_IA_MEM_ALLOC_DATA", - "DCNT_IA_MEM_FREE", - "DCNT_IA_ASYNC_ERROR", - "DCNT_IA_ASYNC_QP_ERROR", - "DCNT_IA_ASYNC_CQ_ERROR" -}; - -static char *ep_cntr_names[] = { - "DCNT_EP_CONNECT", - "DCNT_EP_DISCONNECT", - "DCNT_EP_POST_SEND", - "DCNT_EP_POST_SEND_DATA", - "DCNT_EP_POST_SEND_UD", - "DCNT_EP_POST_SEND_UD_DATA", - "DCNT_EP_POST_RECV", - "DCNT_EP_POST_RECV_DATA", - "DCNT_EP_POST_WRITE", - "DCNT_EP_POST_WRITE_DATA", - "DCNT_EP_POST_WRITE_IMM", - "DCNT_EP_POST_WRITE_IMM_DATA", - "DCNT_EP_POST_READ", - "DCNT_EP_POST_READ_DATA", - "DCNT_EP_POST_CMP_SWAP", - "DCNT_EP_POST_FETCH_ADD", - "DCNT_EP_RECV", - "DCNT_EP_RECV_DATA", - "DCNT_EP_RECV_UD", - "DCNT_EP_RECV_UD_DATA", - "DCNT_EP_RECV_IMM", - "DCNT_EP_RECV_IMM_DATA", - "DCNT_EP_RECV_RDMA_IMM", - "DCNT_EP_RECV_RDMA_IMM_DATA", -}; - -static char *evd_cntr_names[] = { - "DCNT_EVD_WAIT", - "DCNT_EVD_WAIT_BLOCKED", - "DCNT_EVD_WAIT_NOTIFY", - "DCNT_EVD_DEQUEUE", - "DCNT_EVD_DEQUEUE_FOUND", - "DCNT_EVD_DEQUEUE_NOT_FOUND", - "DCNT_EVD_DEQUEUE_POLL", - "DCNT_EVD_DEQUEUE_POLL_FOUND", - "DCNT_EVD_CONN_CALLBACK", - "DCNT_EVD_DTO_CALLBACK", -}; - -DAT_RETURN dapl_query_counter(DAT_HANDLE dh, - int counter, void *p_cntrs_out, int reset) -{ - int i, max; - DAT_UINT64 *p_cntrs; - DAT_HANDLE_TYPE type = 0; - - dat_get_handle_type(dh, &type); - - switch (type) { - case DAT_HANDLE_TYPE_IA: - max = DCNT_IA_ALL_COUNTERS; - p_cntrs = ((DAPL_IA *) dh)->cntrs; - break; - case DAT_HANDLE_TYPE_EP: - max = DCNT_EP_ALL_COUNTERS; - p_cntrs = ((DAPL_EP *) dh)->cntrs; - break; - case DAT_HANDLE_TYPE_EVD: - max = DCNT_EVD_ALL_COUNTERS; - p_cntrs = ((DAPL_EVD *) dh)->cntrs; - break; - default: - return DAT_INVALID_HANDLE; - } - - for (i = 0; i < max; i++) { - if ((counter == i) || (counter == max)) { - ((DAT_UINT64 *) p_cntrs_out)[i] = p_cntrs[i]; - if (reset) - p_cntrs[i] = 0; - } - } - return DAT_SUCCESS; -} - -char *dapl_query_counter_name(DAT_HANDLE dh, int counter) -{ - DAT_HANDLE_TYPE 
type = 0; - - dat_get_handle_type(dh, &type); - - switch (type) { - case DAT_HANDLE_TYPE_IA: - if (counter < DCNT_IA_ALL_COUNTERS) - return ia_cntr_names[counter]; - break; - case DAT_HANDLE_TYPE_EP: - if (counter < DCNT_EP_ALL_COUNTERS) - return ep_cntr_names[counter]; - break; - case DAT_HANDLE_TYPE_EVD: - if (counter < DCNT_EVD_ALL_COUNTERS) - return evd_cntr_names[counter]; - break; - default: - return NULL; - } - return NULL; -} - -void dapl_print_counter(DAT_HANDLE dh, int counter, int reset) -{ - int i, max; - DAT_UINT64 *p_cntrs; - DAT_HANDLE_TYPE type = 0; - - dat_get_handle_type(dh, &type); - - switch (type) { - case DAT_HANDLE_TYPE_IA: - max = DCNT_IA_ALL_COUNTERS; - p_cntrs = ((DAPL_IA *) dh)->cntrs; - break; - case DAT_HANDLE_TYPE_EP: - max = DCNT_EP_ALL_COUNTERS; - p_cntrs = ((DAPL_EP *) dh)->cntrs; - break; - case DAT_HANDLE_TYPE_EVD: - max = DCNT_EVD_ALL_COUNTERS; - p_cntrs = ((DAPL_EVD *) dh)->cntrs; - break; - default: - return; - } - - for (i = 0; i < max; i++) { - if ((counter == i) || (counter == max)) { - printf(" %s = " F64u " \n", - dapl_query_counter_name(dh, i), p_cntrs[i]); - if (reset) - p_cntrs[i] = 0; - } - } - - /* Print in process CR's for this IA, if debug type set */ - if ((type == DAT_HANDLE_TYPE_IA) && - (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST)) { - dapls_print_cm_list((DAPL_IA*)dh); - } - return; -} - -#endif /* DAPL_COUNTERS */ +/* + * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. 
+ * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + */ + +#include "dapl.h" +#if !defined(__KDAPL__) +#include +#include +#endif /* __KDAPL__ */ + +DAPL_DBG_TYPE g_dapl_dbg_type; /* initialized in dapl_init.c */ +DAPL_DBG_DEST g_dapl_dbg_dest; /* initialized in dapl_init.c */ + +static char *_ptr_host_ = NULL; +static char _hostname_[128]; +static DAPL_OS_TIMEVAL start_t, current_t, last_t; /* microsecond timeStamp STDOUT */ +static int delta_t, total_t; + +void dapl_internal_dbg_log(DAPL_DBG_TYPE type, const char *fmt, ...) +{ + va_list args; + + if (_ptr_host_ == NULL) { + gethostname(_hostname_, sizeof(_hostname_)); + _ptr_host_ = _hostname_; + dapl_os_get_time(&start_t); + last_t = start_t; + } + dapl_os_get_time(¤t_t); + delta_t = current_t - last_t; + total_t = current_t - start_t; + last_t = current_t; + + if (type & g_dapl_dbg_type) { + if (DAPL_DBG_DEST_STDOUT & g_dapl_dbg_dest) { + va_start(args, fmt); + fprintf(stdout, "%s:%x:%x: %d us(%d us%s): ", + _ptr_host_, dapl_os_getpid(), dapl_os_gettid(), + total_t, delta_t, delta_t > 500000 ? 
"!!!":""); + dapl_os_vprintf(fmt, args); + va_end(args); + } + + if (DAPL_DBG_DEST_SYSLOG & g_dapl_dbg_dest) { + va_start(args, fmt); + dapl_os_syslog(fmt, args); + va_end(args); + } + } +} + +#ifdef DAPL_COUNTERS + +/* + * The order of this list must match the DAT counter definitions + */ +static char *ia_cntr_names[] = { + "DCNT_IA_PZ_CREATE", + "DCNT_IA_PZ_FREE", + "DCNT_IA_LMR_CREATE", + "DCNT_IA_LMR_FREE", + "DCNT_IA_RMR_CREATE", + "DCNT_IA_RMR_FREE", + "DCNT_IA_PSP_CREATE", + "DCNT_IA_PSP_CREATE_ANY", + "DCNT_IA_PSP_FREE", + "DCNT_IA_RSP_CREATE", + "DCNT_IA_RSP_FREE", + "DCNT_IA_EVD_CREATE", + "DCNT_IA_EVD_FREE", + "DCNT_IA_EP_CREATE", + "DCNT_IA_EP_FREE", + "DCNT_IA_SRQ_CREATE", + "DCNT_IA_SRQ_FREE", + "DCNT_IA_SP_CR", + "DCNT_IA_SP_CR_ACCEPTED", + "DCNT_IA_SP_CR_REJECTED", + "DCNT_IA_MEM_ALLOC", + "DCNT_IA_MEM_ALLOC_DATA", + "DCNT_IA_MEM_FREE", + "DCNT_IA_ASYNC_ERROR", + "DCNT_IA_ASYNC_QP_ERROR", + "DCNT_IA_ASYNC_CQ_ERROR" +}; + +static char *ep_cntr_names[] = { + "DCNT_EP_CONNECT", + "DCNT_EP_DISCONNECT", + "DCNT_EP_POST_SEND", + "DCNT_EP_POST_SEND_DATA", + "DCNT_EP_POST_SEND_UD", + "DCNT_EP_POST_SEND_UD_DATA", + "DCNT_EP_POST_RECV", + "DCNT_EP_POST_RECV_DATA", + "DCNT_EP_POST_WRITE", + "DCNT_EP_POST_WRITE_DATA", + "DCNT_EP_POST_WRITE_IMM", + "DCNT_EP_POST_WRITE_IMM_DATA", + "DCNT_EP_POST_READ", + "DCNT_EP_POST_READ_DATA", + "DCNT_EP_POST_CMP_SWAP", + "DCNT_EP_POST_FETCH_ADD", + "DCNT_EP_RECV", + "DCNT_EP_RECV_DATA", + "DCNT_EP_RECV_UD", + "DCNT_EP_RECV_UD_DATA", + "DCNT_EP_RECV_IMM", + "DCNT_EP_RECV_IMM_DATA", + "DCNT_EP_RECV_RDMA_IMM", + "DCNT_EP_RECV_RDMA_IMM_DATA", +}; + +static char *evd_cntr_names[] = { + "DCNT_EVD_WAIT", + "DCNT_EVD_WAIT_BLOCKED", + "DCNT_EVD_WAIT_NOTIFY", + "DCNT_EVD_DEQUEUE", + "DCNT_EVD_DEQUEUE_FOUND", + "DCNT_EVD_DEQUEUE_NOT_FOUND", + "DCNT_EVD_DEQUEUE_POLL", + "DCNT_EVD_DEQUEUE_POLL_FOUND", + "DCNT_EVD_CONN_CALLBACK", + "DCNT_EVD_DTO_CALLBACK", +}; + +DAT_RETURN dapl_query_counter(DAT_HANDLE dh, + int counter, void 
*p_cntrs_out, int reset) +{ + int i, max; + DAT_UINT64 *p_cntrs; + DAT_HANDLE_TYPE type = 0; + + dat_get_handle_type(dh, &type); + + switch (type) { + case DAT_HANDLE_TYPE_IA: + max = DCNT_IA_ALL_COUNTERS; + p_cntrs = ((DAPL_IA *) dh)->cntrs; + break; + case DAT_HANDLE_TYPE_EP: + max = DCNT_EP_ALL_COUNTERS; + p_cntrs = ((DAPL_EP *) dh)->cntrs; + break; + case DAT_HANDLE_TYPE_EVD: + max = DCNT_EVD_ALL_COUNTERS; + p_cntrs = ((DAPL_EVD *) dh)->cntrs; + break; + default: + return DAT_INVALID_HANDLE; + } + + for (i = 0; i < max; i++) { + if ((counter == i) || (counter == max)) { + ((DAT_UINT64 *) p_cntrs_out)[i] = p_cntrs[i]; + if (reset) + p_cntrs[i] = 0; + } + } + return DAT_SUCCESS; +} + +char *dapl_query_counter_name(DAT_HANDLE dh, int counter) +{ + DAT_HANDLE_TYPE type = 0; + + dat_get_handle_type(dh, &type); + + switch (type) { + case DAT_HANDLE_TYPE_IA: + if (counter < DCNT_IA_ALL_COUNTERS) + return ia_cntr_names[counter]; + break; + case DAT_HANDLE_TYPE_EP: + if (counter < DCNT_EP_ALL_COUNTERS) + return ep_cntr_names[counter]; + break; + case DAT_HANDLE_TYPE_EVD: + if (counter < DCNT_EVD_ALL_COUNTERS) + return evd_cntr_names[counter]; + break; + default: + return NULL; + } + return NULL; +} + +void dapl_print_counter(DAT_HANDLE dh, int counter, int reset) +{ + int i, max; + DAT_UINT64 *p_cntrs; + DAT_HANDLE_TYPE type = 0; + + dat_get_handle_type(dh, &type); + + switch (type) { + case DAT_HANDLE_TYPE_IA: + max = DCNT_IA_ALL_COUNTERS; + p_cntrs = ((DAPL_IA *) dh)->cntrs; + break; + case DAT_HANDLE_TYPE_EP: + max = DCNT_EP_ALL_COUNTERS; + p_cntrs = ((DAPL_EP *) dh)->cntrs; + break; + case DAT_HANDLE_TYPE_EVD: + max = DCNT_EVD_ALL_COUNTERS; + p_cntrs = ((DAPL_EVD *) dh)->cntrs; + break; + default: + return; + } + + for (i = 0; i < max; i++) { + if ((counter == i) || (counter == max)) { + printf(" %s = " F64u " \n", + dapl_query_counter_name(dh, i), p_cntrs[i]); + if (reset) + p_cntrs[i] = 0; + } + } + + /* Print in process CR's for this IA, if debug type set */ + if 
((type == DAT_HANDLE_TYPE_IA) && + (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST)) { + dapls_print_cm_list((DAPL_IA*)dh); + } + return; +} + +#endif /* DAPL_COUNTERS */ diff --git a/trunk/ulp/dapl2/dapl/common/dapl_ep_free.c b/trunk/ulp/dapl2/dapl/common/dapl_ep_free.c index 32d50cce..d2f9b504 100644 --- a/trunk/ulp/dapl2/dapl/common/dapl_ep_free.c +++ b/trunk/ulp/dapl2/dapl/common/dapl_ep_free.c @@ -1,221 +1,224 @@ -/* - * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -/********************************************************************** - * - * MODULE: dapl_ep_free.c - * - * PURPOSE: Endpoint management - * Description: Interfaces in this file are completely described in - * the DAPL 1.1 API, Chapter 6, section 5.4 - * - * $Id:$ - **********************************************************************/ - -#include "dapl.h" -#include "dapl_ia_util.h" -#include "dapl_ep_util.h" -#include "dapl_adapter_util.h" -#include "dapl_ring_buffer_util.h" -#include "dapl_timer_util.h" - -/* - * dapl_ep_free - * - * DAPL Requirements Version xxx, 6.5.3 - * - * Destroy an instance of the Endpoint - * - * Input: - * ep_handle - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INVALID_PARAMETER - * DAT_INVALID_STATE - */ -DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle) -{ - DAPL_EP *ep_ptr; - DAPL_IA *ia_ptr; - DAT_EP_PARAM *param; - dp_ib_cm_handle_t cm_ptr, next_cm_ptr; - ib_qp_state_t save_qp_state; - DAT_RETURN dat_status = DAT_SUCCESS; - - dapl_dbg_log(DAPL_DBG_TYPE_API | DAPL_DBG_TYPE_CM, - "dapl_ep_free (%p)\n", ep_handle); - - ep_ptr = (DAPL_EP *) ep_handle; - param = &ep_ptr->param; - - /* - * Verify parameter & state - */ - if (DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP)) { - dat_status = - DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP); - goto bail; - } - DAPL_CNTR(ep_ptr->header.owner_ia, DCNT_IA_EP_FREE); - - if (ep_ptr->param.ep_state == DAT_EP_STATE_RESERVED || - ep_ptr->param.ep_state == DAT_EP_STATE_PASSIVE_CONNECTION_PENDING || - ep_ptr->param.ep_state == DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING) - { - dapl_dbg_log(DAPL_DBG_TYPE_WARN, - "--> dapl_ep_free: invalid state: %x, ep %p\n", - ep_ptr->param.ep_state, ep_ptr); - dat_status = DAT_ERROR(DAT_INVALID_STATE, - dapls_ep_state_subtype(ep_ptr)); - goto bail; - } - - ia_ptr = ep_ptr->header.owner_ia; - - /* If we are connected, issue a disconnect. If we are in the - * disconnect_pending state, disconnect with the ABRUPT flag - * set. 
- */ - - /* - * Invoke ep_disconnect to clean up outstanding connections - */ - (void)dapl_ep_disconnect(ep_ptr, DAT_CLOSE_ABRUPT_FLAG); - - /* Free all CM objects */ - cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) - ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); - while (cm_ptr != NULL) { - dapl_log(DAPL_DBG_TYPE_EP, - "dapl_ep_free: Free CM: EP=%p CM=%p\n", - ep_ptr, cm_ptr); - - next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head, - &cm_ptr->list_entry); - dapls_cm_free(cm_ptr); /* blocking call */ - cm_ptr = next_cm_ptr; - } - - /* - * Do verification of parameters and the state change atomically. - */ - dapl_os_lock(&ep_ptr->header.lock); - -#ifdef DAPL_DBG - /* check if event pending and warn, don't assert, state is valid */ - if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { - dapl_dbg_log(DAPL_DBG_TYPE_WARN, " dat_ep_free WARNING: " - "EVENT PENDING on ep %p, disconnect " - "and wait before calling dat_ep_free\n", ep_ptr); - } -#endif - - if (ep_ptr->cxn_timer != NULL) { - dapls_timer_cancel(ep_ptr->cxn_timer); - dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER)); - ep_ptr->cxn_timer = NULL; - } - - /* Remove the EP from the IA */ - dapl_ia_unlink_ep(ia_ptr, ep_ptr); - - /* - * Update ref counts. Note the user may have used ep_modify - * to set handles to NULL. Set handles to NULL so this routine - * is idempotent. 
- */ - if (param->pz_handle != NULL) { - dapl_os_atomic_dec(&((DAPL_PZ *) param->pz_handle)-> - pz_ref_count); - param->pz_handle = NULL; - } - if (param->recv_evd_handle != NULL) { - dapl_os_atomic_dec(&((DAPL_EVD *) param->recv_evd_handle)-> - evd_ref_count); - param->recv_evd_handle = NULL; - } - if (param->request_evd_handle != NULL) { - dapl_os_atomic_dec(&((DAPL_EVD *) param->request_evd_handle)-> - evd_ref_count); - param->request_evd_handle = NULL; - } - if (param->connect_evd_handle != NULL) { - dapl_os_atomic_dec(&((DAPL_EVD *) param->connect_evd_handle)-> - evd_ref_count); - param->connect_evd_handle = NULL; - } - - /* - * Finish tearing everything down. - */ - dapl_dbg_log(DAPL_DBG_TYPE_EP | DAPL_DBG_TYPE_CM, - "dapl_ep_free: Free EP: %x, ep %p qp_state %x qp_handle %x\n", - ep_ptr->param.ep_state, - ep_ptr, ep_ptr->qp_state, ep_ptr->qp_handle); - /* - * Take care of the transport resource. Make a copy of the qp_state - * to prevent race conditions when we exit the lock. - */ - save_qp_state = ep_ptr->qp_state; - ep_ptr->qp_state = DAPL_QP_STATE_UNATTACHED; - dapl_os_unlock(&ep_ptr->header.lock); - - /* Free the QP. If the EP has never been used, the QP is invalid */ - if (save_qp_state != DAPL_QP_STATE_UNATTACHED) { - dat_status = dapls_ib_qp_free(ia_ptr, ep_ptr); - /* This should always succeed, but report to the user if - * there is a problem. The qp_state must be restored so - * they can try it again in the face of EINTR or similar - * where the QP is OK but the call couldn't complete. - */ - if (dat_status != DAT_SUCCESS) { - ep_ptr->qp_state = save_qp_state; - goto bail; - } - } - - dapls_ep_flush_cqs(ep_ptr); - - /* Free the resource */ - dapl_ep_dealloc(ep_ptr); - - bail: - return dat_status; - -} - -/* - * Local variables: - * c-indent-level: 4 - * c-basic-offset: 4 - * tab-width: 8 - * End: - */ +/* + * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. 
+ * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +/********************************************************************** + * + * MODULE: dapl_ep_free.c + * + * PURPOSE: Endpoint management + * Description: Interfaces in this file are completely described in + * the DAPL 1.1 API, Chapter 6, section 5.4 + * + * $Id:$ + **********************************************************************/ + +#include "dapl.h" +#include "dapl_ia_util.h" +#include "dapl_ep_util.h" +#include "dapl_adapter_util.h" +#include "dapl_ring_buffer_util.h" +#include "dapl_timer_util.h" + +/* + * dapl_ep_free + * + * DAPL Requirements Version xxx, 6.5.3 + * + * Destroy an instance of the Endpoint + * + * Input: + * ep_handle + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INVALID_PARAMETER + * DAT_INVALID_STATE + */ +DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle) +{ + DAPL_EP *ep_ptr; + DAPL_IA *ia_ptr; + DAT_EP_PARAM *param; + dp_ib_cm_handle_t cm_ptr, next_cm_ptr; + ib_qp_state_t save_qp_state; + DAT_RETURN dat_status = DAT_SUCCESS; + + dapl_dbg_log(DAPL_DBG_TYPE_API | DAPL_DBG_TYPE_CM, + "dapl_ep_free (%p)\n", ep_handle); + + ep_ptr = (DAPL_EP *) ep_handle; + param = &ep_ptr->param; + + /* + * Verify parameter & state + */ + if (DAPL_BAD_HANDLE(ep_ptr, DAPL_MAGIC_EP)) { + dat_status = + DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP); + goto bail; + } + DAPL_CNTR(ep_ptr->header.owner_ia, DCNT_IA_EP_FREE); + + if (ep_ptr->param.ep_state == DAT_EP_STATE_RESERVED || + ep_ptr->param.ep_state == DAT_EP_STATE_PASSIVE_CONNECTION_PENDING || + ep_ptr->param.ep_state == DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING) + { + dapl_dbg_log(DAPL_DBG_TYPE_WARN, + "--> dapl_ep_free: invalid state: %x, ep %p\n", + ep_ptr->param.ep_state, ep_ptr); + dat_status = DAT_ERROR(DAT_INVALID_STATE, + dapls_ep_state_subtype(ep_ptr)); + goto bail; + } + + ia_ptr = ep_ptr->header.owner_ia; + + /* If we are connected, issue a disconnect. If we are in the + * disconnect_pending state, disconnect with the ABRUPT flag + * set. 
+ */ + + /* + * Invoke ep_disconnect to clean up outstanding connections + */ + (void)dapl_ep_disconnect(ep_ptr, DAT_CLOSE_ABRUPT_FLAG); + + /* Free all CM objects */ + cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) + ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); + while (cm_ptr != NULL) { + dapl_log(DAPL_DBG_TYPE_EP, + "dapl_ep_free: Free CM: EP=%p CM=%p\n", + ep_ptr, cm_ptr); + + next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head, + &cm_ptr->list_entry); + dapls_cm_free(cm_ptr); /* blocking call */ + cm_ptr = next_cm_ptr; + } + + /* + * Do verification of parameters and the state change atomically. + */ + dapl_os_lock(&ep_ptr->header.lock); + +#ifdef DAPL_DBG + /* check if event pending and warn, don't assert, state is valid */ + if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { + dapl_dbg_log(DAPL_DBG_TYPE_WARN, " dat_ep_free WARNING: " + "EVENT PENDING on ep %p, disconnect " + "and wait before calling dat_ep_free\n", ep_ptr); + } +#endif + + if (ep_ptr->cxn_timer != NULL) { + dapls_timer_cancel(ep_ptr->cxn_timer); + dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER)); + ep_ptr->cxn_timer = NULL; + } + + /* Remove the EP from the IA */ + dapl_ia_unlink_ep(ia_ptr, ep_ptr); + + /* + * Update ref counts. Note the user may have used ep_modify + * to set handles to NULL. Set handles to NULL so this routine + * is idempotent. + */ + if (param->pz_handle != NULL) { + dapl_os_atomic_dec(&((DAPL_PZ *) param->pz_handle)-> + pz_ref_count); + param->pz_handle = NULL; + } + if (param->connect_evd_handle != NULL) { + dapl_os_atomic_dec(&((DAPL_EVD *) param->connect_evd_handle)-> + evd_ref_count); + param->connect_evd_handle = NULL; + } + + /* + * Finish tearing everything down. + */ + dapl_dbg_log(DAPL_DBG_TYPE_EP | DAPL_DBG_TYPE_CM, + "dapl_ep_free: Free EP: %x, ep %p qp_state %x qp_handle %x\n", + ep_ptr->param.ep_state, + ep_ptr, ep_ptr->qp_state, ep_ptr->qp_handle); + /* + * Take care of the transport resource. 
Make a copy of the qp_state + * to prevent race conditions when we exit the lock. + */ + save_qp_state = ep_ptr->qp_state; + ep_ptr->qp_state = DAPL_QP_STATE_UNATTACHED; + dapl_os_unlock(&ep_ptr->header.lock); + + /* Free the QP. If the EP has never been used, the QP is invalid */ + if (save_qp_state != DAPL_QP_STATE_UNATTACHED) { + dat_status = dapls_ib_qp_free(ia_ptr, ep_ptr); + /* This should always succeed, but report to the user if + * there is a problem. The qp_state must be restored so + * they can try it again in the face of EINTR or similar + * where the QP is OK but the call couldn't complete. + */ + if (dat_status != DAT_SUCCESS) { + ep_ptr->qp_state = save_qp_state; + goto bail; + } + } + + /* + * Release the EVD handles after we destroy the QP, so we can flush all + * QP entries. + */ + if (param->recv_evd_handle != NULL) { + dapl_os_atomic_dec(&((DAPL_EVD *) param->recv_evd_handle)-> + evd_ref_count); + param->recv_evd_handle = NULL; + } + if (param->request_evd_handle != NULL) { + dapl_os_atomic_dec(&((DAPL_EVD *) param->request_evd_handle)-> + evd_ref_count); + param->request_evd_handle = NULL; + } + + /* Free the resource */ + dapl_ep_dealloc(ep_ptr); + + bail: + return dat_status; + +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 8 + * End: + */ diff --git a/trunk/ulp/dapl2/dapl/common/dapl_ep_util.c b/trunk/ulp/dapl2/dapl/common/dapl_ep_util.c index fc911a6a..eb7cab24 100644 --- a/trunk/ulp/dapl2/dapl/common/dapl_ep_util.c +++ b/trunk/ulp/dapl2/dapl/common/dapl_ep_util.c @@ -1,636 +1,634 @@ -/* - * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. 
- * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. - */ - -/********************************************************************** - * - * MODULE: dapl_ep_util.c - * - * PURPOSE: Manage EP Info structure - * - * $Id:$ - **********************************************************************/ - -#include "dapl_ep_util.h" -#include "dapl_ring_buffer_util.h" -#include "dapl_cookie.h" -#include "dapl_adapter_util.h" -#include "dapl_evd_util.h" -#include "dapl_cr_util.h" /* for callback routine */ - -/* - * Local definitions - */ -/* - * Default number of I/O operations on an end point - */ -#define IB_IO_DEFAULT 16 -/* - * Default number of scatter/gather entries available to a single - * post send/recv - */ -#define IB_IOV_DEFAULT 4 - -/* - * Default number of RDMA operations in progress at a time - */ -#define IB_RDMA_DEFAULT 4 - -extern void dapli_ep_default_attrs(IN DAPL_EP * ep_ptr); - -char *dapl_get_ep_state_str(DAT_EP_STATE state) -{ -#ifdef DAPL_DBG - static char *state_str[DAT_EP_STATE_CONNECTED_MULTI_PATH + 1] = { - "DAT_EP_STATE_UNCONNECTED", /* quiescent state */ - "DAT_EP_STATE_UNCONFIGURED_UNCONNECTED", - "DAT_EP_STATE_RESERVED", - "DAT_EP_STATE_UNCONFIGURED_RESERVED", - "DAT_EP_STATE_PASSIVE_CONNECTION_PENDING", - "DAT_EP_STATE_UNCONFIGURED_PASSIVE", - 
"DAT_EP_STATE_ACTIVE_CONNECTION_PENDING", - "DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING", - "DAT_EP_STATE_UNCONFIGURED_TENTATIVE", - "DAT_EP_STATE_CONNECTED", - "DAT_EP_STATE_DISCONNECT_PENDING", - "DAT_EP_STATE_DISCONNECTED", - "DAT_EP_STATE_COMPLETION_PENDING", - "DAT_EP_STATE_CONNECTED_SINGLE_PATH", - "DAT_EP_STATE_CONNECTED_MULTI_PATH" - }; - return state_str[state]; -#else - static char buf[12]; - sprintf(buf, "%d", state); - return buf; -#endif -} - -/* - * dapl_ep_alloc - * - * alloc and initialize an EP INFO struct - * - * Input: - * IA INFO struct ptr - * - * Output: - * ep_ptr - * - * Returns: - * none - * - */ -DAPL_EP *dapl_ep_alloc(IN DAPL_IA * ia_ptr, IN const DAT_EP_ATTR * ep_attr) -{ - DAPL_EP *ep_ptr; - - /* Allocate EP */ - ep_ptr = - (DAPL_EP *) dapl_os_alloc(sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); - if (ep_ptr == NULL) { - goto bail; - } - - /* zero the structure */ - dapl_os_memzero(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); - -#ifdef DAPL_COUNTERS - /* Allocate counters */ - ep_ptr->cntrs = - dapl_os_alloc(sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); - if (ep_ptr->cntrs == NULL) { - dapl_os_free(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); - return (NULL); - } - dapl_os_memzero(ep_ptr->cntrs, - sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); -#endif /* DAPL_COUNTERS */ - - /* - * initialize the header - */ - ep_ptr->header.provider = ia_ptr->header.provider; - ep_ptr->header.magic = DAPL_MAGIC_EP; - ep_ptr->header.handle_type = DAT_HANDLE_TYPE_EP; - ep_ptr->header.owner_ia = ia_ptr; - ep_ptr->header.user_context.as_64 = 0; - ep_ptr->header.user_context.as_ptr = NULL; - - dapl_llist_init_entry(&ep_ptr->header.ia_list_entry); - dapl_llist_init_head(&ep_ptr->cm_list_head); - dapl_os_lock_init(&ep_ptr->header.lock); - - /* - * Initialize the body - */ - /* - * Set up default parameters if the user passed in a NULL - */ - if (ep_attr == NULL) { - dapli_ep_default_attrs(ep_ptr); - } else { - ep_ptr->param.ep_attr = *ep_attr; - } - - /* - * 
IBM OS API specific fields - */ - ep_ptr->qp_handle = IB_INVALID_HANDLE; - ep_ptr->qpn = 0; - ep_ptr->qp_state = DAPL_QP_STATE_UNATTACHED; - - if (DAT_SUCCESS != dapls_cb_create(&ep_ptr->req_buffer, - ep_ptr, - ep_ptr->param.ep_attr. - max_request_dtos)) { - dapl_ep_dealloc(ep_ptr); - ep_ptr = NULL; - goto bail; - } - - if (DAT_SUCCESS != dapls_cb_create(&ep_ptr->recv_buffer, - ep_ptr, - ep_ptr->param.ep_attr.max_recv_dtos)) - { - dapl_ep_dealloc(ep_ptr); - ep_ptr = NULL; - goto bail; - } - - dapls_io_trc_alloc(ep_ptr); - - bail: - return ep_ptr; -} - -/* - * dapl_ep_dealloc - * - * Free the passed in EP structure. - * - * Input: - * entry point pointer - * - * Output: - * none - * - * Returns: - * none - * - */ -void dapl_ep_dealloc(IN DAPL_EP * ep_ptr) -{ - dapl_os_assert(ep_ptr->header.magic == DAPL_MAGIC_EP); - - ep_ptr->header.magic = DAPL_MAGIC_INVALID; /* reset magic to prevent reuse */ - - dapls_cb_free(&ep_ptr->req_buffer); - dapls_cb_free(&ep_ptr->recv_buffer); - - if (NULL != ep_ptr->cxn_timer) { - dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER)); - } - -#ifdef DAPL_COUNTERS - dapl_os_free(ep_ptr->cntrs, sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); -#endif /* DAPL_COUNTERS */ - - dapl_os_free(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); -} - -/* - * dapl_ep_default_attrs - * - * Set default values in the parameter fields - * - * Input: - * entry point pointer - * - * Output: - * none - * - * Returns: - * none - * - */ -void dapli_ep_default_attrs(IN DAPL_EP * ep_ptr) -{ - DAT_EP_ATTR ep_attr_limit; - DAT_EP_ATTR *ep_attr; - DAT_RETURN dat_status; - - ep_attr = &ep_ptr->param.ep_attr; - /* Set up defaults */ - dapl_os_memzero(ep_attr, sizeof(DAT_EP_ATTR)); - - /* mtu and rdma sizes fixed in IB as per IBTA 1.1, 9.4.3, 9.4.4, 9.7.7. 
*/ - ep_attr->max_mtu_size = 0x80000000; - ep_attr->max_rdma_size = 0x80000000; - - ep_attr->qos = DAT_QOS_BEST_EFFORT; - ep_attr->service_type = DAT_SERVICE_TYPE_RC; - ep_attr->max_recv_dtos = IB_IO_DEFAULT; - ep_attr->max_request_dtos = IB_IO_DEFAULT; - ep_attr->max_recv_iov = IB_IOV_DEFAULT; - ep_attr->max_request_iov = IB_IOV_DEFAULT; - ep_attr->max_rdma_read_in = IB_RDMA_DEFAULT; - ep_attr->max_rdma_read_out = IB_RDMA_DEFAULT; - - /* - * Configure the EP as a standard completion type, which will be - * used by the EVDs. A threshold of 1 is the default state of an - * EVD. - */ - ep_attr->request_completion_flags = DAT_COMPLETION_EVD_THRESHOLD_FLAG; - ep_attr->recv_completion_flags = DAT_COMPLETION_EVD_THRESHOLD_FLAG; - /* - * Unspecified defaults: - * - ep_privileges: No RDMA capabilities - * - num_transport_specific_params: none - * - transport_specific_params: none - * - num_provider_specific_params: 0 - * - provider_specific_params: 0 - */ - - dat_status = dapls_ib_query_hca(ep_ptr->header.owner_ia->hca_ptr, - NULL, &ep_attr_limit, NULL); - /* check against HCA maximums */ - if (dat_status == DAT_SUCCESS) { - ep_ptr->param.ep_attr.max_mtu_size = - DAPL_MIN(ep_ptr->param.ep_attr.max_mtu_size, - ep_attr_limit.max_mtu_size); - ep_ptr->param.ep_attr.max_rdma_size = - DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_size, - ep_attr_limit.max_rdma_size); - ep_ptr->param.ep_attr.max_recv_dtos = - DAPL_MIN(ep_ptr->param.ep_attr.max_recv_dtos, - ep_attr_limit.max_recv_dtos); - ep_ptr->param.ep_attr.max_request_dtos = - DAPL_MIN(ep_ptr->param.ep_attr.max_request_dtos, - ep_attr_limit.max_request_dtos); - ep_ptr->param.ep_attr.max_recv_iov = - DAPL_MIN(ep_ptr->param.ep_attr.max_recv_iov, - ep_attr_limit.max_recv_iov); - ep_ptr->param.ep_attr.max_request_iov = - DAPL_MIN(ep_ptr->param.ep_attr.max_request_iov, - ep_attr_limit.max_request_iov); - ep_ptr->param.ep_attr.max_rdma_read_in = - DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_read_in, - ep_attr_limit.max_rdma_read_in); - 
ep_ptr->param.ep_attr.max_rdma_read_out = - DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_read_out, - ep_attr_limit.max_rdma_read_out); - } -} - -DAT_RETURN dapl_ep_check_recv_completion_flags(DAT_COMPLETION_FLAGS flags) -{ - - /* - * InfiniBand will not allow signal suppression for RECV completions, - * see the 1.0.1 spec section 10.7.3.1, 10.8.6. - * N.B. SIGNALLED has a different meaning in dapl than it does - * in IB; IB SIGNALLED is the same as DAPL SUPPRESS. DAPL - * SIGNALLED simply means the user will not get awakened when - * an EVD completes, even though the dapl handler is invoked. - */ - - if (flags & DAT_COMPLETION_SUPPRESS_FLAG) { - return DAT_INVALID_PARAMETER; - } - - return DAT_SUCCESS; -} - -DAT_RETURN dapl_ep_check_request_completion_flags(DAT_COMPLETION_FLAGS flags) -{ - return DAT_SUCCESS; -} - -DAT_RETURN -dapl_ep_post_send_req(IN DAT_EP_HANDLE ep_handle, - IN DAT_COUNT num_segments, - IN DAT_LMR_TRIPLET * local_iov, - IN DAT_DTO_COOKIE user_cookie, - IN const DAT_RMR_TRIPLET * remote_iov, - IN DAT_COMPLETION_FLAGS completion_flags, - IN DAPL_DTO_TYPE dto_type, IN int op_type) -{ - DAPL_EP *ep_ptr; - DAPL_COOKIE *cookie; - DAT_RETURN dat_status; - - if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP)) { - dat_status = - DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP); - goto bail; - } - - ep_ptr = (DAPL_EP *) ep_handle; - - /* - * Synchronization ok since this buffer is only used for send - * requests, which aren't allowed to race with each other. 
- */ - dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer, - dto_type, user_cookie, &cookie); - if (dat_status != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " dapl_post_req resource ERR:" - " dtos pending = %d, max_dtos %d, max_cb %d hd %d tl %d\n", - dapls_cb_pending(&ep_ptr->req_buffer), - ep_ptr->param.ep_attr.max_request_dtos, - ep_ptr->req_buffer.pool_size, - ep_ptr->req_buffer.head, ep_ptr->req_buffer.tail); - - goto bail; - } - - /* - * Invoke provider specific routine to post DTO - */ - dat_status = dapls_ib_post_send(ep_ptr, - op_type, - cookie, - num_segments, - local_iov, - remote_iov, completion_flags); - - if (dat_status != DAT_SUCCESS) { - dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie); - } - - bail: - return dat_status; -} - -/* - * dapli_ep_timeout - * - * If this routine is invoked before a connection occurs, generate an - * event - */ -void dapls_ep_timeout(uintptr_t arg) -{ - DAPL_EP *ep_ptr; - ib_cm_events_t ib_cm_event; - - dapl_dbg_log(DAPL_DBG_TYPE_CM, "--> dapls_ep_timeout! ep %lx\n", arg); - - ep_ptr = (DAPL_EP *) arg; - - /* reset the EP state */ - ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED; - - /* Clean up the EP and put the underlying QP into the ERROR state. - * The disconnect_clean interface requires the provided dependent - *cm event number. - */ - ib_cm_event = dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_TIMED_OUT); - dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, ib_cm_event); - - (void)dapls_evd_post_connection_event((DAPL_EVD *) ep_ptr->param. - connect_evd_handle, - DAT_CONNECTION_EVENT_TIMED_OUT, - (DAT_HANDLE) ep_ptr, 0, 0); -} - -/* - * dapls_ep_state_subtype - * - * Return the INVALID_STATE connection subtype associated with an - * INVALID_STATE on an EP. Strictly for error reporting. 
- */ -DAT_RETURN_SUBTYPE dapls_ep_state_subtype(IN DAPL_EP * ep_ptr) -{ - DAT_RETURN_SUBTYPE dat_status; - - switch (ep_ptr->param.ep_state) { - case DAT_EP_STATE_UNCONNECTED: - { - dat_status = DAT_INVALID_STATE_EP_UNCONNECTED; - break; - } - case DAT_EP_STATE_RESERVED: - { - dat_status = DAT_INVALID_STATE_EP_RESERVED; - break; - } - case DAT_EP_STATE_PASSIVE_CONNECTION_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_PASSCONNPENDING; - break; - } - case DAT_EP_STATE_ACTIVE_CONNECTION_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_ACTCONNPENDING; - break; - } - case DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_TENTCONNPENDING; - break; - } - case DAT_EP_STATE_CONNECTED: - { - dat_status = DAT_INVALID_STATE_EP_CONNECTED; - break; - } - case DAT_EP_STATE_DISCONNECT_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_DISCPENDING; - break; - } - case DAT_EP_STATE_DISCONNECTED: - { - dat_status = DAT_INVALID_STATE_EP_DISCONNECTED; - break; - } - case DAT_EP_STATE_COMPLETION_PENDING: - { - dat_status = DAT_INVALID_STATE_EP_COMPLPENDING; - break; - } - - default: - { - dat_status = 0; - break; - } - } - - return dat_status; -} - -#ifdef DAPL_DBG_IO_TRC -/* allocate trace buffer */ -void dapls_io_trc_alloc(DAPL_EP * ep_ptr) -{ - DAT_RETURN dat_status; - int i; - struct io_buf_track *ibt; - - ep_ptr->ibt_dumped = 0; /* bool to control how often we print */ - dat_status = dapls_rbuf_alloc(&ep_ptr->ibt_queue, DBG_IO_TRC_QLEN); - if (dat_status != DAT_SUCCESS) { - goto bail; - } - ibt = - (struct io_buf_track *)dapl_os_alloc(sizeof(struct io_buf_track) * - DBG_IO_TRC_QLEN); - - if (dat_status != DAT_SUCCESS) { - dapls_rbuf_destroy(&ep_ptr->ibt_queue); - goto bail; - } - ep_ptr->ibt_base = ibt; - dapl_os_memzero(ibt, sizeof(struct io_buf_track) * DBG_IO_TRC_QLEN); - - /* add events to free event queue */ - for (i = 0; i < DBG_IO_TRC_QLEN; i++) { - dapls_rbuf_add(&ep_ptr->ibt_queue, ibt++); - } - bail: - return; -} -#endif /* DAPL_DBG_IO_TRC 
*/ - -/* - * Generate a disconnect event on abruct close for older verbs providers - * that do not do it automatically. - */ - -void -dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr, - DAT_CLOSE_FLAGS disconnect_flags) -{ - ib_cm_events_t ib_cm_event; - DAPL_CR *cr_ptr; - dp_ib_cm_handle_t cm_ptr; - - /* - * Acquire the lock and make sure we didn't get a callback - * that cleaned up. - */ - dapl_os_lock(&ep_ptr->header.lock); - if (disconnect_flags == DAT_CLOSE_ABRUPT_FLAG && - ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { - /* - * If this is an ABRUPT close, the provider will not generate - * a disconnect message so we do it manually here. Just invoke - * the CM callback as it will clean up the appropriate - * data structures, reset the state, and generate the event - * on the way out. Obtain the provider dependent cm_event to - * pass into the callback for a disconnect. - */ - ib_cm_event = - dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_DISCONNECTED); - - cr_ptr = ep_ptr->cr_ptr; - cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) - ? 
NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); - dapl_os_unlock(&ep_ptr->header.lock); - - if (cr_ptr != NULL) { - dapl_dbg_log(DAPL_DBG_TYPE_API | DAPL_DBG_TYPE_CM, - " dapl_ep_disconnect force callback on EP %p CM handle %x\n", - ep_ptr, cr_ptr->ib_cm_handle); - - dapls_cr_callback(cr_ptr->ib_cm_handle, - ib_cm_event, NULL, 0, cr_ptr->sp_ptr); - } else { - dapl_evd_connection_callback(cm_ptr, - ib_cm_event, - NULL, 0, (void *)ep_ptr); - } - } else { - dapl_os_unlock(&ep_ptr->header.lock); - } -} - -/* - * dapl_ep_link_cm - * - * Add linking of provider's CM object to a EP structure - * This enables multiple CM's per EP, and syncronization - * - * Input: - * DAPL_EP *ep_ptr - * dp_ib_cm_handle_t *cm_ptr defined in provider's dapl_util.h - * - * CM objects linked with EP using ->list_entry - * Output: - * none - * - * Returns: - * none - * - */ -void dapl_ep_link_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr) -{ - dapl_os_lock(&ep_ptr->header.lock); - dapls_cm_acquire(cm_ptr); - dapl_llist_add_tail(&ep_ptr->cm_list_head, &cm_ptr->list_entry, cm_ptr); - dapl_os_unlock(&ep_ptr->header.lock); -} - -void dapl_ep_unlink_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr) -{ - dapl_os_lock(&ep_ptr->header.lock); - dapl_llist_remove_entry(&ep_ptr->cm_list_head, &cm_ptr->list_entry); - dapls_cm_release(cm_ptr); - dapl_os_unlock(&ep_ptr->header.lock); -} - -static void dapli_ep_flush_evd(DAPL_EVD *evd_ptr) -{ - DAT_RETURN dat_status; - - dapl_os_lock(&evd_ptr->header.lock); - dat_status = dapls_evd_copy_cq(evd_ptr); - dapl_os_unlock(&evd_ptr->header.lock); - - if (dat_status == DAT_QUEUE_FULL) - dapls_evd_post_overflow_event(evd_ptr); -} - -void dapls_ep_flush_cqs(DAPL_EP * ep_ptr) -{ - if (ep_ptr->param.request_evd_handle) - dapli_ep_flush_evd((DAPL_EVD *) ep_ptr->param.request_evd_handle); - - if (ep_ptr->param.recv_evd_handle) - dapli_ep_flush_evd((DAPL_EVD *) ep_ptr->param.recv_evd_handle); -} - -/* - * Local variables: - * c-indent-level: 4 - * 
c-basic-offset: 4 - * tab-width: 8 - * End: - */ +/* + * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +/********************************************************************** + * + * MODULE: dapl_ep_util.c + * + * PURPOSE: Manage EP Info structure + * + * $Id:$ + **********************************************************************/ + +#include "dapl_ep_util.h" +#include "dapl_ring_buffer_util.h" +#include "dapl_cookie.h" +#include "dapl_adapter_util.h" +#include "dapl_evd_util.h" +#include "dapl_cr_util.h" /* for callback routine */ + +/* + * Local definitions + */ +/* + * Default number of I/O operations on an end point + */ +#define IB_IO_DEFAULT 16 +/* + * Default number of scatter/gather entries available to a single + * post send/recv + */ +#define IB_IOV_DEFAULT 4 + +/* + * Default number of RDMA operations in progress at a time + */ +#define IB_RDMA_DEFAULT 4 + +extern void dapli_ep_default_attrs(IN DAPL_EP * ep_ptr); + +char *dapl_get_ep_state_str(DAT_EP_STATE state) +{ +#ifdef DAPL_DBG + static char *state_str[DAT_EP_STATE_CONNECTED_MULTI_PATH + 1] = { + "DAT_EP_STATE_UNCONNECTED", /* quiescent state */ + "DAT_EP_STATE_UNCONFIGURED_UNCONNECTED", + "DAT_EP_STATE_RESERVED", + "DAT_EP_STATE_UNCONFIGURED_RESERVED", + "DAT_EP_STATE_PASSIVE_CONNECTION_PENDING", + "DAT_EP_STATE_UNCONFIGURED_PASSIVE", + "DAT_EP_STATE_ACTIVE_CONNECTION_PENDING", + "DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING", + "DAT_EP_STATE_UNCONFIGURED_TENTATIVE", + "DAT_EP_STATE_CONNECTED", + "DAT_EP_STATE_DISCONNECT_PENDING", + "DAT_EP_STATE_DISCONNECTED", + "DAT_EP_STATE_COMPLETION_PENDING", + "DAT_EP_STATE_CONNECTED_SINGLE_PATH", + "DAT_EP_STATE_CONNECTED_MULTI_PATH" + }; + return state_str[state]; +#else + static char buf[12]; + sprintf(buf, "%d", state); + return buf; +#endif +} + +/* + * dapl_ep_alloc + * + * alloc and initialize an EP INFO struct + * + * Input: + * IA INFO struct ptr + * + * Output: + * ep_ptr + * + * Returns: + * none + * + */ +DAPL_EP *dapl_ep_alloc(IN DAPL_IA * ia_ptr, IN const DAT_EP_ATTR * ep_attr) +{ + DAPL_EP *ep_ptr; + + /* Allocate EP */ + ep_ptr = + 
(DAPL_EP *) dapl_os_alloc(sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); + if (ep_ptr == NULL) { + goto bail; + } + + /* zero the structure */ + dapl_os_memzero(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); + +#ifdef DAPL_COUNTERS + /* Allocate counters */ + ep_ptr->cntrs = + dapl_os_alloc(sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); + if (ep_ptr->cntrs == NULL) { + dapl_os_free(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); + return (NULL); + } + dapl_os_memzero(ep_ptr->cntrs, + sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); +#endif /* DAPL_COUNTERS */ + + /* + * initialize the header + */ + ep_ptr->header.provider = ia_ptr->header.provider; + ep_ptr->header.magic = DAPL_MAGIC_EP; + ep_ptr->header.handle_type = DAT_HANDLE_TYPE_EP; + ep_ptr->header.owner_ia = ia_ptr; + ep_ptr->header.user_context.as_64 = 0; + ep_ptr->header.user_context.as_ptr = NULL; + + dapl_llist_init_entry(&ep_ptr->header.ia_list_entry); + dapl_llist_init_head(&ep_ptr->cm_list_head); + dapl_os_lock_init(&ep_ptr->header.lock); + + /* + * Initialize the body + */ + /* + * Set up default parameters if the user passed in a NULL + */ + if (ep_attr == NULL) { + dapli_ep_default_attrs(ep_ptr); + } else { + ep_ptr->param.ep_attr = *ep_attr; + } + + /* + * IBM OS API specific fields + */ + ep_ptr->qp_handle = IB_INVALID_HANDLE; + ep_ptr->qpn = 0; + ep_ptr->qp_state = DAPL_QP_STATE_UNATTACHED; + + if (DAT_SUCCESS != dapls_cb_create(&ep_ptr->req_buffer, + ep_ptr, + ep_ptr->param.ep_attr. + max_request_dtos)) { + dapl_ep_dealloc(ep_ptr); + ep_ptr = NULL; + goto bail; + } + + if (DAT_SUCCESS != dapls_cb_create(&ep_ptr->recv_buffer, + ep_ptr, + ep_ptr->param.ep_attr.max_recv_dtos)) + { + dapl_ep_dealloc(ep_ptr); + ep_ptr = NULL; + goto bail; + } + + dapls_io_trc_alloc(ep_ptr); + + bail: + return ep_ptr; +} + +/* + * dapl_ep_dealloc + * + * Free the passed in EP structure. 
+ * + * Input: + * entry point pointer + * + * Output: + * none + * + * Returns: + * none + * + */ +void dapl_ep_dealloc(IN DAPL_EP * ep_ptr) +{ + dapl_os_assert(ep_ptr->header.magic == DAPL_MAGIC_EP); + + ep_ptr->header.magic = DAPL_MAGIC_INVALID; /* reset magic to prevent reuse */ + + dapls_cb_free(&ep_ptr->req_buffer); + dapls_cb_free(&ep_ptr->recv_buffer); + + if (NULL != ep_ptr->cxn_timer) { + dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER)); + } + +#ifdef DAPL_COUNTERS + dapl_os_free(ep_ptr->cntrs, sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS); +#endif /* DAPL_COUNTERS */ + + dapl_os_free(ep_ptr, sizeof(DAPL_EP) + sizeof(DAT_SOCK_ADDR)); +} + +/* + * dapl_ep_default_attrs + * + * Set default values in the parameter fields + * + * Input: + * entry point pointer + * + * Output: + * none + * + * Returns: + * none + * + */ +void dapli_ep_default_attrs(IN DAPL_EP * ep_ptr) +{ + DAT_EP_ATTR ep_attr_limit; + DAT_EP_ATTR *ep_attr; + DAT_RETURN dat_status; + + ep_attr = &ep_ptr->param.ep_attr; + /* Set up defaults */ + dapl_os_memzero(ep_attr, sizeof(DAT_EP_ATTR)); + + /* mtu and rdma sizes fixed in IB as per IBTA 1.1, 9.4.3, 9.4.4, 9.7.7. */ + ep_attr->max_mtu_size = 0x80000000; + ep_attr->max_rdma_size = 0x80000000; + + ep_attr->qos = DAT_QOS_BEST_EFFORT; + ep_attr->service_type = DAT_SERVICE_TYPE_RC; + ep_attr->max_recv_dtos = IB_IO_DEFAULT; + ep_attr->max_request_dtos = IB_IO_DEFAULT; + ep_attr->max_recv_iov = IB_IOV_DEFAULT; + ep_attr->max_request_iov = IB_IOV_DEFAULT; + ep_attr->max_rdma_read_in = IB_RDMA_DEFAULT; + ep_attr->max_rdma_read_out = IB_RDMA_DEFAULT; + + /* + * Configure the EP as a standard completion type, which will be + * used by the EVDs. A threshold of 1 is the default state of an + * EVD. 
+ */ + ep_attr->request_completion_flags = DAT_COMPLETION_EVD_THRESHOLD_FLAG; + ep_attr->recv_completion_flags = DAT_COMPLETION_EVD_THRESHOLD_FLAG; + /* + * Unspecified defaults: + * - ep_privileges: No RDMA capabilities + * - num_transport_specific_params: none + * - transport_specific_params: none + * - num_provider_specific_params: 0 + * - provider_specific_params: 0 + */ + + dat_status = dapls_ib_query_hca(ep_ptr->header.owner_ia->hca_ptr, + NULL, &ep_attr_limit, NULL); + /* check against HCA maximums */ + if (dat_status == DAT_SUCCESS) { + ep_ptr->param.ep_attr.max_mtu_size = + DAPL_MIN(ep_ptr->param.ep_attr.max_mtu_size, + ep_attr_limit.max_mtu_size); + ep_ptr->param.ep_attr.max_rdma_size = + DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_size, + ep_attr_limit.max_rdma_size); + ep_ptr->param.ep_attr.max_recv_dtos = + DAPL_MIN(ep_ptr->param.ep_attr.max_recv_dtos, + ep_attr_limit.max_recv_dtos); + ep_ptr->param.ep_attr.max_request_dtos = + DAPL_MIN(ep_ptr->param.ep_attr.max_request_dtos, + ep_attr_limit.max_request_dtos); + ep_ptr->param.ep_attr.max_recv_iov = + DAPL_MIN(ep_ptr->param.ep_attr.max_recv_iov, + ep_attr_limit.max_recv_iov); + ep_ptr->param.ep_attr.max_request_iov = + DAPL_MIN(ep_ptr->param.ep_attr.max_request_iov, + ep_attr_limit.max_request_iov); + ep_ptr->param.ep_attr.max_rdma_read_in = + DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_read_in, + ep_attr_limit.max_rdma_read_in); + ep_ptr->param.ep_attr.max_rdma_read_out = + DAPL_MIN(ep_ptr->param.ep_attr.max_rdma_read_out, + ep_attr_limit.max_rdma_read_out); + } +} + +DAT_RETURN dapl_ep_check_recv_completion_flags(DAT_COMPLETION_FLAGS flags) +{ + + /* + * InfiniBand will not allow signal suppression for RECV completions, + * see the 1.0.1 spec section 10.7.3.1, 10.8.6. + * N.B. SIGNALLED has a different meaning in dapl than it does + * in IB; IB SIGNALLED is the same as DAPL SUPPRESS. 
DAPL + * SIGNALLED simply means the user will not get awakened when + * an EVD completes, even though the dapl handler is invoked. + */ + + if (flags & DAT_COMPLETION_SUPPRESS_FLAG) { + return DAT_INVALID_PARAMETER; + } + + return DAT_SUCCESS; +} + +DAT_RETURN dapl_ep_check_request_completion_flags(DAT_COMPLETION_FLAGS flags) +{ + return DAT_SUCCESS; +} + +DAT_RETURN +dapl_ep_post_send_req(IN DAT_EP_HANDLE ep_handle, + IN DAT_COUNT num_segments, + IN DAT_LMR_TRIPLET * local_iov, + IN DAT_DTO_COOKIE user_cookie, + IN const DAT_RMR_TRIPLET * remote_iov, + IN DAT_COMPLETION_FLAGS completion_flags, + IN DAPL_DTO_TYPE dto_type, IN int op_type) +{ + DAPL_EP *ep_ptr; + DAPL_COOKIE *cookie; + DAT_RETURN dat_status; + + if (DAPL_BAD_HANDLE(ep_handle, DAPL_MAGIC_EP)) { + dat_status = + DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_EP); + goto bail; + } + + ep_ptr = (DAPL_EP *) ep_handle; + + /* + * Synchronization ok since this buffer is only used for send + * requests, which aren't allowed to race with each other. 
+ */ + dat_status = dapls_dto_cookie_alloc(&ep_ptr->req_buffer, + dto_type, user_cookie, &cookie); + if (dat_status != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " dapl_post_req resource ERR:" + " dtos pending = %d, max_dtos %d, max_cb %d hd %d tl %d\n", + dapls_cb_pending(&ep_ptr->req_buffer), + ep_ptr->param.ep_attr.max_request_dtos, + ep_ptr->req_buffer.pool_size, + ep_ptr->req_buffer.head, ep_ptr->req_buffer.tail); + + goto bail; + } + + /* + * Invoke provider specific routine to post DTO + */ + dat_status = dapls_ib_post_send(ep_ptr, + op_type, + cookie, + num_segments, + local_iov, + remote_iov, completion_flags); + + if (dat_status != DAT_SUCCESS) { + dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie); + } + + bail: + return dat_status; +} + +/* + * dapli_ep_timeout + * + * If this routine is invoked before a connection occurs, generate an + * event + */ +void dapls_ep_timeout(uintptr_t arg) +{ + DAPL_EP *ep_ptr; + ib_cm_events_t ib_cm_event; + + dapl_dbg_log(DAPL_DBG_TYPE_CM, "--> dapls_ep_timeout! ep %lx\n", arg); + + ep_ptr = (DAPL_EP *) arg; + + /* reset the EP state */ + ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED; + + /* Clean up the EP and put the underlying QP into the ERROR state. + * The disconnect_clean interface requires the provided dependent + *cm event number. + */ + ib_cm_event = dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_TIMED_OUT); + dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, ib_cm_event); + + (void)dapls_evd_post_connection_event((DAPL_EVD *) ep_ptr->param. + connect_evd_handle, + DAT_CONNECTION_EVENT_TIMED_OUT, + (DAT_HANDLE) ep_ptr, 0, 0); +} + +/* + * dapls_ep_state_subtype + * + * Return the INVALID_STATE connection subtype associated with an + * INVALID_STATE on an EP. Strictly for error reporting. 
+ */ +DAT_RETURN_SUBTYPE dapls_ep_state_subtype(IN DAPL_EP * ep_ptr) +{ + DAT_RETURN_SUBTYPE dat_status; + + switch (ep_ptr->param.ep_state) { + case DAT_EP_STATE_UNCONNECTED: + { + dat_status = DAT_INVALID_STATE_EP_UNCONNECTED; + break; + } + case DAT_EP_STATE_RESERVED: + { + dat_status = DAT_INVALID_STATE_EP_RESERVED; + break; + } + case DAT_EP_STATE_PASSIVE_CONNECTION_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_PASSCONNPENDING; + break; + } + case DAT_EP_STATE_ACTIVE_CONNECTION_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_ACTCONNPENDING; + break; + } + case DAT_EP_STATE_TENTATIVE_CONNECTION_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_TENTCONNPENDING; + break; + } + case DAT_EP_STATE_CONNECTED: + { + dat_status = DAT_INVALID_STATE_EP_CONNECTED; + break; + } + case DAT_EP_STATE_DISCONNECT_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_DISCPENDING; + break; + } + case DAT_EP_STATE_DISCONNECTED: + { + dat_status = DAT_INVALID_STATE_EP_DISCONNECTED; + break; + } + case DAT_EP_STATE_COMPLETION_PENDING: + { + dat_status = DAT_INVALID_STATE_EP_COMPLPENDING; + break; + } + + default: + { + dat_status = 0; + break; + } + } + + return dat_status; +} + +#ifdef DAPL_DBG_IO_TRC +/* allocate trace buffer */ +void dapls_io_trc_alloc(DAPL_EP * ep_ptr) +{ + DAT_RETURN dat_status; + int i; + struct io_buf_track *ibt; + + ep_ptr->ibt_dumped = 0; /* bool to control how often we print */ + dat_status = dapls_rbuf_alloc(&ep_ptr->ibt_queue, DBG_IO_TRC_QLEN); + if (dat_status != DAT_SUCCESS) { + goto bail; + } + ibt = + (struct io_buf_track *)dapl_os_alloc(sizeof(struct io_buf_track) * + DBG_IO_TRC_QLEN); + + if (dat_status != DAT_SUCCESS) { + dapls_rbuf_destroy(&ep_ptr->ibt_queue); + goto bail; + } + ep_ptr->ibt_base = ibt; + dapl_os_memzero(ibt, sizeof(struct io_buf_track) * DBG_IO_TRC_QLEN); + + /* add events to free event queue */ + for (i = 0; i < DBG_IO_TRC_QLEN; i++) { + dapls_rbuf_add(&ep_ptr->ibt_queue, ibt++); + } + bail: + return; +} +#endif /* DAPL_DBG_IO_TRC 
*/ + +/* + * Generate a disconnect event on abruct close for older verbs providers + * that do not do it automatically. + */ + +void +dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr, + DAT_CLOSE_FLAGS disconnect_flags) +{ + ib_cm_events_t ib_cm_event; + DAPL_CR *cr_ptr; + dp_ib_cm_handle_t cm_ptr; + + /* + * Acquire the lock and make sure we didn't get a callback + * that cleaned up. + */ + dapl_os_lock(&ep_ptr->header.lock); + if (disconnect_flags == DAT_CLOSE_ABRUPT_FLAG && + ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { + /* + * If this is an ABRUPT close, the provider will not generate + * a disconnect message so we do it manually here. Just invoke + * the CM callback as it will clean up the appropriate + * data structures, reset the state, and generate the event + * on the way out. Obtain the provider dependent cm_event to + * pass into the callback for a disconnect. + */ + ib_cm_event = + dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_DISCONNECTED); + + cr_ptr = ep_ptr->cr_ptr; + cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) + ? 
NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); + dapl_os_unlock(&ep_ptr->header.lock); + + if (cr_ptr != NULL) { + dapl_dbg_log(DAPL_DBG_TYPE_API | DAPL_DBG_TYPE_CM, + " dapl_ep_disconnect force callback on EP %p CM handle %x\n", + ep_ptr, cr_ptr->ib_cm_handle); + + dapls_cr_callback(cr_ptr->ib_cm_handle, + ib_cm_event, NULL, 0, cr_ptr->sp_ptr); + } else { + dapl_evd_connection_callback(cm_ptr, + ib_cm_event, + NULL, 0, (void *)ep_ptr); + } + } else { + dapl_os_unlock(&ep_ptr->header.lock); + } +} + +/* + * dapl_ep_link_cm + * + * Add linking of provider's CM object to a EP structure + * This enables multiple CM's per EP, and syncronization + * + * Input: + * DAPL_EP *ep_ptr + * dp_ib_cm_handle_t *cm_ptr defined in provider's dapl_util.h + * + * CM objects linked with EP using ->list_entry + * Output: + * none + * + * Returns: + * none + * + */ +void dapl_ep_link_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr) +{ + dapl_os_lock(&ep_ptr->header.lock); + dapls_cm_acquire(cm_ptr); + dapl_llist_add_tail(&ep_ptr->cm_list_head, &cm_ptr->list_entry, cm_ptr); + dapl_os_unlock(&ep_ptr->header.lock); +} + +void dapl_ep_unlink_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr) +{ + dapl_os_lock(&ep_ptr->header.lock); + dapl_llist_remove_entry(&ep_ptr->cm_list_head, &cm_ptr->list_entry); + dapls_cm_release(cm_ptr); + dapl_os_unlock(&ep_ptr->header.lock); +} + +static void dapli_ep_flush_evd(DAPL_EVD *evd_ptr) +{ + DAT_RETURN dat_status; + + dapl_os_lock(&evd_ptr->header.lock); + dat_status = dapls_evd_copy_cq(evd_ptr); + dapl_os_unlock(&evd_ptr->header.lock); + + if (dat_status == DAT_QUEUE_FULL) + dapls_evd_post_overflow_event(evd_ptr); +} + +void dapls_ep_flush_cqs(DAPL_EP * ep_ptr) +{ + dapli_ep_flush_evd((DAPL_EVD *) ep_ptr->param.request_evd_handle); + while (dapls_cb_pending(&ep_ptr->recv_buffer)) + dapli_ep_flush_evd((DAPL_EVD *) ep_ptr->param.recv_evd_handle); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 8 + * End: 
+ */ diff --git a/trunk/ulp/dapl2/dapl/common/dapl_evd_dequeue.c b/trunk/ulp/dapl2/dapl/common/dapl_evd_dequeue.c index 7632fe1e..071e0a67 100644 --- a/trunk/ulp/dapl2/dapl/common/dapl_evd_dequeue.c +++ b/trunk/ulp/dapl2/dapl/common/dapl_evd_dequeue.c @@ -1,134 +1,135 @@ -/* - * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -/********************************************************************** - * - * MODULE: dapl_evd_dequeue.c - * - * PURPOSE: Event Management - * - * Description: Interfaces in this file are completely described in - * the uDAPL 1.1 API, Chapter 6, section 3 - * - * $Id:$ - **********************************************************************/ - -#include "dapl.h" -#include "dapl_ring_buffer_util.h" -#include "dapl_evd_util.h" - -/* - * dapl_evd_dequeue - * - * DAPL Requirements Version xxx, 6.3.2.7 - * - * Remove first element from an event dispatcher - * - * Input: - * evd_handle - * - * Output: - * event - * - * Returns: - * DAT_SUCCESS - * DAT_INVALID_HANDLE - * DAT_INVALID_PARAMETER - * DAT_INVALID_STATE - * DAT_QUEUE_EMPTY - */ - -DAT_RETURN DAT_API dapl_evd_dequeue(IN DAT_EVD_HANDLE evd_handle, - OUT DAT_EVENT * event) -{ - DAPL_EVD *evd_ptr; - DAT_EVENT *local_event; - DAT_RETURN dat_status; - - dapl_dbg_log(DAPL_DBG_TYPE_API, - "dapl_evd_dequeue (%p, %p)\n", evd_handle, event); - - evd_ptr = (DAPL_EVD *) evd_handle; - dat_status = DAT_SUCCESS; - - if (DAPL_BAD_HANDLE(evd_handle, DAPL_MAGIC_EVD)) { - dat_status = DAT_ERROR(DAT_INVALID_HANDLE, 0); - goto bail; - } - - if (event == NULL) { - dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG2); - goto bail; - } - DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE); - - /* - * We need to dequeue under lock, as the IB OS Access API - * restricts us from having multiple threads in CQ poll, and the - * DAPL 1.1 API allows multiple threads in dat_evd_dequeue() - */ - dapl_os_lock(&evd_ptr->header.lock); - - /* - * Make sure there are no other waiters and the evd is active. - * Currently this means only the OPEN state is allowed. - */ - if (evd_ptr->evd_state != DAPL_EVD_STATE_OPEN || - evd_ptr->catastrophic_overflow) { - dapl_os_unlock(&evd_ptr->header.lock); - dat_status = DAT_ERROR(DAT_INVALID_STATE, 0); - goto bail; - } - - /* - * Try the EVD rbuf first; poll from the CQ only if that's empty. 
- * This keeps events in order if dat_evd_wait() has copied events - * from CQ to EVD. - */ - local_event = - (DAT_EVENT *) dapls_rbuf_remove(&evd_ptr->pending_event_queue); - if (local_event != NULL) { - *event = *local_event; - dat_status = dapls_rbuf_add(&evd_ptr->free_event_queue, - local_event); - DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_FOUND); - - } else if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) { - dat_status = dapls_evd_cq_poll_to_event(evd_ptr, event); - DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_POLL); - } else { - dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0); - DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_NOT_FOUND); - } - - dapl_os_unlock(&evd_ptr->header.lock); - bail: - dapl_dbg_log(DAPL_DBG_TYPE_RTN, - "dapl_evd_dequeue () returns 0x%x\n", dat_status); - - return dat_status; -} +/* + * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +/********************************************************************** + * + * MODULE: dapl_evd_dequeue.c + * + * PURPOSE: Event Management + * + * Description: Interfaces in this file are completely described in + * the uDAPL 1.1 API, Chapter 6, section 3 + * + * $Id:$ + **********************************************************************/ + +#include "dapl.h" +#include "dapl_ring_buffer_util.h" +#include "dapl_evd_util.h" + +/* + * dapl_evd_dequeue + * + * DAPL Requirements Version xxx, 6.3.2.7 + * + * Remove first element from an event dispatcher + * + * Input: + * evd_handle + * + * Output: + * event + * + * Returns: + * DAT_SUCCESS + * DAT_INVALID_HANDLE + * DAT_INVALID_PARAMETER + * DAT_INVALID_STATE + * DAT_QUEUE_EMPTY + */ + +DAT_RETURN DAT_API dapl_evd_dequeue(IN DAT_EVD_HANDLE evd_handle, + OUT DAT_EVENT * event) +{ + DAPL_EVD *evd_ptr; + DAT_EVENT *local_event; + DAT_RETURN dat_status; + + evd_ptr = (DAPL_EVD *) evd_handle; + dat_status = DAT_SUCCESS; + + if (DAPL_BAD_HANDLE(evd_handle, DAPL_MAGIC_EVD)) { + dat_status = DAT_ERROR(DAT_INVALID_HANDLE, 0); + goto bail; + } + + if (event == NULL) { + dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG2); + goto bail; + } + DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE); + + /* + * We need to dequeue under lock, as the IB OS Access API + * restricts us from having multiple threads in CQ poll, and the + * DAPL 1.1 API allows multiple threads in dat_evd_dequeue() + */ + dapl_os_lock(&evd_ptr->header.lock); + + /* + * Make sure there are no other waiters and the evd is active. + * Currently this means only the OPEN state is allowed. + */ + if (evd_ptr->evd_state != DAPL_EVD_STATE_OPEN || + evd_ptr->catastrophic_overflow) { + dapl_os_unlock(&evd_ptr->header.lock); + dat_status = DAT_ERROR(DAT_INVALID_STATE, 0); + goto bail; + } + + /* + * Try the EVD rbuf first; poll from the CQ only if that's empty. + * This keeps events in order if dat_evd_wait() has copied events + * from CQ to EVD. 
+ */ + local_event = + (DAT_EVENT *) dapls_rbuf_remove(&evd_ptr->pending_event_queue); + if (local_event != NULL) { + *event = *local_event; + dat_status = dapls_rbuf_add(&evd_ptr->free_event_queue, + local_event); + DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_FOUND); + + } else if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) { + dat_status = dapls_evd_cq_poll_to_event(evd_ptr, event); + DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_POLL); + } else { + dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0); + DAPL_CNTR(evd_ptr, DCNT_EVD_DEQUEUE_NOT_FOUND); + } + +#ifdef DAPL_DBG + if (dat_status == DAT_SUCCESS) + dapl_dbg_log(DAPL_DBG_TYPE_EVD, + "dapl_evd_dequeue() Event(%p) = 0x%x\n", + event->evd_handle, event->event_number); +#endif + dapl_os_unlock(&evd_ptr->header.lock); + bail: + + return dat_status; +} diff --git a/trunk/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c b/trunk/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c index e843829d..1dc9ab84 100644 --- a/trunk/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c +++ b/trunk/ulp/dapl2/dapl/ibal/dapl_ibal_qp.c @@ -1,705 +1,713 @@ - -/* - * Copyright (c) 2005-2007 Intel Corporation. All rights reserved. - * Copyright (c) 2002, Network Appliance, Inc. All rights reserved. - * - * This Software is licensed under the terms of the "Common Public - * License" a copy of which is in the file LICENSE.txt in the root - * directory. The license is also available from the Open Source - * Initiative, see http://www.opensource.org/licenses/cpl.php. 
- * - */ - -/********************************************************************** - * - * MODULE: dapl_ibal_qp.c - * - * PURPOSE: IB QP routines for access to IBAL APIs - * - * $Id: dapl_ibal_qp.c 33 2005-07-11 19:51:17Z ftillier $ - * - **********************************************************************/ - -#include "dapl.h" -#include "dapl_adapter_util.h" -#include "dapl_evd_util.h" -#include "dapl_ibal_util.h" -#include "dapl_ep_util.h" - -#define DAPL_IBAL_QKEY 0 -#define DAPL_IBAL_START_PSN 0 - -extern DAT_RETURN -dapls_ib_cq_late_alloc ( IN ib_pd_handle_t pd_handle, - IN DAPL_EVD *evd_ptr ); - -static void -dapli_ib_qp_async_error_cb( IN ib_async_event_rec_t* p_err_rec ) -{ - DAPL_EP *ep_ptr = (DAPL_EP *)p_err_rec->context; - DAPL_EVD *evd_ptr; - DAPL_IA *ia_ptr; - dapl_ibal_ca_t *p_ca; - dapl_ibal_evd_cb_t *evd_cb; - - dapl_dbg_log (DAPL_DBG_TYPE_ERR,"--> DiQpAEC QP event %s qp ctx %p\n", - ib_get_async_event_str(p_err_rec->code), p_err_rec->context); - dapl_dbg_log (DAPL_DBG_TYPE_ERR,"--> DiQpAEC qp_handle %p qpn %u\n", - ((DAPL_EP *)p_err_rec->context)->qp_handle, - ((DAPL_EP *)p_err_rec->context)->qpn); - - /* - * Verify handles EP, EVD, and hca_handle - */ - if (DAPL_BAD_HANDLE (ep_ptr, DAPL_MAGIC_EP ) || - DAPL_BAD_HANDLE (ep_ptr->param.connect_evd_handle, DAPL_MAGIC_EVD)) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DiQpAEC: invalid EP %p \n", ep_ptr); - return; - } - ia_ptr = ep_ptr->header.owner_ia; - evd_ptr = ia_ptr->async_error_evd; - - if (DAPL_BAD_HANDLE (evd_ptr, DAPL_MAGIC_EVD) || - ! 
(evd_ptr->evd_flags & DAT_EVD_ASYNC_FLAG)) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DiQpAEC: invalid EVD %p \n", evd_ptr); - return; - } - p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; - if (p_ca == NULL) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DiQpAEC: can't find %s HCA\n", - (ia_ptr->header.provider)->device_name); - return; - } - - /* find QP error callback using ia_ptr for context */ - evd_cb = dapli_find_evd_cb_by_context (ia_ptr, p_ca); - if ((evd_cb == NULL) || (evd_cb->pfn_async_qp_err_cb == NULL)) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DiQpAEC: no ERROR cb on p_ca %p found\n", p_ca); - return; - } - - dapl_os_lock (&ep_ptr->header.lock); - ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECT_PENDING; - dapl_os_unlock (&ep_ptr->header.lock); - - /* force disconnect, QP error state, to insure DTO's get flushed */ - dapls_ib_disconnect ( ep_ptr, DAT_CLOSE_ABRUPT_FLAG ); - - /* maps to dapl_evd_qp_async_error_callback(), context is EP */ - evd_cb->pfn_async_qp_err_cb( (ib_hca_handle_t)p_ca, - ep_ptr->qp_handle, - (ib_error_record_t*)&p_err_rec->code, - ep_ptr ); -} - -/* - * dapls_ib_qp_alloc - * - * Alloc a QP - * - * Input: - * *ia_ptr pointer to DAPL IA - * *ep_ptr pointer to DAPL EP - * *ep_ctx_ptr pointer to DAPL EP context - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * - */ -DAT_RETURN -dapls_ib_qp_alloc ( - IN DAPL_IA *ia_ptr, - IN DAPL_EP *ep_ptr, - IN DAPL_EP *ep_ctx_ptr) -{ - DAT_EP_ATTR *attr; - DAPL_EVD *recv_evd_ptr, *request_evd_ptr; - DAT_RETURN dat_status; - ib_api_status_t ib_status; - ib_qp_create_t qp_create; - ib_pd_handle_t ib_pd_handle; - ib_cq_handle_t cq_recv; - ib_cq_handle_t cq_send; - dapl_ibal_ca_t *p_ca; - dapl_ibal_port_t *p_active_port; - ib_qp_attr_t qp_attr; - dp_ib_cm_handle_t cm_ptr; - - attr = &ep_ptr->param.ep_attr; - - dapl_os_assert ( ep_ptr->param.pz_handle != NULL ); - - ib_pd_handle = ((DAPL_PZ *)ep_ptr->param.pz_handle)->pd_handle; - 
dapl_os_assert(ib_pd_handle); - recv_evd_ptr = (DAPL_EVD *) ep_ptr->param.recv_evd_handle; - request_evd_ptr = (DAPL_EVD *) ep_ptr->param.request_evd_handle; - - cq_recv = IB_INVALID_HANDLE; - cq_send = IB_INVALID_HANDLE; - - dapl_os_assert ( recv_evd_ptr != DAT_HANDLE_NULL ); - { - cq_recv = (ib_cq_handle_t) recv_evd_ptr->ib_cq_handle; - - if ((cq_recv == IB_INVALID_HANDLE) && - ( 0 != (recv_evd_ptr->evd_flags & ~DAT_EVD_SOFTWARE_FLAG) )) - { - dat_status = dapls_ib_cq_late_alloc ( ib_pd_handle, recv_evd_ptr); - if (dat_status != DAT_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> %s: failed to create CQ\n","DsQA"); - return (dat_status); - } - - dat_status = dapls_set_cq_notify (ia_ptr, recv_evd_ptr); - - if (dat_status != DAT_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> %s: failed to enable notify CQ\n","DsQA"); - return (dat_status); - } - - cq_recv = (ib_cq_handle_t) recv_evd_ptr->ib_cq_handle; - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsQA: alloc_recv_CQ = %p\n", cq_recv); - - } - } - - dapl_os_assert ( request_evd_ptr != DAT_HANDLE_NULL ); - { - cq_send = (ib_cq_handle_t) request_evd_ptr->ib_cq_handle; - - if ((cq_send == IB_INVALID_HANDLE) && - ( 0 != (request_evd_ptr->evd_flags & ~DAT_EVD_SOFTWARE_FLAG) )) - { - dat_status = dapls_ib_cq_late_alloc (ib_pd_handle, request_evd_ptr); - if (dat_status != DAT_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> %s: failed to create CQ\n","DsQA"); - return (dat_status); - } - - dat_status = dapls_set_cq_notify (ia_ptr, request_evd_ptr); - - if (dat_status != DAT_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> %s: failed to enable notify CQ\n","DsQA"); - return (dat_status); - } - - cq_send = (ib_cq_handle_t) request_evd_ptr->ib_cq_handle; - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsQA: alloc_send_CQ = %p\n", cq_send); - } - } - - /* - * Get the CA structure - */ - p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; - - dapl_os_memzero (&qp_create, sizeof (qp_create)); - 
qp_create.qp_type = IB_QPT_RELIABLE_CONN; - qp_create.sq_depth = attr->max_request_dtos; - qp_create.rq_depth = attr->max_recv_dtos; - qp_create.sq_sge = attr->max_recv_iov; - qp_create.rq_sge = attr->max_request_iov; - qp_create.h_sq_cq = cq_send; - qp_create.h_rq_cq = cq_recv; - qp_create.sq_signaled = FALSE; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsQA: sqd,iov=%d,%d rqd,iov=%d,%d\n", - attr->max_request_dtos, attr->max_request_iov, - attr->max_recv_dtos, attr->max_recv_iov); - - ib_status = ib_create_qp ( - ib_pd_handle, - &qp_create, - (void *) ep_ctx_ptr /* context */, - dapli_ib_qp_async_error_cb, - &ep_ptr->qp_handle); - - if (ib_status != IB_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsQA: Create QP failed = %s\n", - ib_get_err_str(ib_status)); - return (DAT_INSUFFICIENT_RESOURCES); - } - /* EP-CM linking */ - cm_ptr = ibal_cm_alloc(); - if (!cm_ptr) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsQA: Create CM failed\n"); - return (DAT_INSUFFICIENT_RESOURCES); - } - cm_ptr->ib_cm.h_qp = ep_ptr->qp_handle; - cm_ptr->ep = ep_ptr; - dapl_ep_link_cm(ep_ptr, cm_ptr); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQA: EP=%p, tEVD=%p, rEVD=%p QP=%p\n", - ep_ptr, ep_ptr->param.request_evd_handle, - ep_ptr->param.recv_evd_handle, - ep_ptr->qp_handle ); - - ep_ptr->qp_state = IB_QPS_RESET; - - p_active_port = dapli_ibal_get_port(p_ca,(uint8_t)ia_ptr->hca_ptr->port_num); - - if (NULL == p_active_port) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsQA: Port %d is not available = %d\n", - ia_ptr->hca_ptr->port_num, __LINE__); - return (DAT_INVALID_STATE); - } - - ib_status = dapls_modify_qp_state_to_init ( ep_ptr->qp_handle, - &ep_ptr->param.ep_attr, - p_active_port ); - - if ( ib_status != IB_SUCCESS ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsQA: Change QP state to INIT failed = %s\n", - ib_get_err_str(ib_status)); - return (DAT_INVALID_HANDLE); - } - ib_status = ib_query_qp ( ep_ptr->qp_handle, &qp_attr ); - - ep_ptr->qp_state = qp_attr.state; - 
ep_ptr->qpn = qp_attr.num; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQAQA: EP:%p new_QP %p state %s\n", - ep_ptr, - ep_ptr->qp_handle, - ib_get_port_state_str(ep_ptr->qp_state)); - - return (DAT_SUCCESS); -} - - -/* - * dapls_ib_qp_free - * - * Free a QP - * - * Input: - * *ia_ptr pointer to IA structure - * *ep_ptr pointer to EP structure - * - * Output: - * none - * - * Returns: - * none - * - */ -DAT_RETURN -dapls_ib_qp_free ( - IN DAPL_IA *ia_ptr, - IN DAPL_EP *ep_ptr ) -{ - - UNREFERENCED_PARAMETER(ia_ptr); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQF: free %p, state %s\n", - ep_ptr->qp_handle, - ib_get_port_state_str(ep_ptr->qp_state)); - - dapl_os_lock(&ep_ptr->header.lock); - if (( ep_ptr->qp_handle != IB_INVALID_HANDLE )) - { - ib_destroy_qp ( ep_ptr->qp_handle, ib_sync_destroy ); - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQF: freed QP %p\n", - ep_ptr->qp_handle ); - ep_ptr->qp_handle = IB_INVALID_HANDLE; - } - dapl_os_unlock(&ep_ptr->header.lock); - - return DAT_SUCCESS; -} - - -/* - * dapls_ib_qp_modify - * - * Set the QP to the parameters specified in an EP_PARAM - * - * We can't be sure what state the QP is in so we first obtain the state - * from the driver. The EP_PARAM structure that is provided has been - * sanitized such that only non-zero values are valid. 
- * - * Input: - * *ia_ptr pointer to DAPL IA - * *ep_ptr pointer to DAPL EP - * *ep_attr pointer to DAT EP attribute - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INVALID_PARAMETER - * - */ -DAT_RETURN -dapls_ib_qp_modify ( - IN DAPL_IA *ia_ptr, - IN DAPL_EP *ep_ptr, - IN DAT_EP_ATTR *ep_attr ) -{ - ib_qp_attr_t qp_attr; - ib_api_status_t ib_status; - ib_qp_handle_t qp_handle; - ib_qp_state_t qp_state; - ib_qp_mod_t qp_mod; - ib_av_attr_t *p_av_attr; - ib_qp_opts_t *p_qp_opts; - uint32_t *p_sq_depth, *p_rq_depth; - DAT_BOOLEAN need_modify; - DAT_RETURN dat_status; - - qp_handle = ep_ptr->qp_handle; - need_modify = DAT_FALSE; - dat_status = DAT_SUCCESS; - if ( ia_ptr == NULL || ia_ptr->header.magic != DAPL_MAGIC_IA ) - { - dat_status = DAT_INVALID_HANDLE; - goto bail; - } - /* - * Query the QP to get the current state. - */ - ib_status = ib_query_qp ( qp_handle, &qp_attr ); - - if ( ib_status != IB_SUCCESS ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsIQM: Query QP failed = %s\n", - ib_get_err_str(ib_status)); - dat_status = DAT_INTERNAL_ERROR; - goto bail; - } - - qp_state = qp_attr.state; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM: modify qp state=%d\n",qp_state); - /* - * Check if we have the right qp_state or not - */ - if ( (qp_state != IB_QPS_RTR ) && (qp_state != IB_QPS_RTS ) ) - { - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsIQM: postpone to modify qp to EP values later\n"); - dat_status = DAT_SUCCESS; - goto bail; - } - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - if (qp_state == IB_QPS_RTR) - { - p_av_attr = &qp_mod.state.rtr.primary_av; - p_qp_opts = &qp_mod.state.rtr.opts; - p_sq_depth = &qp_mod.state.rtr.sq_depth; - p_rq_depth = &qp_mod.state.rtr.rq_depth; - } - else - { - /* - * RTS does not have primary_av field - */ - p_av_attr = &qp_mod.state.rts.alternate_av; - p_qp_opts = &qp_mod.state.rts.opts; - p_sq_depth = &qp_mod.state.rts.sq_depth; - p_rq_depth = &qp_mod.state.rts.rq_depth; - 
} - - if ( (ep_attr->max_recv_dtos > 0) && - ((DAT_UINT32)ep_attr->max_recv_dtos != qp_attr.rq_depth) ) - { - dapl_dbg_log (DAPL_DBG_TYPE_EP,"--> DsIQM: rq_depth modified (%d,%d)\n", - qp_attr.rq_depth, ep_attr->max_recv_dtos); - - *p_rq_depth = ep_attr->max_recv_dtos; - *p_qp_opts |= IB_MOD_QP_RQ_DEPTH; - need_modify = DAT_TRUE; - } - - if ( (ep_attr->max_request_dtos > 0) && - ((DAT_UINT32)ep_attr->max_request_dtos != qp_attr.sq_depth) ) - { - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsIQM: sq_depth modified (%d,%d)\n", - qp_attr.sq_depth, ep_attr->max_request_dtos); - - *p_sq_depth = ep_attr->max_request_dtos; - *p_qp_opts |= IB_MOD_QP_SQ_DEPTH; - need_modify = DAT_TRUE; - } - - qp_mod.req_state = qp_state; - - if ( need_modify == DAT_TRUE ) - { - ib_status = ib_modify_qp (qp_handle, &qp_mod); - if ( ib_status != IB_SUCCESS) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> %s: ib_status = %d\n", - "DsIQM", ib_status); - dat_status = DAT_INTERNAL_ERROR; - } - } - -bail: - - return dat_status; -} - - -ib_api_status_t -dapls_modify_qp_state_to_error ( ib_qp_handle_t qp_handle ) -{ - ib_qp_mod_t qp_mod; - ib_api_status_t ib_status; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_Err: QP state change --> Err\n"); - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_ERROR; - - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - return (ib_status); -} - - -ib_api_status_t -dapls_modify_qp_state_to_reset ( ib_qp_handle_t qp_handle ) -{ - ib_qp_mod_t qp_mod; - ib_api_status_t ib_status; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RESET: QP state change\n"); - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_RESET; - - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - return (ib_status); -} - - -ib_api_status_t -dapls_modify_qp_state_to_init ( - IN ib_qp_handle_t qp_handle, - IN DAT_EP_ATTR *p_attr, - IN dapl_ibal_port_t *p_port ) -{ - ib_qp_mod_t qp_mod; - ib_api_status_t ib_status; - - dapl_os_memzero (&qp_mod, 
sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_INIT; - qp_mod.state.init.primary_port = p_port->p_attr->port_num; - qp_mod.state.init.qkey = DAPL_IBAL_QKEY; - qp_mod.state.init.pkey_index = 0; - qp_mod.state.init.access_ctrl = IB_AC_LOCAL_WRITE | - IB_AC_RDMA_WRITE | - IB_AC_MW_BIND | - IB_AC_ATOMIC; - if ((p_attr->max_rdma_read_in > 0) || (p_attr->max_rdma_read_out > 0)) - { - qp_mod.state.init.access_ctrl |= IB_AC_RDMA_READ; - } - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_INIT: QP(%p) state change, %s\n", - qp_handle, ib_get_err_str(ib_status)); - - return (ib_status); -} - - -ib_api_status_t -dapls_modify_qp_state_to_rtr ( - ib_qp_handle_t qp_handle, - ib_net32_t dest_qp, - ib_lid_t dest_lid, - dapl_ibal_port_t *p_port) -{ - ib_qp_mod_t qp_mod; - ib_api_status_t ib_status; - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_RTR; - qp_mod.state.rtr.rq_psn = DAPL_IBAL_START_PSN; - qp_mod.state.rtr.dest_qp = dest_qp; - qp_mod.state.rtr.primary_av.port_num = p_port->p_attr->port_num; - qp_mod.state.rtr.primary_av.sl = 0; - qp_mod.state.rtr.primary_av.dlid = dest_lid; - qp_mod.state.rtr.primary_av.grh_valid = 0; /* FALSE */ - qp_mod.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS; - qp_mod.state.rtr.primary_av.path_bits = 0; - qp_mod.state.rtr.primary_av.conn.path_mtu = p_port->p_attr->mtu; - qp_mod.state.rtr.primary_av.conn.local_ack_timeout = 7; - qp_mod.state.rtr.primary_av.conn.seq_err_retry_cnt = 7; - qp_mod.state.rtr.primary_av.conn.rnr_retry_cnt = IB_RNR_RETRY_CNT; - qp_mod.state.rtr.resp_res = 4; // in-flight RDMAs - qp_mod.state.rtr.rnr_nak_timeout = IB_RNR_NAK_TIMEOUT; - - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RTR: QP(%p) state change %s\n", - qp_handle, ib_get_err_str(ib_status)); - - return (ib_status); -} - -ib_api_status_t -dapls_modify_qp_state_to_rts ( ib_qp_handle_t qp_handle ) -{ - ib_qp_mod_t 
qp_mod; - ib_api_status_t ib_status; - - dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); - - qp_mod.req_state = IB_QPS_RTS; - qp_mod.state.rts.sq_psn = DAPL_IBAL_START_PSN; - qp_mod.state.rts.retry_cnt = 7; - qp_mod.state.rts.rnr_retry_cnt = IB_RNR_RETRY_CNT; - qp_mod.state.rtr.rnr_nak_timeout = IB_RNR_NAK_TIMEOUT; - qp_mod.state.rts.local_ack_timeout = 7; - qp_mod.state.rts.init_depth = 4; - - ib_status = ib_modify_qp (qp_handle, &qp_mod); - - dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RTS: QP(%p) state change %s\n", - qp_handle, ib_get_err_str(ib_status)); - - return (ib_status); -} - - -/* - * dapls_ib_reinit_ep - * - * Move the QP to INIT state again. - * - * Input: - * ep_ptr DAPL_EP - * - * Output: - * none - * - * Returns: - * void - * - */ -void -dapls_ib_reinit_ep ( IN DAPL_EP *ep_ptr ) -{ - DAPL_IA *ia_ptr; - ib_api_status_t ib_status; - dapl_ibal_ca_t *p_ca; - dapl_ibal_port_t *p_active_port; - - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsIQM_REINIT: EP(%p) QP(%p) state change\n", - ep_ptr, ep_ptr->qp_handle ); - - if ( ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsIRE: EP invalid state(%d)\n", - ep_ptr->param.ep_state); - return /*DAT_INVALID_STATE*/; - } - - ia_ptr = ep_ptr->header.owner_ia; - - /* Re-create QP if cleaned up, alloc will return init state */ - if ( ep_ptr->qp_handle == IB_INVALID_HANDLE ) - { - dapl_dbg_log (DAPL_DBG_TYPE_EP, - "--> DsIRE: !EP(%p)->qp_handle, re-create QP\n",ep_ptr); - ib_status = dapls_ib_qp_alloc ( ia_ptr, ep_ptr, ep_ptr ); - if ( ib_status != IB_SUCCESS ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsIRE: failed to move qp to RESET status = %s\n", - ib_get_err_str(ib_status)); - } - return /*ib_status*/; - } - - ib_status = dapls_modify_qp_state_to_reset (ep_ptr->qp_handle); - - if ( ib_status != IB_SUCCESS ) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsIRE: failed to move qp to RESET status = %s\n", - ib_get_err_str(ib_status)); - return 
/*DAT_INTERNAL_ERROR*/; - } - - ep_ptr->qp_state = IB_QPS_RESET; - - p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; - p_active_port = dapli_ibal_get_port ( p_ca, - (uint8_t)ia_ptr->hca_ptr->port_num ); - if (NULL == p_active_port) - { - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsIRE: Port %d is not available = %d\n", - ia_ptr->hca_ptr->port_num, __LINE__); - return /*DAT_INTERNAL_ERROR*/; - } - - /* May fail if QP still RESET and in timewait, keep in reset state */ - ib_status = dapls_modify_qp_state_to_init ( ep_ptr->qp_handle, - &ep_ptr->param.ep_attr, - p_active_port); - if ( ib_status != IB_SUCCESS ) - { - ep_ptr->qp_state = IB_QPS_RESET; - - dapl_dbg_log (DAPL_DBG_TYPE_ERR, - "--> DsIRE: failed to move qp to INIT status %s\n", - ib_get_err_str(ib_status)); - return /*DAT_INTERNAL_ERROR*/; - } - ep_ptr->qp_state = IB_QPS_INIT; -} - - -/* - * Local variables: - * c-indent-level: 4 - * c-basic-offset: 4 - * tab-width: 8 - * End: - */ - + +/* + * Copyright (c) 2005-2007 Intel Corporation. All rights reserved. + * Copyright (c) 2002, Network Appliance, Inc. All rights reserved. + * + * This Software is licensed under the terms of the "Common Public + * License" a copy of which is in the file LICENSE.txt in the root + * directory. The license is also available from the Open Source + * Initiative, see http://www.opensource.org/licenses/cpl.php. 
+ * + */ + +/********************************************************************** + * + * MODULE: dapl_ibal_qp.c + * + * PURPOSE: IB QP routines for access to IBAL APIs + * + * $Id: dapl_ibal_qp.c 33 2005-07-11 19:51:17Z ftillier $ + * + **********************************************************************/ + +#include "dapl.h" +#include "dapl_adapter_util.h" +#include "dapl_evd_util.h" +#include "dapl_ibal_util.h" +#include "dapl_ep_util.h" + +#define DAPL_IBAL_QKEY 0 +#define DAPL_IBAL_START_PSN 0 + +extern DAT_RETURN +dapls_ib_cq_late_alloc ( IN ib_pd_handle_t pd_handle, + IN DAPL_EVD *evd_ptr ); + +static void +dapli_ib_qp_async_error_cb( IN ib_async_event_rec_t* p_err_rec ) +{ + DAPL_EP *ep_ptr = (DAPL_EP *)p_err_rec->context; + DAPL_EVD *evd_ptr; + DAPL_IA *ia_ptr; + dapl_ibal_ca_t *p_ca; + dapl_ibal_evd_cb_t *evd_cb; + + dapl_dbg_log (DAPL_DBG_TYPE_ERR,"--> DiQpAEC QP event %s qp ctx %p\n", + ib_get_async_event_str(p_err_rec->code), p_err_rec->context); + dapl_dbg_log (DAPL_DBG_TYPE_ERR,"--> DiQpAEC qp_handle %p qpn %u\n", + ((DAPL_EP *)p_err_rec->context)->qp_handle, + ((DAPL_EP *)p_err_rec->context)->qpn); + + /* + * Verify handles EP, EVD, and hca_handle + */ + if (DAPL_BAD_HANDLE (ep_ptr, DAPL_MAGIC_EP ) || + DAPL_BAD_HANDLE (ep_ptr->param.connect_evd_handle, DAPL_MAGIC_EVD)) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DiQpAEC: invalid EP %p \n", ep_ptr); + return; + } + ia_ptr = ep_ptr->header.owner_ia; + evd_ptr = ia_ptr->async_error_evd; + + if (DAPL_BAD_HANDLE (evd_ptr, DAPL_MAGIC_EVD) || + ! 
(evd_ptr->evd_flags & DAT_EVD_ASYNC_FLAG)) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DiQpAEC: invalid EVD %p \n", evd_ptr); + return; + } + p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; + if (p_ca == NULL) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DiQpAEC: can't find %s HCA\n", + (ia_ptr->header.provider)->device_name); + return; + } + + /* find QP error callback using ia_ptr for context */ + evd_cb = dapli_find_evd_cb_by_context (ia_ptr, p_ca); + if ((evd_cb == NULL) || (evd_cb->pfn_async_qp_err_cb == NULL)) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DiQpAEC: no ERROR cb on p_ca %p found\n", p_ca); + return; + } + + dapl_os_lock (&ep_ptr->header.lock); + ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECT_PENDING; + dapl_os_unlock (&ep_ptr->header.lock); + + /* force disconnect, QP error state, to insure DTO's get flushed */ + dapls_ib_disconnect ( ep_ptr, DAT_CLOSE_ABRUPT_FLAG ); + + /* maps to dapl_evd_qp_async_error_callback(), context is EP */ + evd_cb->pfn_async_qp_err_cb( (ib_hca_handle_t)p_ca, + ep_ptr->qp_handle, + (ib_error_record_t*)&p_err_rec->code, + ep_ptr ); +} + +/* + * dapls_ib_qp_alloc + * + * Alloc a QP + * + * Input: + * *ia_ptr pointer to DAPL IA + * *ep_ptr pointer to DAPL EP + * *ep_ctx_ptr pointer to DAPL EP context + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * + */ +DAT_RETURN +dapls_ib_qp_alloc ( + IN DAPL_IA *ia_ptr, + IN DAPL_EP *ep_ptr, + IN DAPL_EP *ep_ctx_ptr) +{ + DAT_EP_ATTR *attr; + DAPL_EVD *recv_evd_ptr, *request_evd_ptr; + DAT_RETURN dat_status; + ib_api_status_t ib_status; + ib_qp_create_t qp_create; + ib_pd_handle_t ib_pd_handle; + ib_cq_handle_t cq_recv; + ib_cq_handle_t cq_send; + dapl_ibal_ca_t *p_ca; + dapl_ibal_port_t *p_active_port; + ib_qp_attr_t qp_attr; + dp_ib_cm_handle_t cm_ptr; + + attr = &ep_ptr->param.ep_attr; + + dapl_os_assert ( ep_ptr->param.pz_handle != NULL ); + + ib_pd_handle = ((DAPL_PZ *)ep_ptr->param.pz_handle)->pd_handle; + 
dapl_os_assert(ib_pd_handle); + recv_evd_ptr = (DAPL_EVD *) ep_ptr->param.recv_evd_handle; + request_evd_ptr = (DAPL_EVD *) ep_ptr->param.request_evd_handle; + + cq_recv = IB_INVALID_HANDLE; + cq_send = IB_INVALID_HANDLE; + + dapl_os_assert ( recv_evd_ptr != DAT_HANDLE_NULL ); + { + cq_recv = (ib_cq_handle_t) recv_evd_ptr->ib_cq_handle; + + if ((cq_recv == IB_INVALID_HANDLE) && + ( 0 != (recv_evd_ptr->evd_flags & ~DAT_EVD_SOFTWARE_FLAG) )) + { + dat_status = dapls_ib_cq_late_alloc ( ib_pd_handle, recv_evd_ptr); + if (dat_status != DAT_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> %s: failed to create CQ\n","DsQA"); + return (dat_status); + } + + dat_status = dapls_set_cq_notify (ia_ptr, recv_evd_ptr); + + if (dat_status != DAT_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> %s: failed to enable notify CQ\n","DsQA"); + return (dat_status); + } + + cq_recv = (ib_cq_handle_t) recv_evd_ptr->ib_cq_handle; + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsQA: alloc_recv_CQ = %p\n", cq_recv); + + } + } + + dapl_os_assert ( request_evd_ptr != DAT_HANDLE_NULL ); + { + cq_send = (ib_cq_handle_t) request_evd_ptr->ib_cq_handle; + + if ((cq_send == IB_INVALID_HANDLE) && + ( 0 != (request_evd_ptr->evd_flags & ~DAT_EVD_SOFTWARE_FLAG) )) + { + dat_status = dapls_ib_cq_late_alloc (ib_pd_handle, request_evd_ptr); + if (dat_status != DAT_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> %s: failed to create CQ\n","DsQA"); + return (dat_status); + } + + dat_status = dapls_set_cq_notify (ia_ptr, request_evd_ptr); + + if (dat_status != DAT_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> %s: failed to enable notify CQ\n","DsQA"); + return (dat_status); + } + + cq_send = (ib_cq_handle_t) request_evd_ptr->ib_cq_handle; + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsQA: alloc_send_CQ = %p\n", cq_send); + } + } + + /* + * Get the CA structure + */ + p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; + + dapl_os_memzero (&qp_create, sizeof (qp_create)); + 
qp_create.qp_type = IB_QPT_RELIABLE_CONN; + qp_create.sq_depth = attr->max_request_dtos; + qp_create.rq_depth = attr->max_recv_dtos; + qp_create.sq_sge = attr->max_recv_iov; + qp_create.rq_sge = attr->max_request_iov; + qp_create.h_sq_cq = cq_send; + qp_create.h_rq_cq = cq_recv; + qp_create.sq_signaled = FALSE; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsQA: sqd,iov=%d,%d rqd,iov=%d,%d\n", + attr->max_request_dtos, attr->max_request_iov, + attr->max_recv_dtos, attr->max_recv_iov); + + ib_status = ib_create_qp ( + ib_pd_handle, + &qp_create, + (void *) ep_ctx_ptr /* context */, + dapli_ib_qp_async_error_cb, + &ep_ptr->qp_handle); + + if (ib_status != IB_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsQA: Create QP failed = %s\n", + ib_get_err_str(ib_status)); + return (DAT_INSUFFICIENT_RESOURCES); + } + /* EP-CM linking */ + cm_ptr = ibal_cm_alloc(); + if (!cm_ptr) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsQA: Create CM failed\n"); + return (DAT_INSUFFICIENT_RESOURCES); + } + cm_ptr->ib_cm.h_qp = ep_ptr->qp_handle; + cm_ptr->ep = ep_ptr; + dapl_ep_link_cm(ep_ptr, cm_ptr); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQA: EP=%p, tEVD=%p, rEVD=%p QP=%p\n", + ep_ptr, ep_ptr->param.request_evd_handle, + ep_ptr->param.recv_evd_handle, + ep_ptr->qp_handle ); + + ep_ptr->qp_state = IB_QPS_RESET; + + p_active_port = dapli_ibal_get_port(p_ca,(uint8_t)ia_ptr->hca_ptr->port_num); + + if (NULL == p_active_port) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsQA: Port %d is not available = %d\n", + ia_ptr->hca_ptr->port_num, __LINE__); + return (DAT_INVALID_STATE); + } + + ib_status = dapls_modify_qp_state_to_init ( ep_ptr->qp_handle, + &ep_ptr->param.ep_attr, + p_active_port ); + + if ( ib_status != IB_SUCCESS ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsQA: Change QP state to INIT failed = %s\n", + ib_get_err_str(ib_status)); + return (DAT_INVALID_HANDLE); + } + ib_status = ib_query_qp ( ep_ptr->qp_handle, &qp_attr ); + + ep_ptr->qp_state = qp_attr.state; + 
ep_ptr->qpn = qp_attr.num; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQAQA: EP:%p new_QP %p state %s\n", + ep_ptr, + ep_ptr->qp_handle, + ib_get_port_state_str(ep_ptr->qp_state)); + + return (DAT_SUCCESS); +} + + +/* + * dapls_ib_qp_free + * + * Free a QP + * + * Input: + * *ia_ptr pointer to IA structure + * *ep_ptr pointer to EP structure + * + * Output: + * none + * + * Returns: + * none + * + */ +DAT_RETURN +dapls_ib_qp_free ( + IN DAPL_IA *ia_ptr, + IN DAPL_EP *ep_ptr ) +{ + ib_qp_handle_t qp; + + UNREFERENCED_PARAMETER(ia_ptr); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQF: free %p, state %s\n", + ep_ptr->qp_handle, + ib_get_port_state_str(ep_ptr->qp_state)); + + dapl_os_lock(&ep_ptr->header.lock); + if (( ep_ptr->qp_handle != IB_INVALID_HANDLE )) + { + qp = ep_ptr->qp_handle; + ep_ptr->qp_handle = IB_INVALID_HANDLE; + dapl_os_unlock(&ep_ptr->header.lock); + + dapls_modify_qp_state_to_error(qp); + dapls_ep_flush_cqs(ep_ptr); + + ib_destroy_qp ( qp, ib_sync_destroy ); + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQF: freed QP %p\n", + ep_ptr->qp_handle ); + } else { + dapl_os_unlock(&ep_ptr->header.lock); + } + + return DAT_SUCCESS; +} + + +/* + * dapls_ib_qp_modify + * + * Set the QP to the parameters specified in an EP_PARAM + * + * We can't be sure what state the QP is in so we first obtain the state + * from the driver. The EP_PARAM structure that is provided has been + * sanitized such that only non-zero values are valid. 
+ * + * Input: + * *ia_ptr pointer to DAPL IA + * *ep_ptr pointer to DAPL EP + * *ep_attr pointer to DAT EP attribute + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INVALID_PARAMETER + * + */ +DAT_RETURN +dapls_ib_qp_modify ( + IN DAPL_IA *ia_ptr, + IN DAPL_EP *ep_ptr, + IN DAT_EP_ATTR *ep_attr ) +{ + ib_qp_attr_t qp_attr; + ib_api_status_t ib_status; + ib_qp_handle_t qp_handle; + ib_qp_state_t qp_state; + ib_qp_mod_t qp_mod; + ib_av_attr_t *p_av_attr; + ib_qp_opts_t *p_qp_opts; + uint32_t *p_sq_depth, *p_rq_depth; + DAT_BOOLEAN need_modify; + DAT_RETURN dat_status; + + qp_handle = ep_ptr->qp_handle; + need_modify = DAT_FALSE; + dat_status = DAT_SUCCESS; + if ( ia_ptr == NULL || ia_ptr->header.magic != DAPL_MAGIC_IA ) + { + dat_status = DAT_INVALID_HANDLE; + goto bail; + } + /* + * Query the QP to get the current state. + */ + ib_status = ib_query_qp ( qp_handle, &qp_attr ); + + if ( ib_status != IB_SUCCESS ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsIQM: Query QP failed = %s\n", + ib_get_err_str(ib_status)); + dat_status = DAT_INTERNAL_ERROR; + goto bail; + } + + qp_state = qp_attr.state; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM: modify qp state=%d\n",qp_state); + /* + * Check if we have the right qp_state or not + */ + if ( (qp_state != IB_QPS_RTR ) && (qp_state != IB_QPS_RTS ) ) + { + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsIQM: postpone to modify qp to EP values later\n"); + dat_status = DAT_SUCCESS; + goto bail; + } + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + if (qp_state == IB_QPS_RTR) + { + p_av_attr = &qp_mod.state.rtr.primary_av; + p_qp_opts = &qp_mod.state.rtr.opts; + p_sq_depth = &qp_mod.state.rtr.sq_depth; + p_rq_depth = &qp_mod.state.rtr.rq_depth; + } + else + { + /* + * RTS does not have primary_av field + */ + p_av_attr = &qp_mod.state.rts.alternate_av; + p_qp_opts = &qp_mod.state.rts.opts; + p_sq_depth = &qp_mod.state.rts.sq_depth; + p_rq_depth = &qp_mod.state.rts.rq_depth; + 
} + + if ( (ep_attr->max_recv_dtos > 0) && + ((DAT_UINT32)ep_attr->max_recv_dtos != qp_attr.rq_depth) ) + { + dapl_dbg_log (DAPL_DBG_TYPE_EP,"--> DsIQM: rq_depth modified (%d,%d)\n", + qp_attr.rq_depth, ep_attr->max_recv_dtos); + + *p_rq_depth = ep_attr->max_recv_dtos; + *p_qp_opts |= IB_MOD_QP_RQ_DEPTH; + need_modify = DAT_TRUE; + } + + if ( (ep_attr->max_request_dtos > 0) && + ((DAT_UINT32)ep_attr->max_request_dtos != qp_attr.sq_depth) ) + { + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsIQM: sq_depth modified (%d,%d)\n", + qp_attr.sq_depth, ep_attr->max_request_dtos); + + *p_sq_depth = ep_attr->max_request_dtos; + *p_qp_opts |= IB_MOD_QP_SQ_DEPTH; + need_modify = DAT_TRUE; + } + + qp_mod.req_state = qp_state; + + if ( need_modify == DAT_TRUE ) + { + ib_status = ib_modify_qp (qp_handle, &qp_mod); + if ( ib_status != IB_SUCCESS) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> %s: ib_status = %d\n", + "DsIQM", ib_status); + dat_status = DAT_INTERNAL_ERROR; + } + } + +bail: + + return dat_status; +} + + +ib_api_status_t +dapls_modify_qp_state_to_error ( ib_qp_handle_t qp_handle ) +{ + ib_qp_mod_t qp_mod; + ib_api_status_t ib_status; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_Err: QP state change --> Err\n"); + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_ERROR; + + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + return (ib_status); +} + + +ib_api_status_t +dapls_modify_qp_state_to_reset ( ib_qp_handle_t qp_handle ) +{ + ib_qp_mod_t qp_mod; + ib_api_status_t ib_status; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RESET: QP state change\n"); + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_RESET; + + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + return (ib_status); +} + + +ib_api_status_t +dapls_modify_qp_state_to_init ( + IN ib_qp_handle_t qp_handle, + IN DAT_EP_ATTR *p_attr, + IN dapl_ibal_port_t *p_port ) +{ + ib_qp_mod_t qp_mod; + ib_api_status_t ib_status; + + dapl_os_memzero (&qp_mod, 
sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_INIT; + qp_mod.state.init.primary_port = p_port->p_attr->port_num; + qp_mod.state.init.qkey = DAPL_IBAL_QKEY; + qp_mod.state.init.pkey_index = 0; + qp_mod.state.init.access_ctrl = IB_AC_LOCAL_WRITE | + IB_AC_RDMA_WRITE | + IB_AC_MW_BIND | + IB_AC_ATOMIC; + if ((p_attr->max_rdma_read_in > 0) || (p_attr->max_rdma_read_out > 0)) + { + qp_mod.state.init.access_ctrl |= IB_AC_RDMA_READ; + } + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_INIT: QP(%p) state change, %s\n", + qp_handle, ib_get_err_str(ib_status)); + + return (ib_status); +} + + +ib_api_status_t +dapls_modify_qp_state_to_rtr ( + ib_qp_handle_t qp_handle, + ib_net32_t dest_qp, + ib_lid_t dest_lid, + dapl_ibal_port_t *p_port) +{ + ib_qp_mod_t qp_mod; + ib_api_status_t ib_status; + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_RTR; + qp_mod.state.rtr.rq_psn = DAPL_IBAL_START_PSN; + qp_mod.state.rtr.dest_qp = dest_qp; + qp_mod.state.rtr.primary_av.port_num = p_port->p_attr->port_num; + qp_mod.state.rtr.primary_av.sl = 0; + qp_mod.state.rtr.primary_av.dlid = dest_lid; + qp_mod.state.rtr.primary_av.grh_valid = 0; /* FALSE */ + qp_mod.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS; + qp_mod.state.rtr.primary_av.path_bits = 0; + qp_mod.state.rtr.primary_av.conn.path_mtu = p_port->p_attr->mtu; + qp_mod.state.rtr.primary_av.conn.local_ack_timeout = 7; + qp_mod.state.rtr.primary_av.conn.seq_err_retry_cnt = 7; + qp_mod.state.rtr.primary_av.conn.rnr_retry_cnt = IB_RNR_RETRY_CNT; + qp_mod.state.rtr.resp_res = 4; // in-flight RDMAs + qp_mod.state.rtr.rnr_nak_timeout = IB_RNR_NAK_TIMEOUT; + + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RTR: QP(%p) state change %s\n", + qp_handle, ib_get_err_str(ib_status)); + + return (ib_status); +} + +ib_api_status_t +dapls_modify_qp_state_to_rts ( ib_qp_handle_t qp_handle ) +{ + ib_qp_mod_t 
qp_mod; + ib_api_status_t ib_status; + + dapl_os_memzero (&qp_mod, sizeof (ib_qp_mod_t)); + + qp_mod.req_state = IB_QPS_RTS; + qp_mod.state.rts.sq_psn = DAPL_IBAL_START_PSN; + qp_mod.state.rts.retry_cnt = 7; + qp_mod.state.rts.rnr_retry_cnt = IB_RNR_RETRY_CNT; + qp_mod.state.rtr.rnr_nak_timeout = IB_RNR_NAK_TIMEOUT; + qp_mod.state.rts.local_ack_timeout = 7; + qp_mod.state.rts.init_depth = 4; + + ib_status = ib_modify_qp (qp_handle, &qp_mod); + + dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsIQM_RTS: QP(%p) state change %s\n", + qp_handle, ib_get_err_str(ib_status)); + + return (ib_status); +} + + +/* + * dapls_ib_reinit_ep + * + * Move the QP to INIT state again. + * + * Input: + * ep_ptr DAPL_EP + * + * Output: + * none + * + * Returns: + * void + * + */ +void +dapls_ib_reinit_ep ( IN DAPL_EP *ep_ptr ) +{ + DAPL_IA *ia_ptr; + ib_api_status_t ib_status; + dapl_ibal_ca_t *p_ca; + dapl_ibal_port_t *p_active_port; + + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsIQM_REINIT: EP(%p) QP(%p) state change\n", + ep_ptr, ep_ptr->qp_handle ); + + if ( ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsIRE: EP invalid state(%d)\n", + ep_ptr->param.ep_state); + return /*DAT_INVALID_STATE*/; + } + + ia_ptr = ep_ptr->header.owner_ia; + + /* Re-create QP if cleaned up, alloc will return init state */ + if ( ep_ptr->qp_handle == IB_INVALID_HANDLE ) + { + dapl_dbg_log (DAPL_DBG_TYPE_EP, + "--> DsIRE: !EP(%p)->qp_handle, re-create QP\n",ep_ptr); + ib_status = dapls_ib_qp_alloc ( ia_ptr, ep_ptr, ep_ptr ); + if ( ib_status != IB_SUCCESS ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsIRE: failed to move qp to RESET status = %s\n", + ib_get_err_str(ib_status)); + } + return /*ib_status*/; + } + + ib_status = dapls_modify_qp_state_to_reset (ep_ptr->qp_handle); + + if ( ib_status != IB_SUCCESS ) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsIRE: failed to move qp to RESET status = %s\n", + ib_get_err_str(ib_status)); + return 
/*DAT_INTERNAL_ERROR*/; + } + + ep_ptr->qp_state = IB_QPS_RESET; + + p_ca = (dapl_ibal_ca_t *) ia_ptr->hca_ptr->ib_hca_handle; + p_active_port = dapli_ibal_get_port ( p_ca, + (uint8_t)ia_ptr->hca_ptr->port_num ); + if (NULL == p_active_port) + { + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsIRE: Port %d is not available = %d\n", + ia_ptr->hca_ptr->port_num, __LINE__); + return /*DAT_INTERNAL_ERROR*/; + } + + /* May fail if QP still RESET and in timewait, keep in reset state */ + ib_status = dapls_modify_qp_state_to_init ( ep_ptr->qp_handle, + &ep_ptr->param.ep_attr, + p_active_port); + if ( ib_status != IB_SUCCESS ) + { + ep_ptr->qp_state = IB_QPS_RESET; + + dapl_dbg_log (DAPL_DBG_TYPE_ERR, + "--> DsIRE: failed to move qp to INIT status %s\n", + ib_get_err_str(ib_status)); + return /*DAT_INTERNAL_ERROR*/; + } + ep_ptr->qp_state = IB_QPS_INIT; +} + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 8 + * End: + */ + diff --git a/trunk/ulp/dapl2/dapl/openib_cma/device.c b/trunk/ulp/dapl2/dapl/openib_cma/device.c index e4ff22eb..454c394f 100644 --- a/trunk/ulp/dapl2/dapl/openib_cma/device.c +++ b/trunk/ulp/dapl2/dapl/openib_cma/device.c @@ -289,7 +289,6 @@ DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr) dapl_log(DAPL_DBG_TYPE_ERR, " open_hca: rdma_bind ERR %s." " Is %s configured?\n", strerror(errno), hca_name); - rdma_destroy_id(cm_id); return DAT_INVALID_ADDRESS; } diff --git a/trunk/ulp/dapl2/dapl/openib_common/dapl_ib_common.h b/trunk/ulp/dapl2/dapl/openib_common/dapl_ib_common.h index d3cf2e05..c9423179 100644 --- a/trunk/ulp/dapl2/dapl/openib_common/dapl_ib_common.h +++ b/trunk/ulp/dapl2/dapl/openib_common/dapl_ib_common.h @@ -1,403 +1,404 @@ -/* - * Copyright (c) 2009 Intel Corporation. All rights reserved. 
- * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -/* - * Definitions common to all OpenIB providers, cma, scm, ucm - */ - -#ifndef _DAPL_IB_COMMON_H_ -#define _DAPL_IB_COMMON_H_ - -#include - -#ifdef DAT_EXTENSIONS -#include -#endif - -#ifndef __cplusplus -#define false 0 -#define true 1 -#endif /*__cplusplus */ - -/* Typedefs to map common DAPL provider types to IB verbs */ -typedef struct ibv_qp *ib_qp_handle_t; -typedef struct ibv_cq *ib_cq_handle_t; -typedef struct ibv_pd *ib_pd_handle_t; -typedef struct ibv_mr *ib_mr_handle_t; -typedef struct ibv_mw *ib_mw_handle_t; -typedef struct ibv_wc ib_work_completion_t; -typedef struct ibv_ah *ib_ah_handle_t; -typedef union ibv_gid *ib_gid_handle_t; - -/* HCA context type maps to IB verbs */ -typedef struct ibv_context *ib_hca_handle_t; -typedef ib_hca_handle_t dapl_ibal_ca_t; - -/* QP info to exchange, wire protocol version for these CM's */ -#define DCM_VER 6 - -/* CM private data areas, same for all operations */ -#define DCM_MAX_PDATA_SIZE 118 - -/* - * UCM DAPL IB/QP address (lid, qp_num, gid) mapping to - * DAT_IA_ADDRESS_PTR, DAT_SOCK_ADDR2 (28 bytes) - * For applications, like MPI, that exchange IA_ADDRESS - * across the fabric before connecting, it eliminates the - * overhead of name and address resolution to the destination's - * CM services. UCM provider uses the following for - * DAT_IA_ADDRESS. Note: family == AF_INET6 to insure proper - * callee storage for address. 
- */ -union dcm_addr { - DAT_SOCK_ADDR6 so; - struct { - uint16_t family; /* sin6_family */ - uint16_t lid; /* sin6_port */ - uint32_t qpn; /* sin6_flowinfo */ - uint8_t gid[16]; /* sin6_addr */ - uint16_t port; /* sin6_scope_id */ - uint8_t sl; - uint8_t qp_type; - } ib; -}; - -/* 256 bytes total; default max_inline_send, min IB MTU size */ -typedef struct _ib_cm_msg -{ - uint16_t ver; - uint16_t op; - uint16_t sport; /* src cm port */ - uint16_t dport; /* dst cm port */ - uint32_t sqpn; /* src cm qpn */ - uint32_t dqpn; /* dst cm qpn */ - uint16_t p_size; - uint8_t resv[14]; - union dcm_addr saddr; - union dcm_addr daddr; - union dcm_addr saddr_alt; - union dcm_addr daddr_alt; - uint8_t p_data[DCM_MAX_PDATA_SIZE]; - -} ib_cm_msg_t; - -/* CM events */ -typedef enum { - IB_CME_CONNECTED, - IB_CME_DISCONNECTED, - IB_CME_DISCONNECTED_ON_LINK_DOWN, - IB_CME_CONNECTION_REQUEST_PENDING, - IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA, - IB_CME_CONNECTION_REQUEST_ACKED, - IB_CME_DESTINATION_REJECT, - IB_CME_DESTINATION_REJECT_PRIVATE_DATA, - IB_CME_DESTINATION_UNREACHABLE, - IB_CME_TOO_MANY_CONNECTION_REQUESTS, - IB_CME_LOCAL_FAILURE, - IB_CME_BROKEN, - IB_CME_TIMEOUT -} ib_cm_events_t; - -/* Operation and state mappings */ -typedef int ib_send_op_type_t; -typedef struct ibv_sge ib_data_segment_t; -typedef enum ibv_qp_state ib_qp_state_t; -typedef enum ibv_event_type ib_async_event_type; -typedef struct ibv_async_event ib_error_record_t; - -/* CQ notifications */ -typedef enum -{ - IB_NOTIFY_ON_NEXT_COMP, - IB_NOTIFY_ON_SOLIC_COMP - -} ib_notification_type_t; - -/* other mappings */ -typedef int ib_bool_t; -typedef union ibv_gid GID; -typedef char *IB_HCA_NAME; -typedef uint16_t ib_hca_port_t; - -/* Definitions */ -#define IB_INVALID_HANDLE NULL - -/* inline send rdma threshold */ -#define INLINE_SEND_IWARP_DEFAULT 64 -#define INLINE_SEND_IB_DEFAULT 256 - -/* qkey for UD QP's */ -#define DAT_UD_QKEY 0x78654321 - -/* RC timer - retry count defaults */ -#define 
DCM_ACK_TIMER 16 /* 5 bits, 4.096us*2^ack_timer. 16== 268ms */ -#define DCM_ACK_RETRY 7 /* 3 bits, 7 * 268ms = 1.8 seconds */ -#define DCM_RNR_TIMER 12 /* 5 bits, 12 =.64ms, 28 =163ms, 31 =491ms */ -#define DCM_RNR_RETRY 7 /* 3 bits, 7 == infinite */ -#define DCM_IB_MTU 2048 - -/* Global routing defaults */ -#define DCM_GLOBAL 0 /* global routing is disabled */ -#define DCM_HOP_LIMIT 0xff -#define DCM_TCLASS 0 - -/* DAPL uCM timers, default queue sizes */ -#define DCM_RETRY_CNT 15 -#define DCM_REP_TIME 800 /* reply timeout in m_secs */ -#define DCM_RTU_TIME 400 /* rtu timeout in m_secs */ -#define DCM_QP_SIZE 500 /* uCM tx, rx qp size */ -#define DCM_CQ_SIZE 500 /* uCM cq size */ - -/* DTO OPs, ordered for DAPL ENUM definitions */ -#define OP_RDMA_WRITE IBV_WR_RDMA_WRITE -#define OP_RDMA_WRITE_IMM IBV_WR_RDMA_WRITE_WITH_IMM -#define OP_SEND IBV_WR_SEND -#define OP_SEND_IMM IBV_WR_SEND_WITH_IMM -#define OP_RDMA_READ IBV_WR_RDMA_READ -#define OP_COMP_AND_SWAP IBV_WR_ATOMIC_CMP_AND_SWP -#define OP_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD -#define OP_RECEIVE 7 /* internal op */ -#define OP_RECEIVE_IMM 8 /* rdma write with immed, internel op */ -#define OP_RECEIVE_MSG_IMM 9 /* recv msg with immed, internel op */ -#define OP_BIND_MW 10 /* internal op */ -#define OP_SEND_UD 11 /* internal op */ -#define OP_RECV_UD 12 /* internal op */ -#define OP_INVALID 0xff - -/* Definitions to map QP state */ -#define IB_QP_STATE_RESET IBV_QPS_RESET -#define IB_QP_STATE_INIT IBV_QPS_INIT -#define IB_QP_STATE_RTR IBV_QPS_RTR -#define IB_QP_STATE_RTS IBV_QPS_RTS -#define IB_QP_STATE_SQD IBV_QPS_SQD -#define IB_QP_STATE_SQE IBV_QPS_SQE -#define IB_QP_STATE_ERROR IBV_QPS_ERR - -/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */ -/* some are errno and some are -n values */ - -/** - * ibv_get_device_name - Return kernel device name - * ibv_get_device_guid - Return device's node GUID - * ibv_open_device - Return ibv_context or NULL - * ibv_close_device - Return 0, 
(errno?) - * ibv_get_async_event - Return 0, -1 - * ibv_alloc_pd - Return ibv_pd, NULL - * ibv_dealloc_pd - Return 0, errno - * ibv_reg_mr - Return ibv_mr, NULL - * ibv_dereg_mr - Return 0, errno - * ibv_create_cq - Return ibv_cq, NULL - * ibv_destroy_cq - Return 0, errno - * ibv_get_cq_event - Return 0 & ibv_cq/context, int - * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error - * ibv_req_notify_cq - Return 0 (void?) - * ibv_create_qp - Return ibv_qp, NULL - * ibv_modify_qp - Return 0, errno - * ibv_destroy_qp - Return 0, errno - * ibv_post_send - Return 0, -1 & bad_wr - * ibv_post_recv - Return 0, -1 & bad_wr - */ - -/* async handler for DTO, CQ, QP, and unafiliated */ -typedef void (*ib_async_dto_handler_t)( - IN ib_hca_handle_t ib_hca_handle, - IN ib_error_record_t *err_code, - IN void *context); - -typedef void (*ib_async_cq_handler_t)( - IN ib_hca_handle_t ib_hca_handle, - IN ib_cq_handle_t ib_cq_handle, - IN ib_error_record_t *err_code, - IN void *context); - -typedef void (*ib_async_qp_handler_t)( - IN ib_hca_handle_t ib_hca_handle, - IN ib_qp_handle_t ib_qp_handle, - IN ib_error_record_t *err_code, - IN void *context); - -typedef void (*ib_async_handler_t)( - IN ib_hca_handle_t ib_hca_handle, - IN ib_error_record_t *err_code, - IN void *context); - -typedef enum -{ - IB_THREAD_INIT, - IB_THREAD_CREATE, - IB_THREAD_RUN, - IB_THREAD_CANCEL, - IB_THREAD_EXIT - -} ib_thread_state_t; - -typedef enum dapl_cm_op -{ - DCM_REQ = 1, - DCM_REP, - DCM_REJ_USER, /* user reject */ - DCM_REJ_CM, /* cm reject, no SID */ - DCM_RTU, - DCM_DREQ, - DCM_DREP - -} DAPL_CM_OP; - -typedef enum dapl_cm_state -{ - DCM_INIT, - DCM_LISTEN, - DCM_CONN_PENDING, - DCM_REP_PENDING, - DCM_ACCEPTING, - DCM_ACCEPTING_DATA, - DCM_ACCEPTED, - DCM_REJECTING, - DCM_REJECTED, - DCM_CONNECTED, - DCM_RELEASE, - DCM_DISC_PENDING, - DCM_DISCONNECTED, - DCM_DESTROY, - DCM_RTU_PENDING, - DCM_DISC_RECV, - DCM_FREE, - -} DAPL_CM_STATE; - -/* provider specfic fields for shared memory support */ 
-typedef uint32_t ib_shm_transport_t; - -/* prototypes */ -int32_t dapls_ib_init(void); -int32_t dapls_ib_release(void); - -/* util.c */ -enum ibv_mtu dapl_ib_mtu(int mtu); -char *dapl_ib_mtu_str(enum ibv_mtu mtu); -int getipaddr_netdev(char *name, char *addr, int addr_len); -DAT_RETURN getlocalipaddr(char *addr, int addr_len); - -/* qp.c */ -DAT_RETURN dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp); -DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t qp_handle, - IN ib_qp_state_t qp_state, - IN uint32_t qpn, - IN uint16_t lid, - IN ib_gid_handle_t gid); -ib_ah_handle_t dapls_create_ah( IN DAPL_HCA *hca, - IN ib_pd_handle_t pd, - IN ib_qp_handle_t qp, - IN uint16_t lid, - IN ib_gid_handle_t gid); - -/* inline functions */ -STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name) -{ - /* use ascii; name of local device */ - return dapl_os_strdup(name); -} - -STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name) -{ - return; -} - -/* - * Convert errno to DAT_RETURN values - */ -STATIC _INLINE_ DAT_RETURN -dapl_convert_errno( IN int err, IN const char *str ) -{ - if (!err) return DAT_SUCCESS; - -#if DAPL_DBG - if ((err != EAGAIN) && (err != ETIMEDOUT)) - dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err)); -#endif - - switch( err ) - { - case EOVERFLOW : return DAT_LENGTH_ERROR; - case EACCES : return DAT_PRIVILEGES_VIOLATION; - case EPERM : return DAT_PROTECTION_VIOLATION; - case EINVAL : return DAT_INVALID_HANDLE; - case EISCONN : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED; - case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY; - case ETIMEDOUT : return DAT_TIMEOUT_EXPIRED; - case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE; - case EADDRINUSE : return DAT_CONN_QUAL_IN_USE; - case EALREADY : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING; - case ENOMEM : return DAT_INSUFFICIENT_RESOURCES; - case EAGAIN : return DAT_QUEUE_EMPTY; - case EINTR 
: return DAT_INTERRUPTED_CALL; - case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED; - case EFAULT : - default : return DAT_INTERNAL_ERROR; - } - } - -STATIC _INLINE_ char * dapl_cm_state_str(IN int st) -{ - static char *state[] = { - "CM_INIT", - "CM_LISTEN", - "CM_CONN_PENDING", - "CM_REP_PENDING", - "CM_ACCEPTING", - "CM_ACCEPTING_DATA", - "CM_ACCEPTED", - "CM_REJECTING", - "CM_REJECTED", - "CM_CONNECTED", - "CM_RELEASE", - "CM_DISC_PENDING", - "CM_DISCONNECTED", - "CM_DESTROY", - "CM_RTU_PENDING", - "CM_DISC_RECV", - "CM_FREE" - }; - return ((st < 0 || st > 16) ? "Invalid CM state?" : state[st]); -} - -STATIC _INLINE_ char * dapl_cm_op_str(IN int op) -{ - static char *ops[] = { - "INVALID", - "REQ", - "REP", - "REJ_USER", - "REJ_CM", - "RTU", - "DREQ", - "DREP", - }; - return ((op < 1 || op > 7) ? "Invalid OP?" : ops[op]); -} - -#endif /* _DAPL_IB_COMMON_H_ */ +/* + * Copyright (c) 2009 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +/* + * Definitions common to all OpenIB providers, cma, scm, ucm + */ + +#ifndef _DAPL_IB_COMMON_H_ +#define _DAPL_IB_COMMON_H_ + +#include + +#ifdef DAT_EXTENSIONS +#include +#endif + +#ifndef __cplusplus +#define false 0 +#define true 1 +#endif /*__cplusplus */ + +/* Typedefs to map common DAPL provider types to IB verbs */ +typedef struct ibv_qp *ib_qp_handle_t; +typedef struct ibv_cq *ib_cq_handle_t; +typedef struct ibv_pd *ib_pd_handle_t; +typedef struct ibv_mr *ib_mr_handle_t; +typedef struct ibv_mw *ib_mw_handle_t; +typedef struct ibv_wc ib_work_completion_t; +typedef struct ibv_ah *ib_ah_handle_t; +typedef union ibv_gid *ib_gid_handle_t; + +/* HCA context type maps to IB verbs */ +typedef struct ibv_context *ib_hca_handle_t; +typedef ib_hca_handle_t dapl_ibal_ca_t; + +/* QP info to exchange, wire protocol version for these CM's */ +#define DCM_VER 6 + +/* CM private data areas, same for all operations */ +#define DCM_MAX_PDATA_SIZE 118 + +/* + * UCM DAPL IB/QP address (lid, qp_num, gid) mapping to + * DAT_IA_ADDRESS_PTR, DAT_SOCK_ADDR2 (28 bytes) + * For applications, like MPI, that exchange IA_ADDRESS + * across the fabric before connecting, it eliminates the + * overhead of name and address resolution to the destination's + * CM services. UCM provider uses the following for + * DAT_IA_ADDRESS. Note: family == AF_INET6 to insure proper + * callee storage for address. 
+ */ +union dcm_addr { + DAT_SOCK_ADDR6 so; + struct { + uint16_t family; /* sin6_family */ + uint16_t lid; /* sin6_port */ + uint32_t qpn; /* sin6_flowinfo */ + uint8_t gid[16]; /* sin6_addr */ + uint16_t port; /* sin6_scope_id */ + uint8_t sl; + uint8_t qp_type; + } ib; +}; + +/* 256 bytes total; default max_inline_send, min IB MTU size */ +typedef struct _ib_cm_msg +{ + uint16_t ver; + uint16_t op; + uint16_t sport; /* src cm port */ + uint16_t dport; /* dst cm port */ + uint32_t sqpn; /* src cm qpn */ + uint32_t dqpn; /* dst cm qpn */ + uint16_t p_size; + uint8_t resv[14]; + union dcm_addr saddr; + union dcm_addr daddr; + union dcm_addr saddr_alt; + union dcm_addr daddr_alt; + uint8_t p_data[DCM_MAX_PDATA_SIZE]; + +} ib_cm_msg_t; + +/* CM events */ +typedef enum { + IB_CME_CONNECTED, + IB_CME_DISCONNECTED, + IB_CME_DISCONNECTED_ON_LINK_DOWN, + IB_CME_CONNECTION_REQUEST_PENDING, + IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA, + IB_CME_CONNECTION_REQUEST_ACKED, + IB_CME_DESTINATION_REJECT, + IB_CME_DESTINATION_REJECT_PRIVATE_DATA, + IB_CME_DESTINATION_UNREACHABLE, + IB_CME_TOO_MANY_CONNECTION_REQUESTS, + IB_CME_LOCAL_FAILURE, + IB_CME_BROKEN, + IB_CME_TIMEOUT +} ib_cm_events_t; + +/* Operation and state mappings */ +typedef int ib_send_op_type_t; +typedef struct ibv_sge ib_data_segment_t; +typedef enum ibv_qp_state ib_qp_state_t; +typedef enum ibv_event_type ib_async_event_type; +typedef struct ibv_async_event ib_error_record_t; + +/* CQ notifications */ +typedef enum +{ + IB_NOTIFY_ON_NEXT_COMP, + IB_NOTIFY_ON_SOLIC_COMP + +} ib_notification_type_t; + +/* other mappings */ +typedef int ib_bool_t; +typedef union ibv_gid GID; +typedef char *IB_HCA_NAME; +typedef uint16_t ib_hca_port_t; + +/* Definitions */ +#define IB_INVALID_HANDLE NULL + +/* inline send rdma threshold */ +#define INLINE_SEND_IWARP_DEFAULT 64 +#define INLINE_SEND_IB_DEFAULT 256 + +/* qkey for UD QP's */ +#define DAT_UD_QKEY 0x78654321 + +/* RC timer - retry count defaults */ +#define 
DCM_ACK_TIMER 16 /* 5 bits, 4.096us*2^ack_timer. 16== 268ms */ +#define DCM_ACK_RETRY 7 /* 3 bits, 7 * 268ms = 1.8 seconds */ +#define DCM_RNR_TIMER 12 /* 5 bits, 12 =.64ms, 28 =163ms, 31 =491ms */ +#define DCM_RNR_RETRY 7 /* 3 bits, 7 == infinite */ +#define DCM_IB_MTU 2048 + +/* Global routing defaults */ +#define DCM_GLOBAL 0 /* global routing is disabled */ +#define DCM_HOP_LIMIT 0xff +#define DCM_TCLASS 0 + +/* DAPL uCM timers, default queue sizes */ +#define DCM_RETRY_CNT 15 +#define DCM_REP_TIME 800 /* reply timeout in m_secs */ +#define DCM_RTU_TIME 400 /* rtu timeout in m_secs */ +#define DCM_QP_SIZE 500 /* uCM tx, rx qp size */ +#define DCM_CQ_SIZE 500 /* uCM cq size */ +#define DCM_TX_BURST 50 /* uCM signal, every TX burst msgs posted */ + +/* DTO OPs, ordered for DAPL ENUM definitions */ +#define OP_RDMA_WRITE IBV_WR_RDMA_WRITE +#define OP_RDMA_WRITE_IMM IBV_WR_RDMA_WRITE_WITH_IMM +#define OP_SEND IBV_WR_SEND +#define OP_SEND_IMM IBV_WR_SEND_WITH_IMM +#define OP_RDMA_READ IBV_WR_RDMA_READ +#define OP_COMP_AND_SWAP IBV_WR_ATOMIC_CMP_AND_SWP +#define OP_FETCH_AND_ADD IBV_WR_ATOMIC_FETCH_AND_ADD +#define OP_RECEIVE 7 /* internal op */ +#define OP_RECEIVE_IMM 8 /* rdma write with immed, internel op */ +#define OP_RECEIVE_MSG_IMM 9 /* recv msg with immed, internel op */ +#define OP_BIND_MW 10 /* internal op */ +#define OP_SEND_UD 11 /* internal op */ +#define OP_RECV_UD 12 /* internal op */ +#define OP_INVALID 0xff + +/* Definitions to map QP state */ +#define IB_QP_STATE_RESET IBV_QPS_RESET +#define IB_QP_STATE_INIT IBV_QPS_INIT +#define IB_QP_STATE_RTR IBV_QPS_RTR +#define IB_QP_STATE_RTS IBV_QPS_RTS +#define IB_QP_STATE_SQD IBV_QPS_SQD +#define IB_QP_STATE_SQE IBV_QPS_SQE +#define IB_QP_STATE_ERROR IBV_QPS_ERR + +/* Definitions for ibverbs/mthca return codes, should be defined in verbs.h */ +/* some are errno and some are -n values */ + +/** + * ibv_get_device_name - Return kernel device name + * ibv_get_device_guid - Return device's node GUID + * 
ibv_open_device - Return ibv_context or NULL + * ibv_close_device - Return 0, (errno?) + * ibv_get_async_event - Return 0, -1 + * ibv_alloc_pd - Return ibv_pd, NULL + * ibv_dealloc_pd - Return 0, errno + * ibv_reg_mr - Return ibv_mr, NULL + * ibv_dereg_mr - Return 0, errno + * ibv_create_cq - Return ibv_cq, NULL + * ibv_destroy_cq - Return 0, errno + * ibv_get_cq_event - Return 0 & ibv_cq/context, int + * ibv_poll_cq - Return n & ibv_wc, 0 ok, -1 empty, -2 error + * ibv_req_notify_cq - Return 0 (void?) + * ibv_create_qp - Return ibv_qp, NULL + * ibv_modify_qp - Return 0, errno + * ibv_destroy_qp - Return 0, errno + * ibv_post_send - Return 0, -1 & bad_wr + * ibv_post_recv - Return 0, -1 & bad_wr + */ + +/* async handler for DTO, CQ, QP, and unaffiliated */ +typedef void (*ib_async_dto_handler_t)( + IN ib_hca_handle_t ib_hca_handle, + IN ib_error_record_t *err_code, + IN void *context); + +typedef void (*ib_async_cq_handler_t)( + IN ib_hca_handle_t ib_hca_handle, + IN ib_cq_handle_t ib_cq_handle, + IN ib_error_record_t *err_code, + IN void *context); + +typedef void (*ib_async_qp_handler_t)( + IN ib_hca_handle_t ib_hca_handle, + IN ib_qp_handle_t ib_qp_handle, + IN ib_error_record_t *err_code, + IN void *context); + +typedef void (*ib_async_handler_t)( + IN ib_hca_handle_t ib_hca_handle, + IN ib_error_record_t *err_code, + IN void *context); + +typedef enum +{ + IB_THREAD_INIT, + IB_THREAD_CREATE, + IB_THREAD_RUN, + IB_THREAD_CANCEL, + IB_THREAD_EXIT + +} ib_thread_state_t; + +typedef enum dapl_cm_op +{ + DCM_REQ = 1, + DCM_REP, + DCM_REJ_USER, /* user reject */ + DCM_REJ_CM, /* cm reject, no SID */ + DCM_RTU, + DCM_DREQ, + DCM_DREP + +} DAPL_CM_OP; + +typedef enum dapl_cm_state +{ + DCM_INIT, + DCM_LISTEN, + DCM_CONN_PENDING, + DCM_REP_PENDING, + DCM_ACCEPTING, + DCM_ACCEPTING_DATA, + DCM_ACCEPTED, + DCM_REJECTING, + DCM_REJECTED, + DCM_CONNECTED, + DCM_RELEASE, + DCM_DISC_PENDING, + DCM_DISCONNECTED, + DCM_DESTROY, + DCM_RTU_PENDING, + DCM_DISC_RECV, + DCM_FREE,
+} DAPL_CM_STATE; + +/* provider specific fields for shared memory support */ +typedef uint32_t ib_shm_transport_t; + +/* prototypes */ +int32_t dapls_ib_init(void); +int32_t dapls_ib_release(void); + +/* util.c */ +enum ibv_mtu dapl_ib_mtu(int mtu); +char *dapl_ib_mtu_str(enum ibv_mtu mtu); +int getipaddr_netdev(char *name, char *addr, int addr_len); +DAT_RETURN getlocalipaddr(char *addr, int addr_len); + +/* qp.c */ +DAT_RETURN dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp); +DAT_RETURN dapls_modify_qp_state(IN ib_qp_handle_t qp_handle, + IN ib_qp_state_t qp_state, + IN uint32_t qpn, + IN uint16_t lid, + IN ib_gid_handle_t gid); +ib_ah_handle_t dapls_create_ah( IN DAPL_HCA *hca, + IN ib_pd_handle_t pd, + IN ib_qp_handle_t qp, + IN uint16_t lid, + IN ib_gid_handle_t gid); + +/* inline functions */ +STATIC _INLINE_ IB_HCA_NAME dapl_ib_convert_name (IN char *name) +{ + /* use ascii; name of local device */ + return dapl_os_strdup(name); +} + +STATIC _INLINE_ void dapl_ib_release_name (IN IB_HCA_NAME name) +{ + return; +} + +/* + * Convert errno to DAT_RETURN values + */ +STATIC _INLINE_ DAT_RETURN +dapl_convert_errno( IN int err, IN const char *str ) +{ + if (!err) return DAT_SUCCESS; + +#if DAPL_DBG + if ((err != EAGAIN) && (err != ETIMEDOUT)) + dapl_dbg_log (DAPL_DBG_TYPE_ERR," %s %s\n", str, strerror(err)); +#endif + + switch( err ) + { + case EOVERFLOW : return DAT_LENGTH_ERROR; + case EACCES : return DAT_PRIVILEGES_VIOLATION; + case EPERM : return DAT_PROTECTION_VIOLATION; + case EINVAL : return DAT_INVALID_HANDLE; + case EISCONN : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_CONNECTED; + case ECONNREFUSED : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_NOTREADY; + case ETIMEDOUT : return DAT_TIMEOUT_EXPIRED; + case ENETUNREACH: return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_UNREACHABLE; + case EADDRINUSE : return DAT_CONN_QUAL_IN_USE; + case EALREADY : return DAT_INVALID_STATE | DAT_INVALID_STATE_EP_ACTCONNPENDING; + case ENOMEM : return
DAT_INSUFFICIENT_RESOURCES; + case EAGAIN : return DAT_QUEUE_EMPTY; + case EINTR : return DAT_INTERRUPTED_CALL; + case EAFNOSUPPORT : return DAT_INVALID_ADDRESS | DAT_INVALID_ADDRESS_MALFORMED; + case EFAULT : + default : return DAT_INTERNAL_ERROR; + } + } + +STATIC _INLINE_ char * dapl_cm_state_str(IN int st) +{ + static char *state[] = { + "CM_INIT", + "CM_LISTEN", + "CM_CONN_PENDING", + "CM_REP_PENDING", + "CM_ACCEPTING", + "CM_ACCEPTING_DATA", + "CM_ACCEPTED", + "CM_REJECTING", + "CM_REJECTED", + "CM_CONNECTED", + "CM_RELEASE", + "CM_DISC_PENDING", + "CM_DISCONNECTED", + "CM_DESTROY", + "CM_RTU_PENDING", + "CM_DISC_RECV", + "CM_FREE" + }; + return ((st < 0 || st > 16) ? "Invalid CM state?" : state[st]); +} + +STATIC _INLINE_ char * dapl_cm_op_str(IN int op) +{ + static char *ops[] = { + "INVALID", + "REQ", + "REP", + "REJ_USER", + "REJ_CM", + "RTU", + "DREQ", + "DREP", + }; + return ((op < 1 || op > 7) ? "Invalid OP?" : ops[op]); +} + +#endif /* _DAPL_IB_COMMON_H_ */ diff --git a/trunk/ulp/dapl2/dapl/openib_common/qp.c b/trunk/ulp/dapl2/dapl/openib_common/qp.c index 179eef0e..2d326357 100644 --- a/trunk/ulp/dapl2/dapl/openib_common/qp.c +++ b/trunk/ulp/dapl2/dapl/openib_common/qp.c @@ -1,614 +1,664 @@ -/* - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. 
- * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. - */ -#include "dapl.h" -#include "dapl_adapter_util.h" -#include "dapl_ep_util.h" - -/* - * dapl_ib_qp_alloc - * - * Alloc a QP - * - * Input: - * *ep_ptr pointer to EP INFO - * ib_hca_handle provider HCA handle - * ib_pd_handle provider protection domain handle - * cq_recv provider recv CQ handle - * cq_send provider send CQ handle - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INTERNAL_ERROR - * - */ -DAT_RETURN -dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, - IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr) -{ - DAT_EP_ATTR *attr; - DAPL_EVD *rcv_evd, *req_evd; - ib_cq_handle_t rcv_cq, req_cq; - ib_pd_handle_t ib_pd_handle; - struct ibv_qp_init_attr qp_create; -#ifdef _OPENIB_CMA_ - dp_ib_cm_handle_t conn; -#endif - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n", - ia_ptr, ep_ptr, ep_ctx_ptr); - - attr = &ep_ptr->param.ep_attr; - ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle; - rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle; - req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle; - - /* - * DAT allows usage model of EP's with no EVD's but IB does not. - * Create a CQ with zero entries under the covers to support and - * catch any invalid posting. 
- */ - if (rcv_evd != DAT_HANDLE_NULL) - rcv_cq = rcv_evd->ib_cq_handle; - else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty) - rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty; - else { - struct ibv_comp_channel *channel; - - channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle); - if (!channel) - return (dapl_convert_errno(ENOMEM, "create_cq")); - - /* Call IB verbs to create CQ */ - rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, - 0, NULL, channel, 0); - - if (rcv_cq == IB_INVALID_HANDLE) { - ibv_destroy_comp_channel(channel); - return (dapl_convert_errno(ENOMEM, "create_cq")); - } - - ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq; - } - if (req_evd != DAT_HANDLE_NULL) - req_cq = req_evd->ib_cq_handle; - else - req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty; - - /* - * IMPLEMENTATION NOTE: - * uDAPL allows consumers to post buffers on the EP after creation - * and before a connect request (outbound and inbound). This forces - * a binding to a device during the hca_open call and requires the - * consumer to predetermine which device to listen on or connect from. - * This restriction eliminates any option of listening or connecting - * over multiple devices. uDAPL should add API's to resolve addresses - * and bind to the device at the approriate time (before connect - * and after CR arrives). Discovery should happen at connection time - * based on addressing and not on static configuration during open. 
- */ - -#ifdef _OPENIB_CMA_ - /* Allocate CM and initialize lock */ - if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL) - return (dapl_convert_errno(ENOMEM, "cm_create")); - - /* open identifies the local device; per DAT specification */ - if (rdma_bind_addr(conn->cm_id, - (struct sockaddr *)&ia_ptr->hca_ptr->hca_address)) { - dapls_cm_free(conn); - return (dapl_convert_errno(EAFNOSUPPORT, "rdma_bind_addr")); - } -#endif - /* Setup attributes and create qp */ - dapl_os_memzero((void *)&qp_create, sizeof(qp_create)); - qp_create.send_cq = req_cq; - qp_create.cap.max_send_wr = attr->max_request_dtos; - qp_create.cap.max_send_sge = attr->max_request_iov; - qp_create.cap.max_inline_data = - ia_ptr->hca_ptr->ib_trans.max_inline_send; - qp_create.qp_type = IBV_QPT_RC; - qp_create.qp_context = (void *)ep_ptr; - -#ifdef DAT_EXTENSIONS - if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) { -#ifdef _OPENIB_CMA_ - return (DAT_NOT_IMPLEMENTED); -#endif - qp_create.qp_type = IBV_QPT_UD; - if (attr->max_message_size > - (128 << ia_ptr->hca_ptr->ib_trans.mtu)) { - return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6); - } - } -#endif - - /* ibv assumes rcv_cq is never NULL, set to req_cq */ - if (rcv_cq == NULL) { - qp_create.recv_cq = req_cq; - qp_create.cap.max_recv_wr = 0; - qp_create.cap.max_recv_sge = 0; - } else { - qp_create.recv_cq = rcv_cq; - qp_create.cap.max_recv_wr = attr->max_recv_dtos; - qp_create.cap.max_recv_sge = attr->max_recv_iov; - } - -#ifdef _OPENIB_CMA_ - if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) { - dapls_cm_free(conn); - return (dapl_convert_errno(errno, "rdma_create_qp")); - } - ep_ptr->qp_handle = conn->cm_id->qp; - ep_ptr->qp_state = IBV_QPS_INIT; - - ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id); -#else - ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create); - if (!ep_ptr->qp_handle) - return (dapl_convert_errno(ENOMEM, "create_qp")); - - /* Setup QP attributes for INIT state on the way out */ - if 
(dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) { - ibv_destroy_qp(ep_ptr->qp_handle); - ep_ptr->qp_handle = IB_INVALID_HANDLE; - return DAT_INTERNAL_ERROR; - } -#endif - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " qp_alloc: qpn %p type %d sq %d,%d rq %d,%d\n", - ep_ptr->qp_handle->qp_num, ep_ptr->qp_handle->qp_type, - qp_create.cap.max_send_wr, qp_create.cap.max_send_sge, - qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge); - - return DAT_SUCCESS; -} - -/* - * dapl_ib_qp_free - * - * Free a QP - * - * Input: - * ia_handle IA handle - * *ep_ptr pointer to EP INFO - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * dapl_convert_errno - * - */ -DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr) -{ -#ifdef _OPENIB_CMA_ - dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr); - - dapl_os_lock(&ep_ptr->header.lock); - if (cm_ptr && cm_ptr->cm_id->qp) { - rdma_destroy_qp(cm_ptr->cm_id); - cm_ptr->cm_id->qp = NULL; - ep_ptr->qp_handle = NULL; - } -#else - dapl_os_lock(&ep_ptr->header.lock); - if (ep_ptr->qp_handle != NULL) { - /* force error state to flush queue, then destroy */ - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0); - - if (ibv_destroy_qp(ep_ptr->qp_handle)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " qp_free: ibv_destroy_qp error - %s\n", - strerror(errno)); - } - ep_ptr->qp_handle = NULL; - } -#endif - dapl_os_unlock(&ep_ptr->header.lock); - return DAT_SUCCESS; -} - -/* - * dapl_ib_qp_modify - * - * Set the QP to the parameters specified in an EP_PARAM - * - * The EP_PARAM structure that is provided has been - * sanitized such that only non-zero values are valid. 
- * - * Input: - * ib_hca_handle HCA handle - * qp_handle QP handle - * ep_attr Sanitized EP Params - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INVALID_PARAMETER - * - */ -DAT_RETURN -dapls_ib_qp_modify(IN DAPL_IA * ia_ptr, - IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr) -{ - struct ibv_qp_attr qp_attr; - - if (ep_ptr->qp_handle == IB_INVALID_HANDLE) - return DAT_INVALID_PARAMETER; - - /* - * EP state, qp_handle state should be an indication - * of current state but the only way to be sure is with - * a user mode ibv_query_qp call which is NOT available - */ - - /* move to error state if necessary */ - if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) && - (ep_ptr->qp_handle->state != IBV_QPS_ERR)) { - return (dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_ERR, 0, 0, 0)); - } - - /* - * Check if we have the right qp_state to modify attributes - */ - if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) && - (ep_ptr->qp_handle->state != IBV_QPS_RTS)) - return DAT_INVALID_STATE; - - /* Adjust to current EP attributes */ - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.cap.max_send_wr = attr->max_request_dtos; - qp_attr.cap.max_recv_wr = attr->max_recv_dtos; - qp_attr.cap.max_send_sge = attr->max_request_iov; - qp_attr.cap.max_recv_sge = attr->max_recv_iov; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - "modify_qp: qp %p sq %d,%d, rq %d,%d\n", - ep_ptr->qp_handle, - qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge, - qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge); - - if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) { - dapl_dbg_log(DAPL_DBG_TYPE_ERR, - "modify_qp: modify ep %p qp %p failed\n", - ep_ptr, ep_ptr->qp_handle); - return (dapl_convert_errno(errno, "modify_qp_state")); - } - - return DAT_SUCCESS; -} - -/* - * dapls_ib_reinit_ep - * - * Move the QP to INIT state again. 
- * - * Input: - * ep_ptr DAPL_EP - * - * Output: - * none - * - * Returns: - * void - * - */ -#if defined(_WIN32) || defined(_WIN64) || defined(_OPENIB_CMA_) -void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr) -{ - dp_ib_cm_handle_t cm_ptr, next_cm_ptr; - - /* work around bug in low level driver - 3/24/09 */ - /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */ - if (ep_ptr->qp_handle != IB_INVALID_HANDLE) { - dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr); - - /* free any CM object's created */ - cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) - ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); - while (cm_ptr != NULL) { - next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head, - &cm_ptr->list_entry); - dapls_cm_free(cm_ptr); - cm_ptr = next_cm_ptr; - } - dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr); - } -} -#else // _WIN32 || _WIN64 -void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr) -{ - if (ep_ptr->qp_handle != IB_INVALID_HANDLE && - ep_ptr->qp_handle->qp_type != IBV_QPT_UD) { - /* move to RESET state and then to INIT */ - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0); - dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0); - } -} -#endif // _WIN32 || _WIN64 - -/* - * Generic QP modify for init, reset, error, RTS, RTR - * For UD, create_ah on RTR, qkey on INIT - * CM msg provides QP attributes, info in network order - */ -DAT_RETURN -dapls_modify_qp_state(IN ib_qp_handle_t qp_handle, - IN ib_qp_state_t qp_state, - IN uint32_t qpn, - IN uint16_t lid, - IN ib_gid_handle_t gid) -{ - struct ibv_qp_attr qp_attr; - enum ibv_qp_attr_mask mask = IBV_QP_STATE; - DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context; - DAPL_IA *ia_ptr = ep_ptr->header.owner_ia; - int ret; - - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = qp_state; - - switch (qp_state) { - case IBV_QPS_RTR: - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " QPS_RTR: type %d qpn 0x%x gid %p (%d) lid 0x%x" - " port %d ep %p qp_state %d \n", 
- qp_handle->qp_type, ntohl(qpn), gid, - ia_ptr->hca_ptr->ib_trans.global, - ntohs(lid), ia_ptr->hca_ptr->port_num, - ep_ptr, ep_ptr->qp_state); - - mask |= IBV_QP_AV | - IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | - IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; - - qp_attr.dest_qp_num = ntohl(qpn); - qp_attr.rq_psn = 1; - qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu; - qp_attr.max_dest_rd_atomic = - ep_ptr->param.ep_attr.max_rdma_read_out; - qp_attr.min_rnr_timer = - ia_ptr->hca_ptr->ib_trans.rnr_timer; - - /* address handle. RC and UD */ - qp_attr.ah_attr.dlid = ntohs(lid); - if (gid && ia_ptr->hca_ptr->ib_trans.global) { - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " QPS_RTR: GID Subnet 0x" F64x " ID 0x" F64x "\n", - (unsigned long long)htonll(gid->global.subnet_prefix), - (unsigned long long)htonll(gid->global.interface_id)); - - qp_attr.ah_attr.is_global = 1; - qp_attr.ah_attr.grh.dgid.global.subnet_prefix = - gid->global.subnet_prefix; - qp_attr.ah_attr.grh.dgid.global.interface_id = - gid->global.interface_id; - qp_attr.ah_attr.grh.hop_limit = - ia_ptr->hca_ptr->ib_trans.hop_limit; - qp_attr.ah_attr.grh.traffic_class = - ia_ptr->hca_ptr->ib_trans.tclass; - } - qp_attr.ah_attr.sl = ia_ptr->hca_ptr->ib_trans.sl; - qp_attr.ah_attr.src_path_bits = 0; - qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num; - - /* UD: already in RTR, RTS state */ - if (qp_handle->qp_type == IBV_QPT_UD) { - mask = IBV_QP_STATE; - if (ep_ptr->qp_state == IBV_QPS_RTR || - ep_ptr->qp_state == IBV_QPS_RTS) - return DAT_SUCCESS; - } - break; - case IBV_QPS_RTS: - if (qp_handle->qp_type == IBV_QPT_RC) { - mask |= IBV_QP_SQ_PSN | - IBV_QP_TIMEOUT | - IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC; - qp_attr.timeout = - ia_ptr->hca_ptr->ib_trans.ack_timer; - qp_attr.retry_cnt = - ia_ptr->hca_ptr->ib_trans.ack_retry; - qp_attr.rnr_retry = - ia_ptr->hca_ptr->ib_trans.rnr_retry; - qp_attr.max_rd_atomic = - ep_ptr->param.ep_attr.max_rdma_read_out; - } - /* RC and UD 
*/ - qp_attr.qp_state = IBV_QPS_RTS; - qp_attr.sq_psn = 1; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " QPS_RTS: psn %x rd_atomic %d ack %d " - " retry %d rnr_retry %d ep %p qp_state %d\n", - qp_attr.sq_psn, qp_attr.max_rd_atomic, - qp_attr.timeout, qp_attr.retry_cnt, - qp_attr.rnr_retry, ep_ptr, - ep_ptr->qp_state); - - if (qp_handle->qp_type == IBV_QPT_UD) { - /* already RTS, multi remote AH's on QP */ - if (ep_ptr->qp_state == IBV_QPS_RTS) - return DAT_SUCCESS; - else - mask = IBV_QP_STATE | IBV_QP_SQ_PSN; - } - break; - case IBV_QPS_INIT: - mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT; - if (qp_handle->qp_type == IBV_QPT_RC) { - mask |= IBV_QP_ACCESS_FLAGS; - qp_attr.qp_access_flags = - IBV_ACCESS_LOCAL_WRITE | - IBV_ACCESS_REMOTE_WRITE | - IBV_ACCESS_REMOTE_READ | - IBV_ACCESS_REMOTE_ATOMIC | - IBV_ACCESS_MW_BIND; - } - - if (qp_handle->qp_type == IBV_QPT_UD) { - /* already INIT, multi remote AH's on QP */ - if (ep_ptr->qp_state == IBV_QPS_INIT) - return DAT_SUCCESS; - mask |= IBV_QP_QKEY; - qp_attr.qkey = DAT_UD_QKEY; - } - - qp_attr.pkey_index = ia_ptr->hca_ptr->ib_trans.pkey_idx; - qp_attr.port_num = ia_ptr->hca_ptr->port_num; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n", - qp_attr.pkey_index, qp_attr.port_num, - qp_attr.qp_access_flags, qp_attr.qkey); - break; - default: - break; - } - - ret = ibv_modify_qp(qp_handle, &qp_attr, mask); - if (ret == 0) { - ep_ptr->qp_state = qp_state; - return DAT_SUCCESS; - } else { - return (dapl_convert_errno(errno, "modify_qp_state")); - } -} - -/* Modify UD type QP from init, rtr, rts, info network order */ -DAT_RETURN -dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp) -{ - struct ibv_qp_attr qp_attr; - - /* modify QP, setup and prepost buffers */ - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = IBV_QPS_INIT; - qp_attr.pkey_index = hca->ib_trans.pkey_idx; - qp_attr.port_num = hca->port_num; - qp_attr.qkey = DAT_UD_QKEY; - if (ibv_modify_qp(qp, &qp_attr, - 
IBV_QP_STATE | - IBV_QP_PKEY_INDEX | - IBV_QP_PORT | - IBV_QP_QKEY)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " modify_ud_qp INIT: ERR %s\n", strerror(errno)); - return (dapl_convert_errno(errno, "modify_qp")); - } - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = IBV_QPS_RTR; - if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " modify_ud_qp RTR: ERR %s\n", strerror(errno)); - return (dapl_convert_errno(errno, "modify_qp")); - } - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = IBV_QPS_RTS; - qp_attr.sq_psn = 1; - if (ibv_modify_qp(qp, &qp_attr, - IBV_QP_STATE | IBV_QP_SQ_PSN)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " modify_ud_qp RTS: ERR %s\n", strerror(errno)); - return (dapl_convert_errno(errno, "modify_qp")); - } - return DAT_SUCCESS; -} - -/* Create address handle for remote QP, info in network order */ -ib_ah_handle_t -dapls_create_ah(IN DAPL_HCA *hca, - IN ib_pd_handle_t pd, - IN ib_qp_handle_t qp, - IN uint16_t lid, - IN ib_gid_handle_t gid) -{ - struct ibv_qp_attr qp_attr; - ib_ah_handle_t ah; - - if (qp->qp_type != IBV_QPT_UD) { - dapl_log(DAPL_DBG_TYPE_ERR, - " create_ah ERR: QP_type != UD\n"); - return NULL; - } - - dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); - qp_attr.qp_state = IBV_QP_STATE; - - /* address handle. 
RC and UD */ - qp_attr.ah_attr.dlid = ntohs(lid); - if (gid != NULL) { - dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n"); - qp_attr.ah_attr.is_global = 1; - qp_attr.ah_attr.grh.dgid.global.subnet_prefix = - ntohll(gid->global.subnet_prefix); - qp_attr.ah_attr.grh.dgid.global.interface_id = - ntohll(gid->global.interface_id); - qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.hop_limit; - qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.tclass; - } - qp_attr.ah_attr.sl = hca->ib_trans.sl; - qp_attr.ah_attr.src_path_bits = 0; - qp_attr.ah_attr.port_num = hca->port_num; - - dapl_log(DAPL_DBG_TYPE_CM, - " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n", - hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle); - - /* UD: create AH for remote side */ - ah = ibv_create_ah(pd, &qp_attr.ah_attr); - if (!ah) { - dapl_log(DAPL_DBG_TYPE_ERR, - " create_ah: ERR %s\n", strerror(errno)); - return NULL; - } - - dapl_log(DAPL_DBG_TYPE_CM, - " dapls_create_ah: AH %p for lid %x\n", - ah, qp_attr.ah_attr.dlid); - - return ah; -} - -/* - * Local variables: - * c-indent-level: 4 - * c-basic-offset: 4 - * tab-width: 8 - * End: - */ +/* + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. 
+ * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "dapl.h" +#include "dapl_adapter_util.h" +#include "dapl_ep_util.h" +#include +#include + +/* + * dapl_ib_qp_alloc + * + * Alloc a QP + * + * Input: + * *ep_ptr pointer to EP INFO + * ib_hca_handle provider HCA handle + * ib_pd_handle provider protection domain handle + * cq_recv provider recv CQ handle + * cq_send provider send CQ handle + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INTERNAL_ERROR + * + */ +DAT_RETURN +dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr, + IN DAPL_EP * ep_ptr, IN DAPL_EP * ep_ctx_ptr) +{ + DAT_EP_ATTR *attr; + DAPL_EVD *rcv_evd, *req_evd; + ib_cq_handle_t rcv_cq, req_cq; + ib_pd_handle_t ib_pd_handle; + struct ibv_qp_init_attr qp_create; +#ifdef _OPENIB_CMA_ + dp_ib_cm_handle_t conn; +#endif + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " qp_alloc: ia_ptr %p ep_ptr %p ep_ctx_ptr %p\n", + ia_ptr, ep_ptr, ep_ctx_ptr); + + attr = &ep_ptr->param.ep_attr; + ib_pd_handle = ((DAPL_PZ *) ep_ptr->param.pz_handle)->pd_handle; + rcv_evd = (DAPL_EVD *) ep_ptr->param.recv_evd_handle; + req_evd = (DAPL_EVD *) ep_ptr->param.request_evd_handle; + + /* + * DAT allows usage model of EP's with no EVD's but IB does not. + * Create a CQ with zero entries under the covers to support and + * catch any invalid posting. 
+ */ + if (rcv_evd != DAT_HANDLE_NULL) + rcv_cq = rcv_evd->ib_cq_handle; + else if (!ia_ptr->hca_ptr->ib_trans.ib_cq_empty) + rcv_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty; + else { + struct ibv_comp_channel *channel; + + channel = ibv_create_comp_channel(ia_ptr->hca_ptr->ib_hca_handle); + if (!channel) + return (dapl_convert_errno(ENOMEM, "create_cq")); + + /* Call IB verbs to create CQ */ + rcv_cq = ibv_create_cq(ia_ptr->hca_ptr->ib_hca_handle, + 0, NULL, channel, 0); + + if (rcv_cq == IB_INVALID_HANDLE) { + ibv_destroy_comp_channel(channel); + return (dapl_convert_errno(ENOMEM, "create_cq")); + } + + ia_ptr->hca_ptr->ib_trans.ib_cq_empty = rcv_cq; + } + if (req_evd != DAT_HANDLE_NULL) + req_cq = req_evd->ib_cq_handle; + else + req_cq = ia_ptr->hca_ptr->ib_trans.ib_cq_empty; + + /* + * IMPLEMENTATION NOTE: + * uDAPL allows consumers to post buffers on the EP after creation + * and before a connect request (outbound and inbound). This forces + * a binding to a device during the hca_open call and requires the + * consumer to predetermine which device to listen on or connect from. + * This restriction eliminates any option of listening or connecting + * over multiple devices. uDAPL should add API's to resolve addresses + * and bind to the device at the appropriate time (before connect + * and after CR arrives). Discovery should happen at connection time + * based on addressing and not on static configuration during open.
+ */ + +#ifdef _OPENIB_CMA_ + /* Allocate CM and initialize lock */ + if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL) + return (dapl_convert_errno(ENOMEM, "cm_create")); + + /* open identifies the local device; per DAT specification */ + if (rdma_bind_addr(conn->cm_id, + (struct sockaddr *)&ia_ptr->hca_ptr->hca_address)) { + dapls_cm_free(conn); + return (dapl_convert_errno(EAFNOSUPPORT, "rdma_bind_addr")); + } +#endif + /* Setup attributes and create qp */ + dapl_os_memzero((void *)&qp_create, sizeof(qp_create)); + qp_create.send_cq = req_cq; + qp_create.cap.max_send_wr = attr->max_request_dtos; + qp_create.cap.max_send_sge = attr->max_request_iov; + qp_create.cap.max_inline_data = + ia_ptr->hca_ptr->ib_trans.max_inline_send; + qp_create.qp_type = IBV_QPT_RC; + qp_create.qp_context = (void *)ep_ptr; + +#ifdef DAT_EXTENSIONS + if (attr->service_type == DAT_IB_SERVICE_TYPE_UD) { +#ifdef _OPENIB_CMA_ + return (DAT_NOT_IMPLEMENTED); +#endif + qp_create.qp_type = IBV_QPT_UD; + if (attr->max_message_size > + (128 << ia_ptr->hca_ptr->ib_trans.mtu)) { + return (DAT_INVALID_PARAMETER | DAT_INVALID_ARG6); + } + } +#endif + + /* ibv assumes rcv_cq is never NULL, set to req_cq */ + if (rcv_cq == NULL) { + qp_create.recv_cq = req_cq; + qp_create.cap.max_recv_wr = 0; + qp_create.cap.max_recv_sge = 0; + } else { + qp_create.recv_cq = rcv_cq; + qp_create.cap.max_recv_wr = attr->max_recv_dtos; + qp_create.cap.max_recv_sge = attr->max_recv_iov; + } + +#ifdef _OPENIB_CMA_ + if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) { + dapls_cm_free(conn); + return (dapl_convert_errno(errno, "rdma_create_qp")); + } + ep_ptr->qp_handle = conn->cm_id->qp; + ep_ptr->qp_state = IBV_QPS_INIT; + + ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id); +#else + ep_ptr->qp_handle = ibv_create_qp(ib_pd_handle, &qp_create); + if (!ep_ptr->qp_handle) + return (dapl_convert_errno(ENOMEM, "create_qp")); + + /* Setup QP attributes for INIT state on the way out */ + if 
(dapls_modify_qp_state(ep_ptr->qp_handle, + IBV_QPS_INIT, 0, 0, 0) != DAT_SUCCESS) { + ibv_destroy_qp(ep_ptr->qp_handle); + ep_ptr->qp_handle = IB_INVALID_HANDLE; + return DAT_INTERNAL_ERROR; + } +#endif + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " qp_alloc: qpn %p type %d sq %d,%d rq %d,%d\n", + ep_ptr->qp_handle->qp_num, ep_ptr->qp_handle->qp_type, + qp_create.cap.max_send_wr, qp_create.cap.max_send_sge, + qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge); + + return DAT_SUCCESS; +} + +/* + * dapl_ib_qp_free + * + * Free a QP + * + * Input: + * ia_handle IA handle + * *ep_ptr pointer to EP INFO + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * dapl_convert_errno + * + */ +DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr) +{ + struct ibv_qp *qp; + struct ibv_qp_attr qp_attr; + +#ifdef _OPENIB_CMA_ + dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr); + if (!cm_ptr) + return DAT_SUCCESS; +#endif + + dapl_os_lock(&ep_ptr->header.lock); + if (ep_ptr->qp_handle != NULL) { + qp = ep_ptr->qp_handle; + dapl_os_unlock(&ep_ptr->header.lock); + + qp_attr.qp_state = IBV_QPS_ERR; + ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE); + dapls_ep_flush_cqs(ep_ptr); + + ep_ptr->qp_handle = NULL; +#ifdef _OPENIB_CMA_ + rdma_destroy_qp(cm_ptr->cm_id); + cm_ptr->cm_id->qp = NULL; +#else + if (ibv_destroy_qp(qp)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " qp_free: ibv_destroy_qp error - %s\n", + strerror(errno)); + } +#endif + } else { + dapl_os_unlock(&ep_ptr->header.lock); + } + return DAT_SUCCESS; +} + +/* + * dapl_ib_qp_modify + * + * Set the QP to the parameters specified in an EP_PARAM + * + * The EP_PARAM structure that is provided has been + * sanitized such that only non-zero values are valid. 
+ * + * Input: + * ib_hca_handle HCA handle + * qp_handle QP handle + * ep_attr Sanitized EP Params + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INVALID_PARAMETER + * + */ +DAT_RETURN +dapls_ib_qp_modify(IN DAPL_IA * ia_ptr, + IN DAPL_EP * ep_ptr, IN DAT_EP_ATTR * attr) +{ + struct ibv_qp_attr qp_attr; + + if (ep_ptr->qp_handle == IB_INVALID_HANDLE) + return DAT_INVALID_PARAMETER; + + /* + * EP state, qp_handle state should be an indication + * of current state but the only way to be sure is with + * a user mode ibv_query_qp call which is NOT available + */ + + /* move to error state if necessary */ + if ((ep_ptr->qp_state == IB_QP_STATE_ERROR) && + (ep_ptr->qp_handle->state != IBV_QPS_ERR)) { + return (dapls_modify_qp_state(ep_ptr->qp_handle, + IBV_QPS_ERR, 0, 0, 0)); + } + + /* + * Check if we have the right qp_state to modify attributes + */ + if ((ep_ptr->qp_handle->state != IBV_QPS_RTR) && + (ep_ptr->qp_handle->state != IBV_QPS_RTS)) + return DAT_INVALID_STATE; + + /* Adjust to current EP attributes */ + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.cap.max_send_wr = attr->max_request_dtos; + qp_attr.cap.max_recv_wr = attr->max_recv_dtos; + qp_attr.cap.max_send_sge = attr->max_request_iov; + qp_attr.cap.max_recv_sge = attr->max_recv_iov; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + "modify_qp: qp %p sq %d,%d, rq %d,%d\n", + ep_ptr->qp_handle, + qp_attr.cap.max_send_wr, qp_attr.cap.max_send_sge, + qp_attr.cap.max_recv_wr, qp_attr.cap.max_recv_sge); + + if (ibv_modify_qp(ep_ptr->qp_handle, &qp_attr, IBV_QP_CAP)) { + dapl_dbg_log(DAPL_DBG_TYPE_ERR, + "modify_qp: modify ep %p qp %p failed\n", + ep_ptr, ep_ptr->qp_handle); + return (dapl_convert_errno(errno, "modify_qp_state")); + } + + return DAT_SUCCESS; +} + +/* + * dapls_ib_reinit_ep + * + * Move the QP to INIT state again. 
+ * + * Input: + * ep_ptr DAPL_EP + * + * Output: + * none + * + * Returns: + * void + * + */ +#if defined(_WIN32) || defined(_WIN64) || defined(_OPENIB_CMA_) +void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr) +{ + dp_ib_cm_handle_t cm_ptr, next_cm_ptr; + + /* work around bug in low level driver - 3/24/09 */ + /* RTS -> RESET -> INIT -> ERROR QP transition crashes system */ + if (ep_ptr->qp_handle != IB_INVALID_HANDLE) { + dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr); + + /* free any CM object's created */ + cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head) + ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head)); + while (cm_ptr != NULL) { + next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head, + &cm_ptr->list_entry); + dapls_cm_free(cm_ptr); + cm_ptr = next_cm_ptr; + } + dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr); + } +} +#else // _WIN32 || _WIN64 +void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr) +{ + if (ep_ptr->qp_handle != IB_INVALID_HANDLE && + ep_ptr->qp_handle->qp_type != IBV_QPT_UD) { + /* move to RESET state and then to INIT */ + dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_RESET,0,0,0); + dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_INIT,0,0,0); + } +} +#endif // _WIN32 || _WIN64 + +#if DAPL_USE_IBACM +uint8_t dapls_get_sl(DAPL_HCA *hca_ptr, uint16_t dlid) +{ + struct rdma_addrinfo hint, *res; + struct ibv_path_record path; + uint8_t sl = hca_ptr->ib_trans.sl; + int ret; + + memset(&path, 0, sizeof path); + path.reversible_numpath = IBV_PATH_RECORD_REVERSIBLE | 1; + path.slid = hca_ptr->ib_trans.lid; + path.dlid = dlid; + + memset(&hint, 0, sizeof hint); + hint.ai_route = &path; + hint.ai_route_len = sizeof(path); + + ret = rdma_getaddrinfo(NULL, NULL, &hint, &res); + if (ret) + goto out; + + if (res->ai_route_len) + sl = ntohs(((struct ibv_path_record *) res->ai_route)-> + qosclass_sl) & 0xF; + + rdma_freeaddrinfo(res); +out: + return sl; +} +#else +uint8_t dapls_get_sl(DAPL_HCA *hca_ptr, uint16_t dlid) +{ + return 
hca_ptr->ib_trans.sl; +} +#endif + +/* + * Generic QP modify for init, reset, error, RTS, RTR + * For UD, create_ah on RTR, qkey on INIT + * CM msg provides QP attributes, info in network order + */ +DAT_RETURN +dapls_modify_qp_state(IN ib_qp_handle_t qp_handle, + IN ib_qp_state_t qp_state, + IN uint32_t qpn, + IN uint16_t lid, + IN ib_gid_handle_t gid) +{ + struct ibv_qp_attr qp_attr; + enum ibv_qp_attr_mask mask = IBV_QP_STATE; + DAPL_EP *ep_ptr = (DAPL_EP *) qp_handle->qp_context; + DAPL_IA *ia_ptr = ep_ptr->header.owner_ia; + int ret; + + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.qp_state = qp_state; + + switch (qp_state) { + case IBV_QPS_RTR: + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " QPS_RTR: type %d qpn 0x%x gid %p (%d) lid 0x%x" + " port %d ep %p qp_state %d \n", + qp_handle->qp_type, ntohl(qpn), gid, + ia_ptr->hca_ptr->ib_trans.global, + ntohs(lid), ia_ptr->hca_ptr->port_num, + ep_ptr, ep_ptr->qp_state); + + mask |= IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER; + + qp_attr.dest_qp_num = ntohl(qpn); + qp_attr.rq_psn = 1; + qp_attr.path_mtu = ia_ptr->hca_ptr->ib_trans.mtu; + qp_attr.max_dest_rd_atomic = + ep_ptr->param.ep_attr.max_rdma_read_out; + qp_attr.min_rnr_timer = + ia_ptr->hca_ptr->ib_trans.rnr_timer; + + /* address handle. 
RC and UD */ + qp_attr.ah_attr.dlid = ntohs(lid); + if (gid && ia_ptr->hca_ptr->ib_trans.global) { + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " QPS_RTR: GID Subnet 0x" F64x " ID 0x" F64x "\n", + (unsigned long long)htonll(gid->global.subnet_prefix), + (unsigned long long)htonll(gid->global.interface_id)); + + qp_attr.ah_attr.is_global = 1; + qp_attr.ah_attr.grh.dgid.global.subnet_prefix = + gid->global.subnet_prefix; + qp_attr.ah_attr.grh.dgid.global.interface_id = + gid->global.interface_id; + qp_attr.ah_attr.grh.hop_limit = + ia_ptr->hca_ptr->ib_trans.hop_limit; + qp_attr.ah_attr.grh.traffic_class = + ia_ptr->hca_ptr->ib_trans.tclass; + } + qp_attr.ah_attr.sl = dapls_get_sl(ia_ptr->hca_ptr, lid); + qp_attr.ah_attr.src_path_bits = 0; + qp_attr.ah_attr.port_num = ia_ptr->hca_ptr->port_num; + + /* UD: already in RTR, RTS state */ + if (qp_handle->qp_type == IBV_QPT_UD) { + mask = IBV_QP_STATE; + if (ep_ptr->qp_state == IBV_QPS_RTR || + ep_ptr->qp_state == IBV_QPS_RTS) + return DAT_SUCCESS; + } + break; + case IBV_QPS_RTS: + if (qp_handle->qp_type == IBV_QPT_RC) { + mask |= IBV_QP_SQ_PSN | + IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC; + qp_attr.timeout = + ia_ptr->hca_ptr->ib_trans.ack_timer; + qp_attr.retry_cnt = + ia_ptr->hca_ptr->ib_trans.ack_retry; + qp_attr.rnr_retry = + ia_ptr->hca_ptr->ib_trans.rnr_retry; + qp_attr.max_rd_atomic = + ep_ptr->param.ep_attr.max_rdma_read_out; + } + /* RC and UD */ + qp_attr.qp_state = IBV_QPS_RTS; + qp_attr.sq_psn = 1; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " QPS_RTS: psn %x rd_atomic %d ack %d " + " retry %d rnr_retry %d ep %p qp_state %d\n", + qp_attr.sq_psn, qp_attr.max_rd_atomic, + qp_attr.timeout, qp_attr.retry_cnt, + qp_attr.rnr_retry, ep_ptr, + ep_ptr->qp_state); + + if (qp_handle->qp_type == IBV_QPT_UD) { + /* already RTS, multi remote AH's on QP */ + if (ep_ptr->qp_state == IBV_QPS_RTS) + return DAT_SUCCESS; + else + mask = IBV_QP_STATE | IBV_QP_SQ_PSN; + } + break; + case IBV_QPS_INIT: + 
mask |= IBV_QP_PKEY_INDEX | IBV_QP_PORT; + if (qp_handle->qp_type == IBV_QPT_RC) { + mask |= IBV_QP_ACCESS_FLAGS; + qp_attr.qp_access_flags = + IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_WRITE | + IBV_ACCESS_REMOTE_READ | + IBV_ACCESS_REMOTE_ATOMIC | + IBV_ACCESS_MW_BIND; + } + + if (qp_handle->qp_type == IBV_QPT_UD) { + /* already INIT, multi remote AH's on QP */ + if (ep_ptr->qp_state == IBV_QPS_INIT) + return DAT_SUCCESS; + mask |= IBV_QP_QKEY; + qp_attr.qkey = DAT_UD_QKEY; + } + + qp_attr.pkey_index = ia_ptr->hca_ptr->ib_trans.pkey_idx; + qp_attr.port_num = ia_ptr->hca_ptr->port_num; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " QPS_INIT: pi %x port %x acc %x qkey 0x%x\n", + qp_attr.pkey_index, qp_attr.port_num, + qp_attr.qp_access_flags, qp_attr.qkey); + break; + default: + break; + } + + ret = ibv_modify_qp(qp_handle, &qp_attr, mask); + if (ret == 0) { + ep_ptr->qp_state = qp_state; + return DAT_SUCCESS; + } else { + return (dapl_convert_errno(errno, "modify_qp_state")); + } +} + +/* Modify UD type QP from init, rtr, rts, info network order */ +DAT_RETURN +dapls_modify_qp_ud(IN DAPL_HCA *hca, IN ib_qp_handle_t qp) +{ + struct ibv_qp_attr qp_attr; + + /* modify QP, setup and prepost buffers */ + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.qp_state = IBV_QPS_INIT; + qp_attr.pkey_index = hca->ib_trans.pkey_idx; + qp_attr.port_num = hca->port_num; + qp_attr.qkey = DAT_UD_QKEY; + if (ibv_modify_qp(qp, &qp_attr, + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_QKEY)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " modify_ud_qp INIT: ERR %s\n", strerror(errno)); + return (dapl_convert_errno(errno, "modify_qp")); + } + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.qp_state = IBV_QPS_RTR; + if (ibv_modify_qp(qp, &qp_attr,IBV_QP_STATE)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " modify_ud_qp RTR: ERR %s\n", strerror(errno)); + return (dapl_convert_errno(errno, "modify_qp")); + } + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + 
qp_attr.qp_state = IBV_QPS_RTS; + qp_attr.sq_psn = 1; + if (ibv_modify_qp(qp, &qp_attr, + IBV_QP_STATE | IBV_QP_SQ_PSN)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " modify_ud_qp RTS: ERR %s\n", strerror(errno)); + return (dapl_convert_errno(errno, "modify_qp")); + } + return DAT_SUCCESS; +} + +/* Create address handle for remote QP, info in network order */ +ib_ah_handle_t +dapls_create_ah(IN DAPL_HCA *hca, + IN ib_pd_handle_t pd, + IN ib_qp_handle_t qp, + IN uint16_t lid, + IN ib_gid_handle_t gid) +{ + struct ibv_qp_attr qp_attr; + ib_ah_handle_t ah; + + if (qp->qp_type != IBV_QPT_UD) { + dapl_log(DAPL_DBG_TYPE_ERR, + " create_ah ERR: QP_type != UD\n"); + return NULL; + } + + dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr)); + qp_attr.qp_state = IBV_QP_STATE; + + /* address handle. RC and UD */ + qp_attr.ah_attr.dlid = ntohs(lid); + if (gid != NULL) { + dapl_log(DAPL_DBG_TYPE_CM, "dapl_create_ah: with GID\n"); + qp_attr.ah_attr.is_global = 1; + qp_attr.ah_attr.grh.dgid.global.subnet_prefix = + ntohll(gid->global.subnet_prefix); + qp_attr.ah_attr.grh.dgid.global.interface_id = + ntohll(gid->global.interface_id); + qp_attr.ah_attr.grh.hop_limit = hca->ib_trans.hop_limit; + qp_attr.ah_attr.grh.traffic_class = hca->ib_trans.tclass; + } + qp_attr.ah_attr.sl = dapls_get_sl(hca, lid); + qp_attr.ah_attr.src_path_bits = 0; + qp_attr.ah_attr.port_num = hca->port_num; + + dapl_log(DAPL_DBG_TYPE_CM, + " dapls_create_ah: port %x lid %x pd %p ctx %p handle 0x%x\n", + hca->port_num,qp_attr.ah_attr.dlid, pd, pd->context, pd->handle); + + /* UD: create AH for remote side */ + ah = ibv_create_ah(pd, &qp_attr.ah_attr); + if (!ah) { + dapl_log(DAPL_DBG_TYPE_ERR, + " create_ah: ERR %s\n", strerror(errno)); + return NULL; + } + + dapl_log(DAPL_DBG_TYPE_CM, + " dapls_create_ah: AH %p for lid %x\n", + ah, qp_attr.ah_attr.dlid); + + return ah; +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 8 + * End: + */ diff --git 
a/trunk/ulp/dapl2/dapl/openib_scm/SOURCES b/trunk/ulp/dapl2/dapl/openib_scm/SOURCES index d4470dee..109f9afa 100644 --- a/trunk/ulp/dapl2/dapl/openib_scm/SOURCES +++ b/trunk/ulp/dapl2/dapl/openib_scm/SOURCES @@ -26,7 +26,8 @@ SOURCES = \ INCLUDES = ..\include;..\openib_common\;..\common;windows;..\..\dat\include;\ ..\..\dat\udat\windows;..\udapl\windows;..\..\..\..\inc\user\linux;\ - ..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include + ..\..\..\..\inc;..\..\..\..\inc\user; ..\..\..\libibverbs\include;\ + ..\..\..\librdmacm\include; DAPL_OPTS = -DEXPORT_DAPL_SYMBOLS -DDAT_EXTENSIONS -DSOCK_CM -DOPENIB -DCQ_WAIT_OBJECT diff --git a/trunk/ulp/dapl2/dapl/openib_scm/cm.c b/trunk/ulp/dapl2/dapl/openib_scm/cm.c index 56d4c73e..5e60b540 100644 --- a/trunk/ulp/dapl2/dapl/openib_scm/cm.c +++ b/trunk/ulp/dapl2/dapl/openib_scm/cm.c @@ -1,1927 +1,1931 @@ -/* - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -/*************************************************************************** - * - * Module: uDAPL - * - * Filename: dapl_ib_cm.c - * - * Author: Arlin Davis - * - * Created: 3/10/2005 - * - * Description: - * - * The uDAPL openib provider - connection management - * - **************************************************************************** - * Source Control System Information - * - * $Id: $ - * - * Copyright (c) 2005 Intel Corporation. All rights reserved. - * - **************************************************************************/ - -#if defined(_WIN32) -#define FD_SETSIZE 1024 -#define DAPL_FD_SETSIZE FD_SETSIZE -#endif - -#include "dapl.h" -#include "dapl_adapter_util.h" -#include "dapl_evd_util.h" -#include "dapl_cr_util.h" -#include "dapl_name_service.h" -#include "dapl_ib_util.h" -#include "dapl_ep_util.h" -#include "dapl_osd.h" - -/* forward declarations */ -static DAT_RETURN -dapli_socket_connect(DAPL_EP * ep_ptr, - DAT_IA_ADDRESS_PTR r_addr, - DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data); - -#ifdef DAPL_DBG -/* Check for EP linking to IA and proper connect state */ -void dapli_ep_check(DAPL_EP *ep) -{ - DAPL_IA *ia_ptr = ep->header.owner_ia; - DAPL_EP *ep_ptr, *next_ep_ptr; - int found = 0; - - dapl_os_lock(&ia_ptr->header.lock); - ep_ptr = (dapl_llist_is_empty (&ia_ptr->ep_list_head) - ? NULL : dapl_llist_peek_head (&ia_ptr->ep_list_head)); - - while (ep_ptr != NULL) { - next_ep_ptr = - dapl_llist_next_entry(&ia_ptr->ep_list_head, - &ep_ptr->header.ia_list_entry); - if (ep == ep_ptr) { - found++; - if ((ep->cr_ptr && ep->param.ep_state - != DAT_EP_STATE_COMPLETION_PENDING) || - (!ep->cr_ptr && ep->param.ep_state - != DAT_EP_STATE_ACTIVE_CONNECTION_PENDING)) - goto err; - else - goto match; - } - ep_ptr = next_ep_ptr; - } -err: - dapl_log(DAPL_DBG_TYPE_ERR, - " dapli_ep_check ERR: %s %s ep=%p state=%d magic=0x%x\n", - ep->cr_ptr ? "PASSIVE":"ACTIVE", - found ? 
"WRONG_STATE":"NOT_FOUND" , - ep, ep->param.ep_state, ep->header.magic); -match: - dapl_os_unlock(&ia_ptr->header.lock); - return; -} -#else -#define dapli_ep_check(ep) -#endif - -#if defined(_WIN32) || defined(_WIN64) -enum DAPL_FD_EVENTS { - DAPL_FD_READ = 0x1, - DAPL_FD_WRITE = 0x2, - DAPL_FD_ERROR = 0x4 -}; - -static int dapl_config_socket(DAPL_SOCKET s) -{ - unsigned long nonblocking = 1; - int ret, opt = 1; - - ret = ioctlsocket(s, FIONBIO, &nonblocking); - - /* no delay for small packets */ - if (!ret) - ret = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, - (char *)&opt, sizeof(opt)); - return ret; -} - -static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr, - int addrlen) -{ - int err; - - err = connect(s, addr, addrlen); - if (err == SOCKET_ERROR) - err = WSAGetLastError(); - return (err == WSAEWOULDBLOCK) ? EAGAIN : err; -} - -struct dapl_fd_set { - struct fd_set set[3]; -}; - -static struct dapl_fd_set *dapl_alloc_fd_set(void) -{ - return dapl_os_alloc(sizeof(struct dapl_fd_set)); -} - -static void dapl_fd_zero(struct dapl_fd_set *set) -{ - FD_ZERO(&set->set[0]); - FD_ZERO(&set->set[1]); - FD_ZERO(&set->set[2]); -} - -static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set, - enum DAPL_FD_EVENTS event) -{ - FD_SET(s, &set->set[(event == DAPL_FD_READ) ? 
0 : 1]); - FD_SET(s, &set->set[2]); - return 0; -} - -static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event) -{ - struct fd_set rw_fds; - struct fd_set err_fds; - struct timeval tv; - int ret; - - FD_ZERO(&rw_fds); - FD_ZERO(&err_fds); - FD_SET(s, &rw_fds); - FD_SET(s, &err_fds); - - tv.tv_sec = 0; - tv.tv_usec = 0; - - if (event == DAPL_FD_READ) - ret = select(1, &rw_fds, NULL, &err_fds, &tv); - else - ret = select(1, NULL, &rw_fds, &err_fds, &tv); - - if (ret == 0) - return 0; - else if (ret == SOCKET_ERROR) - return DAPL_FD_ERROR; - else if (FD_ISSET(s, &rw_fds)) - return event; - else - return DAPL_FD_ERROR; -} - -static int dapl_select(struct dapl_fd_set *set) -{ - int ret; - - dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep\n"); - ret = select(0, &set->set[0], &set->set[1], &set->set[2], NULL); - dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup\n"); - - if (ret == SOCKET_ERROR) - dapl_dbg_log(DAPL_DBG_TYPE_THREAD, - " dapl_select: error 0x%x\n", WSAGetLastError()); - - return ret; -} - -static int dapl_socket_errno(void) -{ - int err; - - err = WSAGetLastError(); - switch (err) { - case WSAEACCES: - case WSAEADDRINUSE: - return EADDRINUSE; - case WSAECONNRESET: - return ECONNRESET; - default: - return err; - } -} -#else // _WIN32 || _WIN64 -enum DAPL_FD_EVENTS { - DAPL_FD_READ = POLLIN, - DAPL_FD_WRITE = POLLOUT, - DAPL_FD_ERROR = POLLERR -}; - -static int dapl_config_socket(DAPL_SOCKET s) -{ - int ret, opt = 1; - - /* non-blocking */ - ret = fcntl(s, F_GETFL); - if (ret >= 0) - ret = fcntl(s, F_SETFL, ret | O_NONBLOCK); - - /* no delay for small packets */ - if (!ret) - ret = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, - (char *)&opt, sizeof(opt)); - return ret; -} - -static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr, - int addrlen) -{ - int ret; - - ret = connect(s, addr, addrlen); - - return (errno == EINPROGRESS) ? 
EAGAIN : ret; -} - -struct dapl_fd_set { - int index; - struct pollfd set[DAPL_FD_SETSIZE]; -}; - -static struct dapl_fd_set *dapl_alloc_fd_set(void) -{ - return dapl_os_alloc(sizeof(struct dapl_fd_set)); -} - -static void dapl_fd_zero(struct dapl_fd_set *set) -{ - set->index = 0; -} - -static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set, - enum DAPL_FD_EVENTS event) -{ - if (set->index == DAPL_FD_SETSIZE - 1) { - dapl_log(DAPL_DBG_TYPE_ERR, - "SCM ERR: cm_thread exceeded FD_SETSIZE %d\n", - set->index + 1); - return -1; - } - - set->set[set->index].fd = s; - set->set[set->index].revents = 0; - set->set[set->index++].events = event; - return 0; -} - -static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event) -{ - struct pollfd fds; - int ret; - - fds.fd = s; - fds.events = event; - fds.revents = 0; - ret = poll(&fds, 1, 0); - dapl_log(DAPL_DBG_TYPE_THREAD, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n", - s, ret, fds.revents); - if (ret == 0) - return 0; - else if (ret < 0 || (fds.revents & (POLLERR | POLLHUP | POLLNVAL))) - return DAPL_FD_ERROR; - else - return event; -} - -static int dapl_select(struct dapl_fd_set *set) -{ - int ret; - - dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " dapl_select: sleep, fds=%d\n", set->index); - ret = poll(set->set, set->index, -1); - dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " dapl_select: wakeup, ret=0x%x\n", ret); - return ret; -} - -#define dapl_socket_errno() errno -#endif - -static void dapli_cm_thread_signal(dp_ib_cm_handle_t cm_ptr) -{ - if (cm_ptr->hca) - send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0); -} - -static void dapli_cm_free(dp_ib_cm_handle_t cm_ptr) -{ - dapl_os_lock(&cm_ptr->lock); - cm_ptr->state = DCM_FREE; - dapl_os_unlock(&cm_ptr->lock); - dapli_cm_thread_signal(cm_ptr); -} - -static void dapli_cm_dealloc(dp_ib_cm_handle_t cm_ptr) -{ - dapl_os_assert(!cm_ptr->ref_count); - - if (cm_ptr->socket != DAPL_INVALID_SOCKET) { - shutdown(cm_ptr->socket, SHUT_RDWR); - closesocket(cm_ptr->socket); - } 
- if (cm_ptr->ah) - ibv_destroy_ah(cm_ptr->ah); - - dapl_os_lock_destroy(&cm_ptr->lock); - dapl_os_wait_object_destroy(&cm_ptr->event); - dapl_os_free(cm_ptr, sizeof(*cm_ptr)); -} - -void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr) -{ - dapl_os_lock(&cm_ptr->lock); - cm_ptr->ref_count++; - dapl_os_unlock(&cm_ptr->lock); -} - -void dapls_cm_release(dp_ib_cm_handle_t cm_ptr) -{ - dapl_os_lock(&cm_ptr->lock); - cm_ptr->ref_count--; - if (cm_ptr->ref_count) { - dapl_os_unlock(&cm_ptr->lock); - return; - } - dapl_os_unlock(&cm_ptr->lock); - dapli_cm_dealloc(cm_ptr); -} - -static dp_ib_cm_handle_t dapli_cm_alloc(DAPL_EP *ep_ptr) -{ - dp_ib_cm_handle_t cm_ptr; - - /* Allocate CM, init lock, and initialize */ - if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL) - return NULL; - - (void)dapl_os_memzero(cm_ptr, sizeof(*cm_ptr)); - if (dapl_os_lock_init(&cm_ptr->lock)) - goto bail; - - if (dapl_os_wait_object_init(&cm_ptr->event)) { - dapl_os_lock_destroy(&cm_ptr->lock); - goto bail; - } - dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm_ptr->list_entry); - dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm_ptr->local_entry); - - cm_ptr->msg.ver = htons(DCM_VER); - cm_ptr->socket = DAPL_INVALID_SOCKET; - dapls_cm_acquire(cm_ptr); - - /* Link EP and CM */ - if (ep_ptr != NULL) { - dapl_ep_link_cm(ep_ptr, cm_ptr); /* ref++ */ - cm_ptr->ep = ep_ptr; - cm_ptr->hca = ((DAPL_IA *)ep_ptr->param.ia_handle)->hca_ptr; - } - return cm_ptr; -bail: - dapl_os_free(cm_ptr, sizeof(*cm_ptr)); - return NULL; -} - -/* queue socket for processing CM work */ -static void dapli_cm_queue(dp_ib_cm_handle_t cm_ptr) -{ - /* add to work queue for cr thread processing */ - dapl_os_lock(&cm_ptr->hca->ib_trans.lock); - dapls_cm_acquire(cm_ptr); - dapl_llist_add_tail(&cm_ptr->hca->ib_trans.list, - (DAPL_LLIST_ENTRY *)&cm_ptr->local_entry, cm_ptr); - dapl_os_unlock(&cm_ptr->hca->ib_trans.lock); - dapli_cm_thread_signal(cm_ptr); -} - -/* called with local LIST lock */ -static void 
dapli_cm_dequeue(dp_ib_cm_handle_t cm_ptr) -{ - /* Remove from work queue, cr thread processing */ - dapl_llist_remove_entry(&cm_ptr->hca->ib_trans.list, - (DAPL_LLIST_ENTRY *)&cm_ptr->local_entry); - dapls_cm_release(cm_ptr); -} - -/* BLOCKING: called from dapl_ep_free, EP link will be last ref */ -void dapls_cm_free(dp_ib_cm_handle_t cm_ptr) -{ - dapl_log(DAPL_DBG_TYPE_CM, - " cm_free: cm %p %s ep %p refs=%d\n", - cm_ptr, dapl_cm_state_str(cm_ptr->state), - cm_ptr->ep, cm_ptr->ref_count); - - /* free from internal workq, wait until EP is last ref */ - dapl_os_lock(&cm_ptr->lock); - cm_ptr->state = DCM_FREE; - while (cm_ptr->ref_count != 1) { - dapli_cm_thread_signal(cm_ptr); - dapl_os_unlock(&cm_ptr->lock); - dapl_os_sleep_usec(10000); - dapl_os_lock(&cm_ptr->lock); - } - dapl_os_unlock(&cm_ptr->lock); - - /* unlink, dequeue from EP. Final ref so release will destroy */ - dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr); -} - -/* - * ACTIVE/PASSIVE: called from CR thread or consumer via ep_disconnect - * or from ep_free. 
- */ -DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr) -{ - DAT_UINT32 disc_data = htonl(0xdead); - - dapl_os_lock(&cm_ptr->lock); - if (cm_ptr->state != DCM_CONNECTED || - cm_ptr->state == DCM_DISCONNECTED) { - dapl_os_unlock(&cm_ptr->lock); - return DAT_SUCCESS; - } - cm_ptr->state = DCM_DISCONNECTED; - dapl_os_unlock(&cm_ptr->lock); - - /* send disc date, close socket, schedule destroy */ - send(cm_ptr->socket, (char *)&disc_data, sizeof(disc_data), 0); - - /* disconnect events for RC's only */ - if (cm_ptr->ep->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) { - dapl_os_lock(&cm_ptr->ep->header.lock); - dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 0,0,0); - dapl_os_unlock(&cm_ptr->ep->header.lock); - if (cm_ptr->ep->cr_ptr) { - dapls_cr_callback(cm_ptr, - IB_CME_DISCONNECTED, - NULL, 0, cm_ptr->sp); - } else { - dapl_evd_connection_callback(cm_ptr, - IB_CME_DISCONNECTED, - NULL, 0, cm_ptr->ep); - } - } - - /* release from workq */ - dapli_cm_free(cm_ptr); - - /* scheduled destroy via disconnect clean in callback */ - return DAT_SUCCESS; -} - -/* - * ACTIVE: socket connected, send QP information to peer - */ -static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err) -{ - int len, exp; - struct iovec iov[2]; - struct dapl_ep *ep_ptr = cm_ptr->ep; - - if (err) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_PENDING: %s ERR %s -> %s %d - %s\n", - err == -1 ? "POLL" : "SOCKOPT", - err == -1 ? strerror(dapl_socket_errno()) : strerror(err), - inet_ntoa(((struct sockaddr_in *) - &cm_ptr->addr)->sin_addr), - ntohs(((struct sockaddr_in *) - &cm_ptr->addr)->sin_port), - err == ETIMEDOUT ? 
"RETRYING...":"ABORTING"); - - /* retry a timeout */ - if (err == ETIMEDOUT) { - closesocket(cm_ptr->socket); - cm_ptr->socket = DAPL_INVALID_SOCKET; - dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, - ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000, - ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data); - dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr); - dapli_cm_free(cm_ptr); - return; - } - - goto bail; - } - - dapl_os_lock(&cm_ptr->lock); - cm_ptr->state = DCM_REP_PENDING; - dapl_os_unlock(&cm_ptr->lock); - - /* send qp info and pdata to remote peer */ - exp = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; - iov[0].iov_base = (void *)&cm_ptr->msg; - iov[0].iov_len = exp; - if (cm_ptr->msg.p_size) { - iov[1].iov_base = cm_ptr->msg.p_data; - iov[1].iov_len = ntohs(cm_ptr->msg.p_size); - len = writev(cm_ptr->socket, iov, 2); - } else { - len = writev(cm_ptr->socket, iov, 1); - } - - if (len != (exp + ntohs(cm_ptr->msg.p_size))) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_PENDING len ERR 0x%x %s, wcnt=%d(%d) -> %s\n", - err, strerror(err), len, - exp + ntohs(cm_ptr->msg.p_size), - inet_ntoa(((struct sockaddr_in *) - ep_ptr->param. - remote_ia_address_ptr)->sin_addr)); - goto bail; - } - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " CONN_PENDING: sending SRC lid=0x%x," - " qpn=0x%x, psize=%d\n", - ntohs(cm_ptr->msg.saddr.ib.lid), - ntohl(cm_ptr->msg.saddr.ib.qpn), - ntohs(cm_ptr->msg.p_size)); - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " CONN_PENDING: SRC GID subnet %016llx id %016llx\n", - (unsigned long long) - htonll(*(uint64_t*)&cm_ptr->msg.saddr.ib.gid[0]), - (unsigned long long) - htonll(*(uint64_t*)&cm_ptr->msg.saddr.ib.gid[8])); - return; - -bail: - /* mark CM object for cleanup */ - dapli_cm_free(cm_ptr); - dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, 0, ep_ptr); -} - -/* - * ACTIVE: Create socket, connect, defer exchange QP information to CR thread - * to avoid blocking. 
- */ -static DAT_RETURN -dapli_socket_connect(DAPL_EP * ep_ptr, - DAT_IA_ADDRESS_PTR r_addr, - DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data) -{ - dp_ib_cm_handle_t cm_ptr; - int ret; - socklen_t sl; - DAPL_IA *ia_ptr = ep_ptr->header.owner_ia; - DAT_RETURN dat_ret = DAT_INSUFFICIENT_RESOURCES; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d p_size=%d\n", - r_qual, p_size); - - cm_ptr = dapli_cm_alloc(ep_ptr); - if (cm_ptr == NULL) - return dat_ret; - - /* create, connect, sockopt, and exchange QP information */ - if ((cm_ptr->socket = - socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " connect: socket create ERR 0x%x %s\n", - err, strerror(err)); - goto bail; - } - - ret = dapl_config_socket(cm_ptr->socket); - if (ret < 0) { - dapl_log(DAPL_DBG_TYPE_ERR, - " connect: config socket %d RET %d ERR 0x%x %s\n", - cm_ptr->socket, ret, - dapl_socket_errno(), strerror(dapl_socket_errno())); - dat_ret = DAT_INTERNAL_ERROR; - goto bail; - } - - /* save remote address */ - dapl_os_memcpy(&cm_ptr->addr, r_addr, sizeof(*r_addr)); - -#ifdef DAPL_DBG - /* DBG: Active PID [0], PASSIVE PID [2]*/ - *(uint16_t*)&cm_ptr->msg.resv[0] = htons((uint16_t)dapl_os_getpid()); - *(uint16_t*)&cm_ptr->msg.resv[2] = ((struct sockaddr_in *)&cm_ptr->addr)->sin_port; -#endif - ((struct sockaddr_in *)&cm_ptr->addr)->sin_port = htons(r_qual + 1000); - ret = dapl_connect_socket(cm_ptr->socket, (struct sockaddr *)&cm_ptr->addr, - sizeof(cm_ptr->addr)); - if (ret && ret != EAGAIN) { - dapl_log(DAPL_DBG_TYPE_ERR, - " connect: dapl_connect_socket RET %d ERR 0x%x %s\n", - ret, dapl_socket_errno(), - strerror(dapl_socket_errno())); - dat_ret = DAT_INVALID_ADDRESS; - goto bail; - } - - /* REQ: QP info in msg.saddr, IA address in msg.daddr, and pdata */ - cm_ptr->hca = ia_ptr->hca_ptr; - cm_ptr->msg.op = ntohs(DCM_REQ); - cm_ptr->msg.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num); - 
cm_ptr->msg.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type; - cm_ptr->msg.saddr.ib.lid = ia_ptr->hca_ptr->ib_trans.lid; - dapl_os_memcpy(&cm_ptr->msg.saddr.ib.gid[0], - &ia_ptr->hca_ptr->ib_trans.gid, 16); - - /* get local address information from socket */ - sl = sizeof(cm_ptr->msg.daddr.so); - if (getsockname(cm_ptr->socket, (struct sockaddr *)&cm_ptr->msg.daddr.so, &sl)) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " connect getsockname ERROR: 0x%x %s -> %s r_qual %d\n", - err, strerror(err), - inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr), - (unsigned int)r_qual);; - } - - if (p_size) { - cm_ptr->msg.p_size = htons(p_size); - dapl_os_memcpy(cm_ptr->msg.p_data, p_data, p_size); - } - - /* connected or pending, either way results via async event */ - if (ret == 0) - dapli_socket_connected(cm_ptr, 0); - else - cm_ptr->state = DCM_CONN_PENDING; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: p_data=%p %p\n", - cm_ptr->msg.p_data, cm_ptr->msg.p_data); - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " connect: %s r_qual %d pending, p_sz=%d, %d %d ...\n", - inet_ntoa(((struct sockaddr_in *)&cm_ptr->addr)->sin_addr), - (unsigned int)r_qual, ntohs(cm_ptr->msg.p_size), - cm_ptr->msg.p_data[0], cm_ptr->msg.p_data[1]); - - /* queue up on work thread */ - dapli_cm_queue(cm_ptr); - return DAT_SUCCESS; -bail: - dapl_log(DAPL_DBG_TYPE_ERR, - " connect ERROR: -> %s r_qual %d\n", - inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr), - (unsigned int)r_qual); - - /* Never queued, destroy */ - dapls_cm_release(cm_ptr); - return dat_ret; -} - -/* - * ACTIVE: exchange QP information, called from CR thread - */ -static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) -{ - DAPL_EP *ep_ptr = cm_ptr->ep; - int len, exp = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; - ib_cm_events_t event = IB_CME_LOCAL_FAILURE; - socklen_t sl; - - /* read DST information into cm_ptr, overwrite SRC info */ - dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: recv peer QP data\n"); - - len = 
recv(cm_ptr->socket, (char *)&cm_ptr->msg, exp, 0); - if (len != exp || ntohs(cm_ptr->msg.ver) != DCM_VER) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_WARN, - " CONN_RTU read: sk %d ERR 0x%x, rcnt=%d, v=%d -> %s PORT L-%x R-%x PID L-%x R-%x\n", - cm_ptr->socket, err, len, ntohs(cm_ptr->msg.ver), - inet_ntoa(((struct sockaddr_in *)&cm_ptr->addr)->sin_addr), - ntohs(((struct sockaddr_in *)&cm_ptr->msg.daddr.so)->sin_port), - ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port), - ntohs(*(uint16_t*)&cm_ptr->msg.resv[0]), - ntohs(*(uint16_t*)&cm_ptr->msg.resv[2])); - - /* Retry; corner case where server tcp stack resets under load */ - if (err == ECONNRESET) { - closesocket(cm_ptr->socket); - cm_ptr->socket = DAPL_INVALID_SOCKET; - dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, - ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000, - ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data); - dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr); - dapli_cm_free(cm_ptr); - return; - } - goto bail; - } - - /* keep the QP, address info in network order */ - - /* save remote address information, in msg.daddr */ - dapl_os_memcpy(&cm_ptr->addr, - &cm_ptr->msg.daddr.so, - sizeof(union dcm_addr)); - - /* save local address information from socket */ - sl = sizeof(cm_ptr->addr); - getsockname(cm_ptr->socket,(struct sockaddr *)&cm_ptr->addr, &sl); - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " CONN_RTU: DST %s %d lid=0x%x," - " qpn=0x%x, qp_type=%d, psize=%d\n", - inet_ntoa(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_addr), - ntohs(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_port), - ntohs(cm_ptr->msg.saddr.ib.lid), - ntohl(cm_ptr->msg.saddr.ib.qpn), - cm_ptr->msg.saddr.ib.qp_type, - ntohs(cm_ptr->msg.p_size)); - - /* validate private data size before reading */ - if (ntohs(cm_ptr->msg.p_size) > DCM_MAX_PDATA_SIZE) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_RTU read: psize (%d) wrong -> %s\n", - ntohs(cm_ptr->msg.p_size), - inet_ntoa(((struct 
sockaddr_in *) - ep_ptr->param. - remote_ia_address_ptr)->sin_addr)); - goto bail; - } - - /* read private data into cm_handle if any present */ - dapl_dbg_log(DAPL_DBG_TYPE_EP," CONN_RTU: read private data\n"); - exp = ntohs(cm_ptr->msg.p_size); - if (exp) { - len = recv(cm_ptr->socket, cm_ptr->msg.p_data, exp, 0); - if (len != exp) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_RTU read pdata: ERR 0x%x %s, rcnt=%d -> %s\n", - err, strerror(err), len, - inet_ntoa(((struct sockaddr_in *) - ep_ptr->param. - remote_ia_address_ptr)->sin_addr)); - goto bail; - } - } - - /* check for consumer or protocol stack reject */ - if (ntohs(cm_ptr->msg.op) == DCM_REP) - event = IB_CME_CONNECTED; - else if (ntohs(cm_ptr->msg.op) == DCM_REJ_USER) - event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA; - else - event = IB_CME_DESTINATION_REJECT; - - if (event != IB_CME_CONNECTED) { - dapl_log(DAPL_DBG_TYPE_CM, - " CONN_RTU: reject from %s %x\n", - inet_ntoa(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_addr), - ntohs(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_port)); - goto bail; - } - - /* modify QP to RTR and then to RTS with remote info */ - dapl_os_lock(&ep_ptr->header.lock); - if (dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_RTR, - cm_ptr->msg.saddr.ib.qpn, - cm_ptr->msg.saddr.ib.lid, - (ib_gid_handle_t)cm_ptr->msg.saddr.ib.gid) != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_RTU: QPS_RTR ERR %s (%d,%d,%x,%x,%x) -> %s %x\n", - strerror(errno), ep_ptr->qp_handle->qp_type, - ep_ptr->qp_state, ep_ptr->qp_handle->qp_num, - ntohl(cm_ptr->msg.saddr.ib.qpn), - ntohs(cm_ptr->msg.saddr.ib.lid), - inet_ntoa(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_addr), - ntohs(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_port)); - dapl_os_unlock(&ep_ptr->header.lock); - goto bail; - } - if (dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_RTS, - cm_ptr->msg.saddr.ib.qpn, - cm_ptr->msg.saddr.ib.lid, - NULL) != DAT_SUCCESS) { 
- dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_RTU: QPS_RTS ERR %s (%d,%d,%x,%x,%x) -> %s %x\n", - strerror(errno), ep_ptr->qp_handle->qp_type, - ep_ptr->qp_state, ep_ptr->qp_handle->qp_num, - ntohl(cm_ptr->msg.saddr.ib.qpn), - ntohs(cm_ptr->msg.saddr.ib.lid), - inet_ntoa(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_addr), - ntohs(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_port)); - dapl_os_unlock(&ep_ptr->header.lock); - goto bail; - } - dapl_os_unlock(&ep_ptr->header.lock); - dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: send RTU\n"); - - /* complete handshake after final QP state change, Just ver+op */ - dapl_os_lock(&cm_ptr->lock); - cm_ptr->state = DCM_CONNECTED; - dapl_os_unlock(&cm_ptr->lock); - - cm_ptr->msg.op = ntohs(DCM_RTU); - if (send(cm_ptr->socket, (char *)&cm_ptr->msg, 4, 0) == -1) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_RTU: write ERR = 0x%x %s\n", - err, strerror(err)); - goto bail; - } - /* post the event with private data */ - event = IB_CME_CONNECTED; - dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n"); - -#ifdef DAT_EXTENSIONS -ud_bail: - if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) { - DAT_IB_EXTENSION_EVENT_DATA xevent; - ib_pd_handle_t pd_handle = - ((DAPL_PZ *)ep_ptr->param.pz_handle)->pd_handle; - - if (event == IB_CME_CONNECTED) { - cm_ptr->ah = dapls_create_ah(cm_ptr->hca, pd_handle, - ep_ptr->qp_handle, - cm_ptr->msg.saddr.ib.lid, - NULL); - if (cm_ptr->ah) { - /* post UD extended EVENT */ - xevent.status = 0; - xevent.type = DAT_IB_UD_REMOTE_AH; - xevent.remote_ah.ah = cm_ptr->ah; - xevent.remote_ah.qpn = ntohl(cm_ptr->msg.saddr.ib.qpn); - dapl_os_memcpy(&xevent.remote_ah.ia_addr, - &ep_ptr->remote_ia_address, - sizeof(union dcm_addr)); - event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED; - - dapl_log(DAPL_DBG_TYPE_CM, - " CONN_RTU: UD AH %p for lid 0x%x" - " qpn 0x%x\n", - cm_ptr->ah, - ntohs(cm_ptr->msg.saddr.ib.lid), - ntohl(cm_ptr->msg.saddr.ib.qpn)); - - } else - event = 
DAT_IB_UD_CONNECTION_ERROR_EVENT; - - } else if (event == IB_CME_LOCAL_FAILURE) { - event = DAT_IB_UD_CONNECTION_ERROR_EVENT; - } else - event = DAT_IB_UD_CONNECTION_REJECT_EVENT; - - dapls_evd_post_connection_event_ext( - (DAPL_EVD *) ep_ptr->param.connect_evd_handle, - event, - (DAT_EP_HANDLE) ep_ptr, - (DAT_COUNT) exp, - (DAT_PVOID *) cm_ptr->msg.p_data, - (DAT_PVOID *) &xevent); - - /* cleanup and release from local list */ - dapli_cm_free(cm_ptr); - - } else -#endif - { - dapli_ep_check(cm_ptr->ep); - dapl_evd_connection_callback(cm_ptr, event, cm_ptr->msg.p_data, - DCM_MAX_PDATA_SIZE, ep_ptr); - } - dapl_log(DAPL_DBG_TYPE_CM_EST, - " SCM ACTIVE CONN: %x -> %s %x\n", - ntohs(((struct sockaddr_in *) &cm_ptr->addr)->sin_port), - inet_ntoa(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_addr), - ntohs(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_port)-1000); - return; - -bail: - -#ifdef DAT_EXTENSIONS - if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) - goto ud_bail; -#endif - /* close socket, and post error event */ - dapl_os_lock(&cm_ptr->lock); - cm_ptr->state = DCM_REJECTED; - dapl_os_unlock(&cm_ptr->lock); - - dapl_evd_connection_callback(NULL, event, cm_ptr->msg.p_data, - DCM_MAX_PDATA_SIZE, ep_ptr); - dapli_cm_free(cm_ptr); -} - -/* - * PASSIVE: Create socket, listen, accept, exchange QP information - */ -DAT_RETURN -dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr) -{ - struct sockaddr_in addr; - ib_cm_srvc_handle_t cm_ptr = NULL; - DAT_RETURN dat_status = DAT_SUCCESS; - int opt = 1; - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " setup listen(ia_ptr %p ServiceID %d sp_ptr %p)\n", - ia_ptr, serviceID, sp_ptr); - - cm_ptr = dapli_cm_alloc(NULL); - if (cm_ptr == NULL) - return DAT_INSUFFICIENT_RESOURCES; - - cm_ptr->sp = sp_ptr; - cm_ptr->hca = ia_ptr->hca_ptr; - - /* bind, listen, set sockopt, accept, exchange data */ - if ((cm_ptr->socket = - socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) { - int err = 
dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " listen: socket create: ERR 0x%x %s\n", - err, strerror(err)); - dat_status = DAT_INSUFFICIENT_RESOURCES; - goto bail; - } - - setsockopt(cm_ptr->socket, SOL_SOCKET, SO_REUSEADDR, (char*)&opt, sizeof(opt)); - addr.sin_port = htons(serviceID + 1000); - addr.sin_family = AF_INET; - addr.sin_addr = ((struct sockaddr_in *) &ia_ptr->hca_ptr->hca_address)->sin_addr; - - if ((bind(cm_ptr->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) - || (listen(cm_ptr->socket, 128) < 0)) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_CM, - " listen: ERROR 0x%x %s on port %d\n", - err, strerror(err), serviceID + 1000); - if (err == EADDRINUSE) - dat_status = DAT_CONN_QUAL_IN_USE; - else - dat_status = DAT_CONN_QUAL_UNAVAILABLE; - goto bail; - } - - /* set cm_handle for this service point, save listen socket */ - sp_ptr->cm_srvc_handle = cm_ptr; - dapl_os_memcpy(&cm_ptr->addr, &addr, sizeof(addr)); - - /* queue up listen socket to process inbound CR's */ - cm_ptr->state = DCM_LISTEN; - dapli_cm_queue(cm_ptr); - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " setup listen: port %d cr %p s_fd %d\n", - serviceID + 1000, cm_ptr, cm_ptr->socket); - - return dat_status; -bail: - /* Never queued, destroy here */ - dapls_cm_release(cm_ptr); - return dat_status; -} - -/* - * PASSIVE: accept socket - */ -static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr) -{ - dp_ib_cm_handle_t acm_ptr; - int ret, len, opt = 1; - socklen_t sl; - - /* - * Accept all CR's on this port to avoid half-connection (SYN_RCV) - * stalls with many to one connection storms - */ - do { - /* Allocate accept CM and initialize */ - if ((acm_ptr = dapli_cm_alloc(NULL)) == NULL) - return; - - acm_ptr->sp = cm_ptr->sp; - acm_ptr->hca = cm_ptr->hca; - - len = sizeof(union dcm_addr); - acm_ptr->socket = accept(cm_ptr->socket, - (struct sockaddr *) - &acm_ptr->msg.daddr.so, - (socklen_t *) &len); - if (acm_ptr->socket == DAPL_INVALID_SOCKET) { - int err = 
dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT: ERR 0x%x %s on FD %d l_cr %p\n", - err, strerror(err), cm_ptr->socket, cm_ptr); - dapls_cm_release(acm_ptr); - return; - } - dapl_dbg_log(DAPL_DBG_TYPE_CM, " accepting from %s %x\n", - inet_ntoa(((struct sockaddr_in *) - &acm_ptr->msg.daddr.so)->sin_addr), - ntohs(((struct sockaddr_in *) - &acm_ptr->msg.daddr.so)->sin_port)); - - /* no delay for small packets */ - ret = setsockopt(acm_ptr->socket, IPPROTO_TCP, TCP_NODELAY, - (char *)&opt, sizeof(opt)); - if (ret) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT: NODELAY setsockopt:" - " RET %d ERR 0x%x %s\n", - ret, err, strerror(err)); - } - - /* get local address information from socket */ - sl = sizeof(acm_ptr->addr); - getsockname(acm_ptr->socket, (struct sockaddr *)&acm_ptr->addr, &sl); - acm_ptr->state = DCM_ACCEPTING; - dapli_cm_queue(acm_ptr); - - } while (dapl_poll(cm_ptr->socket, DAPL_FD_READ) == DAPL_FD_READ); -} - -/* - * PASSIVE: receive peer QP information, private data, post cr_event - */ -static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr) -{ - int len, exp = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; - void *p_data = NULL; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read QP data\n"); - - /* read in DST QP info, IA address. 
check for private data */ - len = recv(acm_ptr->socket, (char *)&acm_ptr->msg, exp, 0); - if (len != exp || ntohs(acm_ptr->msg.ver) != DCM_VER) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT read: ERR 0x%x %s, rcnt=%d, ver=%d\n", - err, strerror(err), len, ntohs(acm_ptr->msg.ver)); - goto bail; - } - - /* keep the QP, address info in network order */ - - /* validate private data size before reading */ - exp = ntohs(acm_ptr->msg.p_size); - if (exp > DCM_MAX_PDATA_SIZE) { - dapl_log(DAPL_DBG_TYPE_ERR, - " accept read: psize (%d) wrong\n", - acm_ptr->msg.p_size); - goto bail; - } - - /* read private data into cm_handle if any present */ - if (exp) { - len = recv(acm_ptr->socket, acm_ptr->msg.p_data, exp, 0); - if (len != exp) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " accept read pdata: ERR 0x%x %s, rcnt=%d\n", - err, strerror(err), len); - goto bail; - } - p_data = acm_ptr->msg.p_data; - } - dapl_os_lock(&acm_ptr->lock); - acm_ptr->state = DCM_ACCEPTING_DATA; - dapl_os_unlock(&acm_ptr->lock); - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ACCEPT: DST %s %x lid=0x%x, qpn=0x%x, psz=%d\n", - inet_ntoa(((struct sockaddr_in *) - &acm_ptr->msg.daddr.so)->sin_addr), - ntohs(((struct sockaddr_in *) - &acm_ptr->msg.daddr.so)->sin_port), - ntohs(acm_ptr->msg.saddr.ib.lid), - ntohl(acm_ptr->msg.saddr.ib.qpn), exp); - -#ifdef DAT_EXTENSIONS - if (acm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) { - DAT_IB_EXTENSION_EVENT_DATA xevent; - - /* post EVENT, modify_qp created ah */ - xevent.status = 0; - xevent.type = DAT_IB_UD_CONNECT_REQUEST; - - dapls_evd_post_cr_event_ext(acm_ptr->sp, - DAT_IB_UD_CONNECTION_REQUEST_EVENT, - acm_ptr, - (DAT_COUNT) exp, - (DAT_PVOID *) acm_ptr->msg.p_data, - (DAT_PVOID *) &xevent); - } else -#endif - /* trigger CR event and return SUCCESS */ - dapls_cr_callback(acm_ptr, - IB_CME_CONNECTION_REQUEST_PENDING, - p_data, exp, acm_ptr->sp); - return; -bail: - /* mark for destroy, active will see socket close as 
rej */ - dapli_cm_free(acm_ptr); - return; -} - -/* - * PASSIVE: consumer accept, send local QP information, private data, - * queue on work thread to receive RTU information to avoid blocking - * user thread. - */ -static DAT_RETURN -dapli_socket_accept_usr(DAPL_EP * ep_ptr, - DAPL_CR * cr_ptr, DAT_COUNT p_size, DAT_PVOID p_data) -{ - DAPL_IA *ia_ptr = ep_ptr->header.owner_ia; - dp_ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle; - ib_cm_msg_t local; - struct iovec iov[2]; - int len, exp = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; - DAT_RETURN ret = DAT_INTERNAL_ERROR; - socklen_t sl; - - if (p_size > DCM_MAX_PDATA_SIZE) { - dapl_log(DAPL_DBG_TYPE_ERR, - " accept_usr: psize(%d) too large\n", p_size); - return DAT_LENGTH_ERROR; - } - - /* must have a accepted socket */ - if (cm_ptr->socket == DAPL_INVALID_SOCKET) { - dapl_log(DAPL_DBG_TYPE_ERR, - " accept_usr: cm socket invalid\n"); - goto bail; - } - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ACCEPT_USR: remote lid=0x%x" - " qpn=0x%x qp_type %d, psize=%d\n", - ntohs(cm_ptr->msg.saddr.ib.lid), - ntohl(cm_ptr->msg.saddr.ib.qpn), - cm_ptr->msg.saddr.ib.qp_type, - ntohs(cm_ptr->msg.p_size)); - -#ifdef DAT_EXTENSIONS - if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD && - ep_ptr->qp_handle->qp_type != IBV_QPT_UD) { - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_USR: ERR remote QP is UD," - ", but local QP is not\n"); - ret = (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP); - goto bail; - } -#endif - - /* modify QP to RTR and then to RTS with remote info already read */ - dapl_os_lock(&ep_ptr->header.lock); - if (dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_RTR, - cm_ptr->msg.saddr.ib.qpn, - cm_ptr->msg.saddr.ib.lid, - (ib_gid_handle_t)cm_ptr->msg.saddr.ib.gid) != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_USR: QPS_RTR ERR %s -> %s\n", - strerror(errno), - inet_ntoa(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_addr)); - dapl_os_unlock(&ep_ptr->header.lock); - goto bail; - } - if 
(dapls_modify_qp_state(ep_ptr->qp_handle, - IBV_QPS_RTS, - cm_ptr->msg.saddr.ib.qpn, - cm_ptr->msg.saddr.ib.lid, - NULL) != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_USR: QPS_RTS ERR %s -> %s\n", - strerror(errno), - inet_ntoa(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_addr)); - dapl_os_unlock(&ep_ptr->header.lock); - goto bail; - } - dapl_os_unlock(&ep_ptr->header.lock); - - /* save remote address information */ - dapl_os_memcpy(&ep_ptr->remote_ia_address, - &cm_ptr->msg.daddr.so, - sizeof(union dcm_addr)); - - /* send our QP info, IA address, pdata. Don't overwrite dst data */ - local.ver = htons(DCM_VER); - local.op = htons(DCM_REP); - local.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num); - local.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type; - local.saddr.ib.lid = ia_ptr->hca_ptr->ib_trans.lid; - dapl_os_memcpy(&local.saddr.ib.gid[0], - &ia_ptr->hca_ptr->ib_trans.gid, 16); - - /* Get local address information from socket */ - sl = sizeof(local.daddr.so); - getsockname(cm_ptr->socket, (struct sockaddr *)&local.daddr.so, &sl); - -#ifdef DAPL_DBG - /* DBG: Active PID [0], PASSIVE PID [2] */ - *(uint16_t*)&cm_ptr->msg.resv[2] = htons((uint16_t)dapl_os_getpid()); - dapl_os_memcpy(local.resv, cm_ptr->msg.resv, 4); -#endif - cm_ptr->hca = ia_ptr->hca_ptr; - dapl_os_lock(&cm_ptr->lock); - cm_ptr->state = DCM_ACCEPTED; - dapl_os_unlock(&cm_ptr->lock); - - /* Link CM to EP, already queued on work thread */ - dapl_ep_link_cm(ep_ptr, cm_ptr); - cm_ptr->ep = ep_ptr; - - local.p_size = htons(p_size); - iov[0].iov_base = (void *)&local; - iov[0].iov_len = exp; - - if (p_size) { - iov[1].iov_base = p_data; - iov[1].iov_len = p_size; - len = writev(cm_ptr->socket, iov, 2); - } else - len = writev(cm_ptr->socket, iov, 1); - - if (len != (p_size + exp)) { - int err = dapl_socket_errno(); - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_USR: ERR 0x%x %s, wcnt=%d -> %s\n", - err, strerror(err), len, - inet_ntoa(((struct sockaddr_in *) - 
&cm_ptr->msg.daddr.so)->sin_addr)); - dapl_ep_unlink_cm(ep_ptr, cm_ptr); - cm_ptr->ep = NULL; - goto bail; - } - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ACCEPT_USR: local lid=0x%x qpn=0x%x psz=%d\n", - ntohs(local.saddr.ib.lid), - ntohl(local.saddr.ib.qpn), ntohs(local.p_size)); - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ACCEPT_USR: local GID subnet %016llx id %016llx\n", - (unsigned long long) - htonll(*(uint64_t*)&local.saddr.ib.gid[0]), - (unsigned long long) - htonll(*(uint64_t*)&local.saddr.ib.gid[8])); - - dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n"); - - return DAT_SUCCESS; -bail: - /* schedule cleanup from workq */ - dapli_cm_free(cm_ptr); - return ret; -} - -/* - * PASSIVE: read RTU from active peer, post CONN event - */ -static void dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr) -{ - int len; - ib_cm_events_t event = IB_CME_CONNECTED; - - /* complete handshake after final QP state change, VER and OP */ - len = recv(cm_ptr->socket, (char *)&cm_ptr->msg, 4, 0); - if (len != 4 || ntohs(cm_ptr->msg.op) != DCM_RTU) { - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_RTU: rcv ERR, rcnt=%d op=%x <- %s\n", - len, ntohs(cm_ptr->msg.op), - inet_ntoa(((struct sockaddr_in *) - &cm_ptr->msg.daddr.so)->sin_addr)); - event = IB_CME_DESTINATION_REJECT; - goto bail; - } - - /* save state and reference to EP, queue for disc event */ - dapl_os_lock(&cm_ptr->lock); - cm_ptr->state = DCM_CONNECTED; - dapl_os_unlock(&cm_ptr->lock); - - /* final data exchange if remote QP state is good to go */ - dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: connected!\n"); - -#ifdef DAT_EXTENSIONS -ud_bail: - if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) { - DAT_IB_EXTENSION_EVENT_DATA xevent; - - ib_pd_handle_t pd_handle = - ((DAPL_PZ *)cm_ptr->ep->param.pz_handle)->pd_handle; - - if (event == IB_CME_CONNECTED) { - cm_ptr->ah = dapls_create_ah(cm_ptr->hca, pd_handle, - cm_ptr->ep->qp_handle, - cm_ptr->msg.saddr.ib.lid, - NULL); - if (cm_ptr->ah) { - /* post EVENT, modify_qp created ah */ - 
xevent.status = 0; - xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH; - xevent.remote_ah.ah = cm_ptr->ah; - xevent.remote_ah.qpn = ntohl(cm_ptr->msg.saddr.ib.qpn); - dapl_os_memcpy(&xevent.remote_ah.ia_addr, - &cm_ptr->msg.daddr.so, - sizeof(union dcm_addr)); - event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED; - } else - event = DAT_IB_UD_CONNECTION_ERROR_EVENT; - } else - event = DAT_IB_UD_CONNECTION_ERROR_EVENT; - - dapl_log(DAPL_DBG_TYPE_CM, - " CONN_RTU: UD AH %p for lid 0x%x qpn 0x%x\n", - cm_ptr->ah, ntohs(cm_ptr->msg.saddr.ib.lid), - ntohl(cm_ptr->msg.saddr.ib.qpn)); - - dapls_evd_post_connection_event_ext( - (DAPL_EVD *) - cm_ptr->ep->param.connect_evd_handle, - event, - (DAT_EP_HANDLE) cm_ptr->ep, - (DAT_COUNT) ntohs(cm_ptr->msg.p_size), - (DAT_PVOID *) cm_ptr->msg.p_data, - (DAT_PVOID *) &xevent); - - /* cleanup and release from local list, still on EP list */ - dapli_cm_free(cm_ptr); - - } else -#endif - { - dapli_ep_check(cm_ptr->ep); - dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp); - } - dapl_log(DAPL_DBG_TYPE_CM_EST, - " SCM PASSIVE CONN: %x <- %s %x\n", - cm_ptr->sp->conn_qual, - inet_ntoa(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_addr), - ntohs(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_port)); - return; - -bail: -#ifdef DAT_EXTENSIONS - if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) - goto ud_bail; -#endif - dapl_os_lock(&cm_ptr->lock); - cm_ptr->state = DCM_REJECTED; - dapl_os_unlock(&cm_ptr->lock); - - dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp); - dapli_cm_free(cm_ptr); -} - -/* - * dapls_ib_connect - * - * Initiate a connection with the passive listener on another node - * - * Input: - * ep_handle, - * remote_ia_address, - * remote_conn_qual, - * prd_size size of private data and structure - * prd_prt pointer to private data structure - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INVALID_PARAMETER - * - */ -DAT_RETURN -dapls_ib_connect(IN DAT_EP_HANDLE ep_handle, - 
IN DAT_IA_ADDRESS_PTR remote_ia_address, - IN DAT_CONN_QUAL remote_conn_qual, - IN DAT_COUNT private_data_size, IN void *private_data) -{ - DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " connect(ep_handle %p ....)\n", ep_handle); - - return (dapli_socket_connect(ep_ptr, remote_ia_address, - remote_conn_qual, - private_data_size, private_data)); -} - -/* - * dapls_ib_disconnect - * - * Disconnect an EP - * - * Input: - * ep_handle, - * disconnect_flags - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - */ -DAT_RETURN -dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags) -{ - dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr); - - dapl_os_lock(&ep_ptr->header.lock); - if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED || - ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC || - cm_ptr == NULL) { - dapl_os_unlock(&ep_ptr->header.lock); - return DAT_SUCCESS; - } - dapl_os_unlock(&ep_ptr->header.lock); - return (dapli_socket_disconnect(cm_ptr)); -} - -/* - * dapls_ib_disconnect_clean - * - * Clean up outstanding connection data. This routine is invoked - * after the final disconnect callback has occurred. Only on the - * ACTIVE side of a connection. It is also called if dat_ep_connect - * times out using the consumer supplied timeout value. 
- * - * Input: - * ep_ptr DAPL_EP - * active Indicates active side of connection - * - * Output: - * none - * - * Returns: - * void - * - */ -void -dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr, - IN DAT_BOOLEAN active, - IN const ib_cm_events_t ib_cm_event) -{ - if (ib_cm_event == IB_CME_TIMEOUT) { - dp_ib_cm_handle_t cm_ptr; - - if ((cm_ptr = dapl_get_cm_from_ep(ep_ptr)) == NULL) - return; - - dapl_log(DAPL_DBG_TYPE_WARN, - "dapls_ib_disc_clean: CONN_TIMEOUT ep %p cm %p %s\n", - ep_ptr, cm_ptr, dapl_cm_state_str(cm_ptr->state)); - - /* schedule release of socket and local resources */ - dapli_cm_free(cm_ptr); - } -} - -/* - * dapl_ib_setup_conn_listener - * - * Have the CM set up a connection listener. - * - * Input: - * ibm_hca_handle HCA handle - * qp_handle QP handle - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INTERNAL_ERROR - * DAT_CONN_QUAL_UNAVAILBLE - * DAT_CONN_QUAL_IN_USE - * - */ -DAT_RETURN -dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr, - IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr) -{ - return (dapli_socket_listen(ia_ptr, ServiceID, sp_ptr)); -} - -/* - * dapl_ib_remove_conn_listener - * - * Have the CM remove a connection listener. 
- * - * Input: - * ia_handle IA handle - * ServiceID IB Channel Service ID - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INVALID_STATE - * - */ -DAT_RETURN -dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr) -{ - ib_cm_srvc_handle_t cm_ptr = sp_ptr->cm_srvc_handle; - - /* free cm_srvc_handle, release will cleanup */ - if (cm_ptr != NULL) { - /* cr_thread will free */ - sp_ptr->cm_srvc_handle = NULL; - dapli_cm_free(cm_ptr); - } - return DAT_SUCCESS; -} - -/* - * dapls_ib_accept_connection - * - * Perform necessary steps to accept a connection - * - * Input: - * cr_handle - * ep_handle - * private_data_size - * private_data - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INTERNAL_ERROR - * - */ -DAT_RETURN -dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle, - IN DAT_EP_HANDLE ep_handle, - IN DAT_COUNT p_size, IN const DAT_PVOID p_data) -{ - DAPL_CR *cr_ptr; - DAPL_EP *ep_ptr; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - "dapls_ib_accept_connection(cr %p ep %p prd %p,%d)\n", - cr_handle, ep_handle, p_data, p_size); - - cr_ptr = (DAPL_CR *) cr_handle; - ep_ptr = (DAPL_EP *) ep_handle; - - /* allocate and attach a QP if necessary */ - if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) { - DAT_RETURN status; - status = dapls_ib_qp_alloc(ep_ptr->header.owner_ia, - ep_ptr, ep_ptr); - if (status != DAT_SUCCESS) - return status; - } - return (dapli_socket_accept_usr(ep_ptr, cr_ptr, p_size, p_data)); -} - -/* - * dapls_ib_reject_connection - * - * Reject a connection - * - * Input: - * cr_handle - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INTERNAL_ERROR - * - */ -DAT_RETURN -dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_ptr, - IN int reason, - IN DAT_COUNT psize, IN const DAT_PVOID pdata) -{ - struct iovec iov[2]; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " reject(cm %p reason %x, pdata %p, psize %d)\n", - cm_ptr, reason, pdata, psize); - - if (psize > 
DCM_MAX_PDATA_SIZE) - return DAT_LENGTH_ERROR; - - /* write reject data to indicate reject */ - cm_ptr->msg.op = htons(DCM_REJ_USER); - cm_ptr->msg.p_size = htons(psize); - - iov[0].iov_base = (void *)&cm_ptr->msg; - iov[0].iov_len = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; - if (psize) { - iov[1].iov_base = pdata; - iov[1].iov_len = psize; - writev(cm_ptr->socket, iov, 2); - } else { - writev(cm_ptr->socket, iov, 1); - } - - /* release and cleanup CM object */ - dapli_cm_free(cm_ptr); - return DAT_SUCCESS; -} - -/* - * dapls_ib_cm_remote_addr - * - * Obtain the remote IP address given a connection - * - * Input: - * cr_handle - * - * Output: - * remote_ia_address: where to place the remote address - * - * Returns: - * DAT_SUCCESS - * DAT_INVALID_HANDLE - * - */ -DAT_RETURN -dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, - OUT DAT_SOCK_ADDR6 * remote_ia_address) -{ - DAPL_HEADER *header; - dp_ib_cm_handle_t conn; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n", - dat_handle); - - header = (DAPL_HEADER *) dat_handle; - - if (header->magic == DAPL_MAGIC_EP) - conn = dapl_get_cm_from_ep((DAPL_EP *) dat_handle); - else if (header->magic == DAPL_MAGIC_CR) - conn = ((DAPL_CR *) dat_handle)->ib_cm_handle; - else - return DAT_INVALID_HANDLE; - - dapl_os_memcpy(remote_ia_address, - &conn->msg.daddr.so, sizeof(DAT_SOCK_ADDR6)); - - return DAT_SUCCESS; -} - -int dapls_ib_private_data_size( - IN DAPL_HCA *hca_ptr) -{ - return DCM_MAX_PDATA_SIZE; -} - -/* outbound/inbound CR processing thread to avoid blocking applications */ -void cr_thread(void *arg) -{ - struct dapl_hca *hca_ptr = arg; - dp_ib_cm_handle_t cr, next_cr; - int opt, ret; - socklen_t opt_len; - char rbuf[2]; - struct dapl_fd_set *set; - enum DAPL_FD_EVENTS event; - - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread: ENTER hca %p\n", hca_ptr); - set = dapl_alloc_fd_set(); - if (!set) - goto out; - - dapl_os_lock(&hca_ptr->ib_trans.lock); - hca_ptr->ib_trans.cr_state = 
IB_THREAD_RUN; - - while (1) { - dapl_fd_zero(set); - dapl_fd_set(hca_ptr->ib_trans.scm[0], set, DAPL_FD_READ); - - if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list)) - next_cr = dapl_llist_peek_head(&hca_ptr->ib_trans.list); - else - next_cr = NULL; - - while (next_cr) { - cr = next_cr; - next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list, - (DAPL_LLIST_ENTRY *) - &cr->local_entry); - dapls_cm_acquire(cr); /* hold thread ref */ - dapl_os_lock(&cr->lock); - if (cr->state == DCM_FREE || - hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) { - dapl_log(DAPL_DBG_TYPE_CM, - " CM FREE: %p ep=%p st=%s sck=%d refs=%d\n", - cr, cr->ep, dapl_cm_state_str(cr->state), - cr->socket, cr->ref_count); - - if (cr->socket != DAPL_INVALID_SOCKET) { - shutdown(cr->socket, SHUT_RDWR); - closesocket(cr->socket); - cr->socket = DAPL_INVALID_SOCKET; - } - dapl_os_unlock(&cr->lock); - dapls_cm_release(cr); /* release alloc ref */ - dapli_cm_dequeue(cr); /* release workq ref */ - dapls_cm_release(cr); /* release thread ref */ - continue; - } - - event = (cr->state == DCM_CONN_PENDING) ? 
- DAPL_FD_WRITE : DAPL_FD_READ; - - if (dapl_fd_set(cr->socket, set, event)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " cr_thread: fd_set ERR st=%d fd %d" - " -> %s\n", cr->state, cr->socket, - inet_ntoa(((struct sockaddr_in *) - &cr->msg.daddr.so)->sin_addr)); - dapl_os_unlock(&cr->lock); - dapls_cm_release(cr); /* release ref */ - continue; - } - dapl_os_unlock(&cr->lock); - dapl_os_unlock(&hca_ptr->ib_trans.lock); - - ret = dapl_poll(cr->socket, event); - - dapl_dbg_log(DAPL_DBG_TYPE_THREAD, - " poll ret=0x%x %s sck=%d\n", - ret, dapl_cm_state_str(cr->state), - cr->socket); - - /* data on listen, qp exchange, and on disc req */ - dapl_os_lock(&cr->lock); - if ((ret == DAPL_FD_READ) || - (cr->state != DCM_CONN_PENDING && ret == DAPL_FD_ERROR)) { - if (cr->socket != DAPL_INVALID_SOCKET) { - switch (cr->state) { - case DCM_LISTEN: - dapl_os_unlock(&cr->lock); - dapli_socket_accept(cr); - break; - case DCM_ACCEPTING: - dapl_os_unlock(&cr->lock); - dapli_socket_accept_data(cr); - break; - case DCM_ACCEPTED: - dapl_os_unlock(&cr->lock); - dapli_socket_accept_rtu(cr); - break; - case DCM_REP_PENDING: - dapl_os_unlock(&cr->lock); - dapli_socket_connect_rtu(cr); - break; - case DCM_CONNECTED: - dapl_os_unlock(&cr->lock); - dapli_socket_disconnect(cr); - break; - default: - dapl_os_unlock(&cr->lock); - break; - } - } else - dapl_os_unlock(&cr->lock); - - /* ASYNC connections, writable, readable, error; check status */ - } else if (ret == DAPL_FD_WRITE || - (cr->state == DCM_CONN_PENDING && - ret == DAPL_FD_ERROR)) { - - opt = 0; - opt_len = sizeof(opt); - ret = getsockopt(cr->socket, SOL_SOCKET, - SO_ERROR, (char *)&opt, - &opt_len); - dapl_os_unlock(&cr->lock); - if (!ret && !opt) - dapli_socket_connected(cr, opt); - else - dapli_socket_connected(cr, opt ? 
opt : dapl_socket_errno()); - } else - dapl_os_unlock(&cr->lock); - - dapls_cm_release(cr); /* release ref */ - dapl_os_lock(&hca_ptr->ib_trans.lock); - } - - /* set to exit and all resources destroyed */ - if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) && - (dapl_llist_is_empty(&hca_ptr->ib_trans.list))) - break; - - dapl_os_unlock(&hca_ptr->ib_trans.lock); - dapl_select(set); - - /* if pipe used to wakeup, consume */ - while (dapl_poll(hca_ptr->ib_trans.scm[0], - DAPL_FD_READ) == DAPL_FD_READ) { - if (recv(hca_ptr->ib_trans.scm[0], rbuf, 2, 0) == -1) - dapl_log(DAPL_DBG_TYPE_THREAD, - " cr_thread: read pipe error = %s\n", - strerror(errno)); - } - dapl_os_lock(&hca_ptr->ib_trans.lock); - - /* set to exit and all resources destroyed */ - if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) && - (dapl_llist_is_empty(&hca_ptr->ib_trans.list))) - break; - } - - dapl_os_unlock(&hca_ptr->ib_trans.lock); - dapl_os_free(set, sizeof(struct dapl_fd_set)); -out: - hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT; - dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " cr_thread(hca %p) exit\n", hca_ptr); -} - - -#ifdef DAPL_COUNTERS -/* Debug aid: List all Connections in process and state */ -void dapls_print_cm_list(IN DAPL_IA *ia_ptr) -{ - /* Print in process CR's for this IA, if debug type set */ - int i = 0; - dp_ib_cm_handle_t cr, next_cr; - - dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock); - if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*) - &ia_ptr->hca_ptr->ib_trans.list)) - next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*) - &ia_ptr->hca_ptr->ib_trans.list); - else - next_cr = NULL; - - printf("\n DAPL IA CONNECTIONS IN PROCESS:\n"); - while (next_cr) { - cr = next_cr; - next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*) - &ia_ptr->hca_ptr->ib_trans.list, - (DAPL_LLIST_ENTRY*)&cr->local_entry); - - printf( " CONN[%d]: sp %p ep %p sock %d %s %s %s %s %s %s PORT L-%x R-%x PID L-%x R-%x\n", - i, cr->sp, cr->ep, cr->socket, - cr->msg.saddr.ib.qp_type == IBV_QPT_RC ? 
"RC" : "UD", - dapl_cm_state_str(cr->state), dapl_cm_op_str(ntohs(cr->msg.op)), - ntohs(cr->msg.op) == DCM_REQ ? /* local address */ - inet_ntoa(((struct sockaddr_in *)&cr->msg.daddr.so)->sin_addr) : - inet_ntoa(((struct sockaddr_in *)&cr->addr)->sin_addr), - cr->sp ? "<-" : "->", - ntohs(cr->msg.op) == DCM_REQ ? /* remote address */ - inet_ntoa(((struct sockaddr_in *)&cr->addr)->sin_addr) : - inet_ntoa(((struct sockaddr_in *)&cr->msg.daddr.so)->sin_addr), - - ntohs(cr->msg.op) == DCM_REQ ? /* local port */ - ntohs(((struct sockaddr_in *)&cr->msg.daddr.so)->sin_port) : - ntohs(((struct sockaddr_in *)&cr->addr)->sin_port), - - ntohs(cr->msg.op) == DCM_REQ ? /* remote port */ - ntohs(((struct sockaddr_in *)&cr->addr)->sin_port) : - ntohs(((struct sockaddr_in *)&cr->msg.daddr.so)->sin_port), - - cr->sp ? ntohs(*(uint16_t*)&cr->msg.resv[2]) : ntohs(*(uint16_t*)&cr->msg.resv[0]), - cr->sp ? ntohs(*(uint16_t*)&cr->msg.resv[0]) : ntohs(*(uint16_t*)&cr->msg.resv[2])); - - i++; - } - printf("\n"); - dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock); -} -#endif +/* + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. 
+ * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. + */ + +/*************************************************************************** + * + * Module: uDAPL + * + * Filename: dapl_ib_cm.c + * + * Author: Arlin Davis + * + * Created: 3/10/2005 + * + * Description: + * + * The uDAPL openib provider - connection management + * + **************************************************************************** + * Source Control System Information + * + * $Id: $ + * + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * + **************************************************************************/ + +#if defined(_WIN32) +#define FD_SETSIZE 1024 +#define DAPL_FD_SETSIZE FD_SETSIZE +#endif + +#include "dapl.h" +#include "dapl_adapter_util.h" +#include "dapl_evd_util.h" +#include "dapl_cr_util.h" +#include "dapl_name_service.h" +#include "dapl_ib_util.h" +#include "dapl_ep_util.h" +#include "dapl_osd.h" + +/* forward declarations */ +static DAT_RETURN +dapli_socket_connect(DAPL_EP * ep_ptr, + DAT_IA_ADDRESS_PTR r_addr, + DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data); + +#ifdef DAPL_DBG +/* Check for EP linking to IA and proper connect state */ +void dapli_ep_check(DAPL_EP *ep) +{ + DAPL_IA *ia_ptr = ep->header.owner_ia; + DAPL_EP *ep_ptr, *next_ep_ptr; + int found = 0; + + dapl_os_lock(&ia_ptr->header.lock); + ep_ptr = (dapl_llist_is_empty (&ia_ptr->ep_list_head) + ? 
NULL : dapl_llist_peek_head (&ia_ptr->ep_list_head)); + + while (ep_ptr != NULL) { + next_ep_ptr = + dapl_llist_next_entry(&ia_ptr->ep_list_head, + &ep_ptr->header.ia_list_entry); + if (ep == ep_ptr) { + found++; + if ((ep->cr_ptr && ep->param.ep_state + != DAT_EP_STATE_COMPLETION_PENDING) || + (!ep->cr_ptr && ep->param.ep_state + != DAT_EP_STATE_ACTIVE_CONNECTION_PENDING)) + goto err; + else + goto match; + } + ep_ptr = next_ep_ptr; + } +err: + dapl_log(DAPL_DBG_TYPE_ERR, + " dapli_ep_check ERR: %s %s ep=%p state=%d magic=0x%x\n", + ep->cr_ptr ? "PASSIVE":"ACTIVE", + found ? "WRONG_STATE":"NOT_FOUND" , + ep, ep->param.ep_state, ep->header.magic); +match: + dapl_os_unlock(&ia_ptr->header.lock); + return; +} +#else +#define dapli_ep_check(ep) +#endif + +#if defined(_WIN32) || defined(_WIN64) +enum DAPL_FD_EVENTS { + DAPL_FD_READ = 0x1, + DAPL_FD_WRITE = 0x2, + DAPL_FD_ERROR = 0x4 +}; + +static int dapl_config_socket(DAPL_SOCKET s) +{ + unsigned long nonblocking = 1; + int ret, opt = 1; + + ret = ioctlsocket(s, FIONBIO, &nonblocking); + + /* no delay for small packets */ + if (!ret) + ret = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, + (char *)&opt, sizeof(opt)); + return ret; +} + +static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr, + int addrlen) +{ + int err; + + err = connect(s, addr, addrlen); + if (err == SOCKET_ERROR) + err = WSAGetLastError(); + return (err == WSAEWOULDBLOCK) ? EAGAIN : err; +} + +struct dapl_fd_set { + struct fd_set set[3]; +}; + +static struct dapl_fd_set *dapl_alloc_fd_set(void) +{ + return dapl_os_alloc(sizeof(struct dapl_fd_set)); +} + +static void dapl_fd_zero(struct dapl_fd_set *set) +{ + FD_ZERO(&set->set[0]); + FD_ZERO(&set->set[1]); + FD_ZERO(&set->set[2]); +} + +static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set, + enum DAPL_FD_EVENTS event) +{ + FD_SET(s, &set->set[(event == DAPL_FD_READ) ? 
0 : 1]); + FD_SET(s, &set->set[2]); + return 0; +} + +static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event) +{ + struct fd_set rw_fds; + struct fd_set err_fds; + struct timeval tv; + int ret; + + FD_ZERO(&rw_fds); + FD_ZERO(&err_fds); + FD_SET(s, &rw_fds); + FD_SET(s, &err_fds); + + tv.tv_sec = 0; + tv.tv_usec = 0; + + if (event == DAPL_FD_READ) + ret = select(1, &rw_fds, NULL, &err_fds, &tv); + else + ret = select(1, NULL, &rw_fds, &err_fds, &tv); + + if (ret == 0) + return 0; + else if (ret == SOCKET_ERROR) + return DAPL_FD_ERROR; + else if (FD_ISSET(s, &rw_fds)) + return event; + else + return DAPL_FD_ERROR; +} + +static int dapl_select(struct dapl_fd_set *set) +{ + int ret; + + dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep\n"); + ret = select(0, &set->set[0], &set->set[1], &set->set[2], NULL); + dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup\n"); + + if (ret == SOCKET_ERROR) + dapl_dbg_log(DAPL_DBG_TYPE_THREAD, + " dapl_select: error 0x%x\n", WSAGetLastError()); + + return ret; +} + +static int dapl_socket_errno(void) +{ + int err; + + err = WSAGetLastError(); + switch (err) { + case WSAEACCES: + case WSAEADDRINUSE: + return EADDRINUSE; + case WSAECONNRESET: + return ECONNRESET; + default: + return err; + } +} +#else // _WIN32 || _WIN64 +enum DAPL_FD_EVENTS { + DAPL_FD_READ = POLLIN, + DAPL_FD_WRITE = POLLOUT, + DAPL_FD_ERROR = POLLERR +}; + +static int dapl_config_socket(DAPL_SOCKET s) +{ + int ret, opt = 1; + + /* non-blocking */ + ret = fcntl(s, F_GETFL); + if (ret >= 0) + ret = fcntl(s, F_SETFL, ret | O_NONBLOCK); + + /* no delay for small packets */ + if (!ret) + ret = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, + (char *)&opt, sizeof(opt)); + return ret; +} + +static int dapl_connect_socket(DAPL_SOCKET s, struct sockaddr *addr, + int addrlen) +{ + int ret; + + ret = connect(s, addr, addrlen); + + return (errno == EINPROGRESS) ? 
EAGAIN : ret; +} + +struct dapl_fd_set { + int index; + struct pollfd set[DAPL_FD_SETSIZE]; +}; + +static struct dapl_fd_set *dapl_alloc_fd_set(void) +{ + return dapl_os_alloc(sizeof(struct dapl_fd_set)); +} + +static void dapl_fd_zero(struct dapl_fd_set *set) +{ + set->index = 0; +} + +static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set, + enum DAPL_FD_EVENTS event) +{ + if (set->index == DAPL_FD_SETSIZE - 1) { + dapl_log(DAPL_DBG_TYPE_ERR, + "SCM ERR: cm_thread exceeded FD_SETSIZE %d\n", + set->index + 1); + return -1; + } + + set->set[set->index].fd = s; + set->set[set->index].revents = 0; + set->set[set->index++].events = event; + return 0; +} + +static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event) +{ + struct pollfd fds; + int ret; + + fds.fd = s; + fds.events = event; + fds.revents = 0; + ret = poll(&fds, 1, 0); + dapl_log(DAPL_DBG_TYPE_THREAD, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n", + s, ret, fds.revents); + if (ret == 0) + return 0; + else if (ret < 0 || (fds.revents & (POLLERR | POLLHUP | POLLNVAL))) + return DAPL_FD_ERROR; + else + return event; +} + +static int dapl_select(struct dapl_fd_set *set) +{ + int ret; + + dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " dapl_select: sleep, fds=%d\n", set->index); + ret = poll(set->set, set->index, -1); + dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " dapl_select: wakeup, ret=0x%x\n", ret); + return ret; +} + +#define dapl_socket_errno() errno +#endif + +static void dapli_cm_thread_signal(dp_ib_cm_handle_t cm_ptr) +{ + if (cm_ptr->hca) + send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0); +} + +static void dapli_cm_free(dp_ib_cm_handle_t cm_ptr) +{ + dapl_os_lock(&cm_ptr->lock); + cm_ptr->state = DCM_FREE; + dapl_os_unlock(&cm_ptr->lock); + dapli_cm_thread_signal(cm_ptr); +} + +static void dapli_cm_dealloc(dp_ib_cm_handle_t cm_ptr) +{ + dapl_os_assert(!cm_ptr->ref_count); + + if (cm_ptr->socket != DAPL_INVALID_SOCKET) { + shutdown(cm_ptr->socket, SHUT_RDWR); + closesocket(cm_ptr->socket); + } 
+ if (cm_ptr->ah) + ibv_destroy_ah(cm_ptr->ah); + + dapl_os_lock_destroy(&cm_ptr->lock); + dapl_os_wait_object_destroy(&cm_ptr->event); + dapl_os_free(cm_ptr, sizeof(*cm_ptr)); +} + +void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr) +{ + dapl_os_lock(&cm_ptr->lock); + cm_ptr->ref_count++; + dapl_os_unlock(&cm_ptr->lock); +} + +void dapls_cm_release(dp_ib_cm_handle_t cm_ptr) +{ + dapl_os_lock(&cm_ptr->lock); + cm_ptr->ref_count--; + if (cm_ptr->ref_count) { + dapl_os_unlock(&cm_ptr->lock); + return; + } + dapl_os_unlock(&cm_ptr->lock); + dapli_cm_dealloc(cm_ptr); +} + +static dp_ib_cm_handle_t dapli_cm_alloc(DAPL_EP *ep_ptr) +{ + dp_ib_cm_handle_t cm_ptr; + + /* Allocate CM, init lock, and initialize */ + if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL) + return NULL; + + (void)dapl_os_memzero(cm_ptr, sizeof(*cm_ptr)); + if (dapl_os_lock_init(&cm_ptr->lock)) + goto bail; + + if (dapl_os_wait_object_init(&cm_ptr->event)) { + dapl_os_lock_destroy(&cm_ptr->lock); + goto bail; + } + dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm_ptr->list_entry); + dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm_ptr->local_entry); + + cm_ptr->msg.ver = htons(DCM_VER); + cm_ptr->socket = DAPL_INVALID_SOCKET; + dapls_cm_acquire(cm_ptr); + + /* Link EP and CM */ + if (ep_ptr != NULL) { + dapl_ep_link_cm(ep_ptr, cm_ptr); /* ref++ */ + cm_ptr->ep = ep_ptr; + cm_ptr->hca = ((DAPL_IA *)ep_ptr->param.ia_handle)->hca_ptr; + } + return cm_ptr; +bail: + dapl_os_free(cm_ptr, sizeof(*cm_ptr)); + return NULL; +} + +/* queue socket for processing CM work */ +static void dapli_cm_queue(dp_ib_cm_handle_t cm_ptr) +{ + /* add to work queue for cr thread processing */ + dapl_os_lock(&cm_ptr->hca->ib_trans.lock); + dapls_cm_acquire(cm_ptr); + dapl_llist_add_tail(&cm_ptr->hca->ib_trans.list, + (DAPL_LLIST_ENTRY *)&cm_ptr->local_entry, cm_ptr); + dapl_os_unlock(&cm_ptr->hca->ib_trans.lock); + dapli_cm_thread_signal(cm_ptr); +} + +/* called with local LIST lock */ +static void 
dapli_cm_dequeue(dp_ib_cm_handle_t cm_ptr) +{ + /* Remove from work queue, cr thread processing */ + dapl_llist_remove_entry(&cm_ptr->hca->ib_trans.list, + (DAPL_LLIST_ENTRY *)&cm_ptr->local_entry); + dapls_cm_release(cm_ptr); +} + +/* BLOCKING: called from dapl_ep_free, EP link will be last ref */ +void dapls_cm_free(dp_ib_cm_handle_t cm_ptr) +{ + dapl_log(DAPL_DBG_TYPE_CM, + " cm_free: cm %p %s ep %p refs=%d\n", + cm_ptr, dapl_cm_state_str(cm_ptr->state), + cm_ptr->ep, cm_ptr->ref_count); + + /* free from internal workq, wait until EP is last ref */ + dapl_os_lock(&cm_ptr->lock); + cm_ptr->state = DCM_FREE; + while (cm_ptr->ref_count != 1) { + dapli_cm_thread_signal(cm_ptr); + dapl_os_unlock(&cm_ptr->lock); + dapl_os_sleep_usec(10000); + dapl_os_lock(&cm_ptr->lock); + } + dapl_os_unlock(&cm_ptr->lock); + + /* unlink, dequeue from EP. Final ref so release will destroy */ + dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr); +} + +/* + * ACTIVE/PASSIVE: called from CR thread or consumer via ep_disconnect + * or from ep_free. 
+ */ +DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr) +{ + DAT_UINT32 disc_data = htonl(0xdead); + + dapl_os_lock(&cm_ptr->lock); + if (cm_ptr->state != DCM_CONNECTED || + cm_ptr->state == DCM_DISCONNECTED) { + dapl_os_unlock(&cm_ptr->lock); + return DAT_SUCCESS; + } + cm_ptr->state = DCM_DISCONNECTED; + send(cm_ptr->socket, (char *)&disc_data, sizeof(disc_data), 0); + dapl_os_unlock(&cm_ptr->lock); + + /* disconnect events for RC's only */ + if (cm_ptr->ep->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) { + dapl_os_lock(&cm_ptr->ep->header.lock); + dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 0,0,0); + dapl_os_unlock(&cm_ptr->ep->header.lock); + if (cm_ptr->ep->cr_ptr) { + dapls_cr_callback(cm_ptr, + IB_CME_DISCONNECTED, + NULL, 0, cm_ptr->sp); + } else { + dapl_evd_connection_callback(cm_ptr, + IB_CME_DISCONNECTED, + NULL, 0, cm_ptr->ep); + } + } + + /* release from workq */ + dapli_cm_free(cm_ptr); + + /* scheduled destroy via disconnect clean in callback */ + return DAT_SUCCESS; +} + +/* + * ACTIVE: socket connected, send QP information to peer + */ +static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err) +{ + int len, exp; + struct iovec iov[2]; + struct dapl_ep *ep_ptr = cm_ptr->ep; + + if (err) { + dapl_log(DAPL_DBG_TYPE_ERR, + " CONN_PENDING: %s ERR %s -> %s %d - %s\n", + err == -1 ? "POLL" : "SOCKOPT", + err == -1 ? strerror(dapl_socket_errno()) : strerror(err), + inet_ntoa(((struct sockaddr_in *) + &cm_ptr->addr)->sin_addr), + ntohs(((struct sockaddr_in *) + &cm_ptr->addr)->sin_port), + err == ETIMEDOUT ? 
"RETRYING...":"ABORTING"); + + /* retry a timeout */ + if (err == ETIMEDOUT) { + closesocket(cm_ptr->socket); + cm_ptr->socket = DAPL_INVALID_SOCKET; + dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, + ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000, + ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data); + dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr); + dapli_cm_free(cm_ptr); + return; + } + + goto bail; + } + + dapl_os_lock(&cm_ptr->lock); + cm_ptr->state = DCM_REP_PENDING; + dapl_os_unlock(&cm_ptr->lock); + + /* send qp info and pdata to remote peer */ + exp = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; + iov[0].iov_base = (void *)&cm_ptr->msg; + iov[0].iov_len = exp; + if (cm_ptr->msg.p_size) { + iov[1].iov_base = cm_ptr->msg.p_data; + iov[1].iov_len = ntohs(cm_ptr->msg.p_size); + len = writev(cm_ptr->socket, iov, 2); + } else { + len = writev(cm_ptr->socket, iov, 1); + } + + if (len != (exp + ntohs(cm_ptr->msg.p_size))) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " CONN_PENDING len ERR 0x%x %s, wcnt=%d(%d) -> %s\n", + err, strerror(err), len, + exp + ntohs(cm_ptr->msg.p_size), + inet_ntoa(((struct sockaddr_in *) + ep_ptr->param. + remote_ia_address_ptr)->sin_addr)); + goto bail; + } + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " CONN_PENDING: sending SRC lid=0x%x," + " qpn=0x%x, psize=%d\n", + ntohs(cm_ptr->msg.saddr.ib.lid), + ntohl(cm_ptr->msg.saddr.ib.qpn), + ntohs(cm_ptr->msg.p_size)); + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " CONN_PENDING: SRC GID subnet %016llx id %016llx\n", + (unsigned long long) + htonll(*(uint64_t*)&cm_ptr->msg.saddr.ib.gid[0]), + (unsigned long long) + htonll(*(uint64_t*)&cm_ptr->msg.saddr.ib.gid[8])); + return; + +bail: + /* mark CM object for cleanup */ + dapli_cm_free(cm_ptr); + dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, 0, ep_ptr); +} + +/* + * ACTIVE: Create socket, connect, defer exchange QP information to CR thread + * to avoid blocking. 
+ */ +static DAT_RETURN +dapli_socket_connect(DAPL_EP * ep_ptr, + DAT_IA_ADDRESS_PTR r_addr, + DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data) +{ + dp_ib_cm_handle_t cm_ptr; + int ret; + socklen_t sl; + DAPL_IA *ia_ptr = ep_ptr->header.owner_ia; + DAT_RETURN dat_ret = DAT_INSUFFICIENT_RESOURCES; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d p_size=%d\n", + r_qual, p_size); + + cm_ptr = dapli_cm_alloc(ep_ptr); + if (cm_ptr == NULL) + return dat_ret; + + /* create, connect, sockopt, and exchange QP information */ + if ((cm_ptr->socket = + socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " connect: socket create ERR 0x%x %s\n", + err, strerror(err)); + goto bail; + } + + ret = dapl_config_socket(cm_ptr->socket); + if (ret < 0) { + dapl_log(DAPL_DBG_TYPE_ERR, + " connect: config socket %d RET %d ERR 0x%x %s\n", + cm_ptr->socket, ret, + dapl_socket_errno(), strerror(dapl_socket_errno())); + dat_ret = DAT_INTERNAL_ERROR; + goto bail; + } + + /* save remote address */ + dapl_os_memcpy(&cm_ptr->addr, r_addr, sizeof(*r_addr)); + +#ifdef DAPL_DBG + /* DBG: Active PID [0], PASSIVE PID [2]*/ + *(uint16_t*)&cm_ptr->msg.resv[0] = htons((uint16_t)dapl_os_getpid()); + *(uint16_t*)&cm_ptr->msg.resv[2] = ((struct sockaddr_in *)&cm_ptr->addr)->sin_port; +#endif + ((struct sockaddr_in *)&cm_ptr->addr)->sin_port = htons(r_qual + 1000); + ret = dapl_connect_socket(cm_ptr->socket, (struct sockaddr *)&cm_ptr->addr, + sizeof(cm_ptr->addr)); + if (ret && ret != EAGAIN) { + dapl_log(DAPL_DBG_TYPE_ERR, + " connect: dapl_connect_socket RET %d ERR 0x%x %s\n", + ret, dapl_socket_errno(), + strerror(dapl_socket_errno())); + dat_ret = DAT_INVALID_ADDRESS; + goto bail; + } + + /* REQ: QP info in msg.saddr, IA address in msg.daddr, and pdata */ + cm_ptr->hca = ia_ptr->hca_ptr; + cm_ptr->msg.op = ntohs(DCM_REQ); + cm_ptr->msg.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num); + 
cm_ptr->msg.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type; + cm_ptr->msg.saddr.ib.lid = ia_ptr->hca_ptr->ib_trans.lid; + dapl_os_memcpy(&cm_ptr->msg.saddr.ib.gid[0], + &ia_ptr->hca_ptr->ib_trans.gid, 16); + + /* get local address information from socket */ + sl = sizeof(cm_ptr->msg.daddr.so); + if (getsockname(cm_ptr->socket, (struct sockaddr *)&cm_ptr->msg.daddr.so, &sl)) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " connect getsockname ERROR: 0x%x %s -> %s r_qual %d\n", + err, strerror(err), + inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr), + (unsigned int)r_qual);; + } + + if (p_size) { + cm_ptr->msg.p_size = htons(p_size); + dapl_os_memcpy(cm_ptr->msg.p_data, p_data, p_size); + } + + /* connected or pending, either way results via async event */ + if (ret == 0) + dapli_socket_connected(cm_ptr, 0); + else + cm_ptr->state = DCM_CONN_PENDING; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: p_data=%p %p\n", + cm_ptr->msg.p_data, cm_ptr->msg.p_data); + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " connect: %s r_qual %d pending, p_sz=%d, %d %d ...\n", + inet_ntoa(((struct sockaddr_in *)&cm_ptr->addr)->sin_addr), + (unsigned int)r_qual, ntohs(cm_ptr->msg.p_size), + cm_ptr->msg.p_data[0], cm_ptr->msg.p_data[1]); + + /* queue up on work thread */ + dapli_cm_queue(cm_ptr); + return DAT_SUCCESS; +bail: + dapl_log(DAPL_DBG_TYPE_ERR, + " connect ERROR: -> %s r_qual %d\n", + inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr), + (unsigned int)r_qual); + + /* Never queued, destroy */ + dapls_cm_release(cm_ptr); + return dat_ret; +} + +/* + * ACTIVE: exchange QP information, called from CR thread + */ +static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr) +{ + DAPL_EP *ep_ptr = cm_ptr->ep; + int len, exp = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; + ib_cm_events_t event = IB_CME_LOCAL_FAILURE; + socklen_t sl; + + /* read DST information into cm_ptr, overwrite SRC info */ + dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: recv peer QP data\n"); + + len = 
recv(cm_ptr->socket, (char *)&cm_ptr->msg, exp, 0); + if (len != exp || ntohs(cm_ptr->msg.ver) != DCM_VER) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_WARN, + " CONN_RTU read: sk %d ERR 0x%x, rcnt=%d, v=%d -> %s PORT L-%x R-%x PID L-%x R-%x\n", + cm_ptr->socket, err, len, ntohs(cm_ptr->msg.ver), + inet_ntoa(((struct sockaddr_in *)&cm_ptr->addr)->sin_addr), + ntohs(((struct sockaddr_in *)&cm_ptr->msg.daddr.so)->sin_port), + ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port), + ntohs(*(uint16_t*)&cm_ptr->msg.resv[0]), + ntohs(*(uint16_t*)&cm_ptr->msg.resv[2])); + + /* Retry; corner case where server tcp stack resets under load */ + if (err == ECONNRESET) { + closesocket(cm_ptr->socket); + cm_ptr->socket = DAPL_INVALID_SOCKET; + dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr, + ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000, + ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data); + dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr); + dapli_cm_free(cm_ptr); + return; + } + goto bail; + } + + /* keep the QP, address info in network order */ + + /* save remote address information, in msg.daddr */ + dapl_os_memcpy(&cm_ptr->addr, + &cm_ptr->msg.daddr.so, + sizeof(union dcm_addr)); + + /* save local address information from socket */ + sl = sizeof(cm_ptr->addr); + getsockname(cm_ptr->socket,(struct sockaddr *)&cm_ptr->addr, &sl); + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " CONN_RTU: DST %s %d lid=0x%x," + " qpn=0x%x, qp_type=%d, psize=%d\n", + inet_ntoa(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_port), + ntohs(cm_ptr->msg.saddr.ib.lid), + ntohl(cm_ptr->msg.saddr.ib.qpn), + cm_ptr->msg.saddr.ib.qp_type, + ntohs(cm_ptr->msg.p_size)); + + /* validate private data size before reading */ + if (ntohs(cm_ptr->msg.p_size) > DCM_MAX_PDATA_SIZE) { + dapl_log(DAPL_DBG_TYPE_ERR, + " CONN_RTU read: psize (%d) wrong -> %s\n", + ntohs(cm_ptr->msg.p_size), + inet_ntoa(((struct 
sockaddr_in *) + ep_ptr->param. + remote_ia_address_ptr)->sin_addr)); + goto bail; + } + + /* read private data into cm_handle if any present */ + dapl_dbg_log(DAPL_DBG_TYPE_EP," CONN_RTU: read private data\n"); + exp = ntohs(cm_ptr->msg.p_size); + if (exp) { + len = recv(cm_ptr->socket, cm_ptr->msg.p_data, exp, 0); + if (len != exp) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " CONN_RTU read pdata: ERR 0x%x %s, rcnt=%d -> %s\n", + err, strerror(err), len, + inet_ntoa(((struct sockaddr_in *) + ep_ptr->param. + remote_ia_address_ptr)->sin_addr)); + goto bail; + } + } + + /* check for consumer or protocol stack reject */ + if (ntohs(cm_ptr->msg.op) == DCM_REP) + event = IB_CME_CONNECTED; + else if (ntohs(cm_ptr->msg.op) == DCM_REJ_USER) + event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA; + else + event = IB_CME_DESTINATION_REJECT; + + if (event != IB_CME_CONNECTED) { + dapl_log(DAPL_DBG_TYPE_CM, + " CONN_RTU: reject from %s %x\n", + inet_ntoa(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_port)); + goto bail; + } + + /* modify QP to RTR and then to RTS with remote info */ + dapl_os_lock(&ep_ptr->header.lock); + if (dapls_modify_qp_state(ep_ptr->qp_handle, + IBV_QPS_RTR, + cm_ptr->msg.saddr.ib.qpn, + cm_ptr->msg.saddr.ib.lid, + (ib_gid_handle_t)cm_ptr->msg.saddr.ib.gid) != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " CONN_RTU: QPS_RTR ERR %s (%d,%d,%x,%x,%x) -> %s %x\n", + strerror(errno), ep_ptr->qp_handle->qp_type, + ep_ptr->qp_state, ep_ptr->qp_handle->qp_num, + ntohl(cm_ptr->msg.saddr.ib.qpn), + ntohs(cm_ptr->msg.saddr.ib.lid), + inet_ntoa(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_port)); + dapl_os_unlock(&ep_ptr->header.lock); + goto bail; + } + if (dapls_modify_qp_state(ep_ptr->qp_handle, + IBV_QPS_RTS, + cm_ptr->msg.saddr.ib.qpn, + cm_ptr->msg.saddr.ib.lid, + NULL) != DAT_SUCCESS) { 
+ dapl_log(DAPL_DBG_TYPE_ERR, + " CONN_RTU: QPS_RTS ERR %s (%d,%d,%x,%x,%x) -> %s %x\n", + strerror(errno), ep_ptr->qp_handle->qp_type, + ep_ptr->qp_state, ep_ptr->qp_handle->qp_num, + ntohl(cm_ptr->msg.saddr.ib.qpn), + ntohs(cm_ptr->msg.saddr.ib.lid), + inet_ntoa(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_port)); + dapl_os_unlock(&ep_ptr->header.lock); + goto bail; + } + dapl_os_unlock(&ep_ptr->header.lock); + dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: send RTU\n"); + + /* complete handshake after final QP state change, Just ver+op */ + dapl_os_lock(&cm_ptr->lock); + cm_ptr->state = DCM_CONNECTED; + dapl_os_unlock(&cm_ptr->lock); + + cm_ptr->msg.op = ntohs(DCM_RTU); + if (send(cm_ptr->socket, (char *)&cm_ptr->msg, 4, 0) == -1) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " CONN_RTU: write ERR = 0x%x %s\n", + err, strerror(err)); + goto bail; + } + /* post the event with private data */ + event = IB_CME_CONNECTED; + dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n"); + +#ifdef DAT_EXTENSIONS +ud_bail: + if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) { + DAT_IB_EXTENSION_EVENT_DATA xevent; + ib_pd_handle_t pd_handle = + ((DAPL_PZ *)ep_ptr->param.pz_handle)->pd_handle; + + if (event == IB_CME_CONNECTED) { + cm_ptr->ah = dapls_create_ah(cm_ptr->hca, pd_handle, + ep_ptr->qp_handle, + cm_ptr->msg.saddr.ib.lid, + NULL); + if (cm_ptr->ah) { + /* post UD extended EVENT */ + xevent.status = 0; + xevent.type = DAT_IB_UD_REMOTE_AH; + xevent.remote_ah.ah = cm_ptr->ah; + xevent.remote_ah.qpn = ntohl(cm_ptr->msg.saddr.ib.qpn); + dapl_os_memcpy(&xevent.remote_ah.ia_addr, + &ep_ptr->remote_ia_address, + sizeof(union dcm_addr)); + event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED; + + dapl_log(DAPL_DBG_TYPE_CM, + " CONN_RTU: UD AH %p for lid 0x%x" + " qpn 0x%x\n", + cm_ptr->ah, + ntohs(cm_ptr->msg.saddr.ib.lid), + ntohl(cm_ptr->msg.saddr.ib.qpn)); + + } else + event = 
DAT_IB_UD_CONNECTION_ERROR_EVENT; + + } else if (event == IB_CME_LOCAL_FAILURE) { + event = DAT_IB_UD_CONNECTION_ERROR_EVENT; + } else + event = DAT_IB_UD_CONNECTION_REJECT_EVENT; + + dapls_evd_post_connection_event_ext( + (DAPL_EVD *) ep_ptr->param.connect_evd_handle, + event, + (DAT_EP_HANDLE) ep_ptr, + (DAT_COUNT) exp, + (DAT_PVOID *) cm_ptr->msg.p_data, + (DAT_PVOID *) &xevent); + + /* cleanup and release from local list */ + dapli_cm_free(cm_ptr); + + } else +#endif + { + dapli_ep_check(cm_ptr->ep); + dapl_evd_connection_callback(cm_ptr, event, cm_ptr->msg.p_data, + DCM_MAX_PDATA_SIZE, ep_ptr); + } + dapl_log(DAPL_DBG_TYPE_CM_EST, + " SCM ACTIVE CONN: %x -> %s %x\n", + ntohs(((struct sockaddr_in *) &cm_ptr->addr)->sin_port), + inet_ntoa(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_port)-1000); + return; + +bail: + +#ifdef DAT_EXTENSIONS + if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) + goto ud_bail; +#endif + /* close socket, and post error event */ + dapl_os_lock(&cm_ptr->lock); + cm_ptr->state = DCM_REJECTED; + dapl_os_unlock(&cm_ptr->lock); + + dapl_evd_connection_callback(NULL, event, cm_ptr->msg.p_data, + DCM_MAX_PDATA_SIZE, ep_ptr); + dapli_cm_free(cm_ptr); +} + +/* + * PASSIVE: Create socket, listen, accept, exchange QP information + */ +DAT_RETURN +dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr) +{ + struct sockaddr_in addr; + ib_cm_srvc_handle_t cm_ptr = NULL; + DAT_RETURN dat_status = DAT_SUCCESS; + int opt = 1; + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " setup listen(ia_ptr %p ServiceID %d sp_ptr %p)\n", + ia_ptr, serviceID, sp_ptr); + + cm_ptr = dapli_cm_alloc(NULL); + if (cm_ptr == NULL) + return DAT_INSUFFICIENT_RESOURCES; + + cm_ptr->sp = sp_ptr; + cm_ptr->hca = ia_ptr->hca_ptr; + + /* bind, listen, set sockopt, accept, exchange data */ + if ((cm_ptr->socket = + socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) { + int err = 
dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " listen: socket create: ERR 0x%x %s\n", + err, strerror(err)); + dat_status = DAT_INSUFFICIENT_RESOURCES; + goto bail; + } + + setsockopt(cm_ptr->socket, SOL_SOCKET, SO_REUSEADDR, (char*)&opt, sizeof(opt)); + addr.sin_port = htons(serviceID + 1000); + addr.sin_family = AF_INET; + addr.sin_addr = ((struct sockaddr_in *) &ia_ptr->hca_ptr->hca_address)->sin_addr; + + if ((bind(cm_ptr->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) + || (listen(cm_ptr->socket, 128) < 0)) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_CM, + " listen: ERROR 0x%x %s on port %d\n", + err, strerror(err), serviceID + 1000); + if (err == EADDRINUSE) + dat_status = DAT_CONN_QUAL_IN_USE; + else + dat_status = DAT_CONN_QUAL_UNAVAILABLE; + goto bail; + } + + /* set cm_handle for this service point, save listen socket */ + sp_ptr->cm_srvc_handle = cm_ptr; + dapl_os_memcpy(&cm_ptr->addr, &addr, sizeof(addr)); + + /* queue up listen socket to process inbound CR's */ + cm_ptr->state = DCM_LISTEN; + dapli_cm_queue(cm_ptr); + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " setup listen: port %d cr %p s_fd %d\n", + serviceID + 1000, cm_ptr, cm_ptr->socket); + + return dat_status; +bail: + /* Never queued, destroy here */ + dapls_cm_release(cm_ptr); + return dat_status; +} + +/* + * PASSIVE: accept socket + */ +static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr) +{ + dp_ib_cm_handle_t acm_ptr; + int ret, len, opt = 1; + socklen_t sl; + + /* + * Accept all CR's on this port to avoid half-connection (SYN_RCV) + * stalls with many to one connection storms + */ + do { + /* Allocate accept CM and initialize */ + if ((acm_ptr = dapli_cm_alloc(NULL)) == NULL) + return; + + acm_ptr->sp = cm_ptr->sp; + acm_ptr->hca = cm_ptr->hca; + + len = sizeof(union dcm_addr); + acm_ptr->socket = accept(cm_ptr->socket, + (struct sockaddr *) + &acm_ptr->msg.daddr.so, + (socklen_t *) &len); + if (acm_ptr->socket == DAPL_INVALID_SOCKET) { + int err = 
dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT: ERR 0x%x %s on FD %d l_cr %p\n", + err, strerror(err), cm_ptr->socket, cm_ptr); + dapls_cm_release(acm_ptr); + return; + } + dapl_dbg_log(DAPL_DBG_TYPE_CM, " accepting from %s %x\n", + inet_ntoa(((struct sockaddr_in *) + &acm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) + &acm_ptr->msg.daddr.so)->sin_port)); + + /* no delay for small packets */ + ret = setsockopt(acm_ptr->socket, IPPROTO_TCP, TCP_NODELAY, + (char *)&opt, sizeof(opt)); + if (ret) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT: NODELAY setsockopt:" + " RET %d ERR 0x%x %s\n", + ret, err, strerror(err)); + } + + /* get local address information from socket */ + sl = sizeof(acm_ptr->addr); + getsockname(acm_ptr->socket, (struct sockaddr *)&acm_ptr->addr, &sl); + acm_ptr->state = DCM_ACCEPTING; + dapli_cm_queue(acm_ptr); + + } while (dapl_poll(cm_ptr->socket, DAPL_FD_READ) == DAPL_FD_READ); +} + +/* + * PASSIVE: receive peer QP information, private data, post cr_event + */ +static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr) +{ + int len, exp = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; + void *p_data = NULL; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, " socket accepted, read QP data\n"); + + /* read in DST QP info, IA address. 
check for private data */ + len = recv(acm_ptr->socket, (char *)&acm_ptr->msg, exp, 0); + if (len != exp || ntohs(acm_ptr->msg.ver) != DCM_VER) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT read: ERR 0x%x %s, rcnt=%d, ver=%d\n", + err, strerror(err), len, ntohs(acm_ptr->msg.ver)); + goto bail; + } + + /* keep the QP, address info in network order */ + + /* validate private data size before reading */ + exp = ntohs(acm_ptr->msg.p_size); + if (exp > DCM_MAX_PDATA_SIZE) { + dapl_log(DAPL_DBG_TYPE_ERR, + " accept read: psize (%d) wrong\n", + acm_ptr->msg.p_size); + goto bail; + } + + /* read private data into cm_handle if any present */ + if (exp) { + len = recv(acm_ptr->socket, acm_ptr->msg.p_data, exp, 0); + if (len != exp) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " accept read pdata: ERR 0x%x %s, rcnt=%d\n", + err, strerror(err), len); + goto bail; + } + p_data = acm_ptr->msg.p_data; + } + dapl_os_lock(&acm_ptr->lock); + acm_ptr->state = DCM_ACCEPTING_DATA; + dapl_os_unlock(&acm_ptr->lock); + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " ACCEPT: DST %s %x lid=0x%x, qpn=0x%x, psz=%d\n", + inet_ntoa(((struct sockaddr_in *) + &acm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) + &acm_ptr->msg.daddr.so)->sin_port), + ntohs(acm_ptr->msg.saddr.ib.lid), + ntohl(acm_ptr->msg.saddr.ib.qpn), exp); + +#ifdef DAT_EXTENSIONS + if (acm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) { + DAT_IB_EXTENSION_EVENT_DATA xevent; + + /* post EVENT, modify_qp created ah */ + xevent.status = 0; + xevent.type = DAT_IB_UD_CONNECT_REQUEST; + + dapls_evd_post_cr_event_ext(acm_ptr->sp, + DAT_IB_UD_CONNECTION_REQUEST_EVENT, + acm_ptr, + (DAT_COUNT) exp, + (DAT_PVOID *) acm_ptr->msg.p_data, + (DAT_PVOID *) &xevent); + } else +#endif + /* trigger CR event and return SUCCESS */ + dapls_cr_callback(acm_ptr, + IB_CME_CONNECTION_REQUEST_PENDING, + p_data, exp, acm_ptr->sp); + return; +bail: + /* mark for destroy, active will see socket close as 
rej */ + dapli_cm_free(acm_ptr); + return; +} + +/* + * PASSIVE: consumer accept, send local QP information, private data, + * queue on work thread to receive RTU information to avoid blocking + * user thread. + */ +static DAT_RETURN +dapli_socket_accept_usr(DAPL_EP * ep_ptr, + DAPL_CR * cr_ptr, DAT_COUNT p_size, DAT_PVOID p_data) +{ + DAPL_IA *ia_ptr = ep_ptr->header.owner_ia; + dp_ib_cm_handle_t cm_ptr = cr_ptr->ib_cm_handle; + ib_cm_msg_t local; + struct iovec iov[2]; + int len, exp = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; + DAT_RETURN ret = DAT_INTERNAL_ERROR; + socklen_t sl; + + if (p_size > DCM_MAX_PDATA_SIZE) { + dapl_log(DAPL_DBG_TYPE_ERR, + " accept_usr: psize(%d) too large\n", p_size); + return DAT_LENGTH_ERROR; + } + + /* must have a accepted socket */ + if (cm_ptr->socket == DAPL_INVALID_SOCKET) { + dapl_log(DAPL_DBG_TYPE_ERR, + " accept_usr: cm socket invalid\n"); + goto bail; + } + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " ACCEPT_USR: remote lid=0x%x" + " qpn=0x%x qp_type %d, psize=%d\n", + ntohs(cm_ptr->msg.saddr.ib.lid), + ntohl(cm_ptr->msg.saddr.ib.qpn), + cm_ptr->msg.saddr.ib.qp_type, + ntohs(cm_ptr->msg.p_size)); + +#ifdef DAT_EXTENSIONS + if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD && + ep_ptr->qp_handle->qp_type != IBV_QPT_UD) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT_USR: ERR remote QP is UD," + ", but local QP is not\n"); + ret = (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP); + goto bail; + } +#endif + + /* modify QP to RTR and then to RTS with remote info already read */ + dapl_os_lock(&ep_ptr->header.lock); + if (dapls_modify_qp_state(ep_ptr->qp_handle, + IBV_QPS_RTR, + cm_ptr->msg.saddr.ib.qpn, + cm_ptr->msg.saddr.ib.lid, + (ib_gid_handle_t)cm_ptr->msg.saddr.ib.gid) != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT_USR: QPS_RTR ERR %s -> %s\n", + strerror(errno), + inet_ntoa(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_addr)); + dapl_os_unlock(&ep_ptr->header.lock); + goto bail; + } + if 
(dapls_modify_qp_state(ep_ptr->qp_handle, + IBV_QPS_RTS, + cm_ptr->msg.saddr.ib.qpn, + cm_ptr->msg.saddr.ib.lid, + NULL) != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT_USR: QPS_RTS ERR %s -> %s\n", + strerror(errno), + inet_ntoa(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_addr)); + dapl_os_unlock(&ep_ptr->header.lock); + goto bail; + } + dapl_os_unlock(&ep_ptr->header.lock); + + /* save remote address information */ + dapl_os_memcpy(&ep_ptr->remote_ia_address, + &cm_ptr->msg.daddr.so, + sizeof(union dcm_addr)); + + /* send our QP info, IA address, pdata. Don't overwrite dst data */ + local.ver = htons(DCM_VER); + local.op = htons(DCM_REP); + local.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num); + local.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type; + local.saddr.ib.lid = ia_ptr->hca_ptr->ib_trans.lid; + dapl_os_memcpy(&local.saddr.ib.gid[0], + &ia_ptr->hca_ptr->ib_trans.gid, 16); + + /* Get local address information from socket */ + sl = sizeof(local.daddr.so); + getsockname(cm_ptr->socket, (struct sockaddr *)&local.daddr.so, &sl); + +#ifdef DAPL_DBG + /* DBG: Active PID [0], PASSIVE PID [2] */ + *(uint16_t*)&cm_ptr->msg.resv[2] = htons((uint16_t)dapl_os_getpid()); + dapl_os_memcpy(local.resv, cm_ptr->msg.resv, 4); +#endif + cm_ptr->hca = ia_ptr->hca_ptr; + dapl_os_lock(&cm_ptr->lock); + cm_ptr->state = DCM_ACCEPTED; + dapl_os_unlock(&cm_ptr->lock); + + /* Link CM to EP, already queued on work thread */ + dapl_ep_link_cm(ep_ptr, cm_ptr); + cm_ptr->ep = ep_ptr; + + local.p_size = htons(p_size); + iov[0].iov_base = (void *)&local; + iov[0].iov_len = exp; + + if (p_size) { + iov[1].iov_base = p_data; + iov[1].iov_len = p_size; + len = writev(cm_ptr->socket, iov, 2); + } else + len = writev(cm_ptr->socket, iov, 1); + + if (len != (p_size + exp)) { + int err = dapl_socket_errno(); + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT_USR: ERR 0x%x %s, wcnt=%d -> %s\n", + err, strerror(err), len, + inet_ntoa(((struct sockaddr_in *) + 
&cm_ptr->msg.daddr.so)->sin_addr)); + dapl_ep_unlink_cm(ep_ptr, cm_ptr); + cm_ptr->ep = NULL; + goto bail; + } + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " ACCEPT_USR: local lid=0x%x qpn=0x%x psz=%d\n", + ntohs(local.saddr.ib.lid), + ntohl(local.saddr.ib.qpn), ntohs(local.p_size)); + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " ACCEPT_USR: local GID subnet %016llx id %016llx\n", + (unsigned long long) + htonll(*(uint64_t*)&local.saddr.ib.gid[0]), + (unsigned long long) + htonll(*(uint64_t*)&local.saddr.ib.gid[8])); + + dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n"); + + return DAT_SUCCESS; +bail: + /* schedule cleanup from workq */ + dapli_cm_free(cm_ptr); + return ret; +} + +/* + * PASSIVE: read RTU from active peer, post CONN event + */ +static void dapli_socket_accept_rtu(dp_ib_cm_handle_t cm_ptr) +{ + int len; + ib_cm_events_t event = IB_CME_CONNECTED; + + /* complete handshake after final QP state change, VER and OP */ + len = recv(cm_ptr->socket, (char *)&cm_ptr->msg, 4, 0); + if (len != 4 || ntohs(cm_ptr->msg.op) != DCM_RTU) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT_RTU: rcv ERR, rcnt=%d op=%x <- %s\n", + len, ntohs(cm_ptr->msg.op), + inet_ntoa(((struct sockaddr_in *) + &cm_ptr->msg.daddr.so)->sin_addr)); + event = IB_CME_DESTINATION_REJECT; + goto bail; + } + + /* save state and reference to EP, queue for disc event */ + dapl_os_lock(&cm_ptr->lock); + cm_ptr->state = DCM_CONNECTED; + dapl_os_unlock(&cm_ptr->lock); + + /* final data exchange if remote QP state is good to go */ + dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: connected!\n"); + +#ifdef DAT_EXTENSIONS +ud_bail: + if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) { + DAT_IB_EXTENSION_EVENT_DATA xevent; + + ib_pd_handle_t pd_handle = + ((DAPL_PZ *)cm_ptr->ep->param.pz_handle)->pd_handle; + + if (event == IB_CME_CONNECTED) { + cm_ptr->ah = dapls_create_ah(cm_ptr->hca, pd_handle, + cm_ptr->ep->qp_handle, + cm_ptr->msg.saddr.ib.lid, + NULL); + if (cm_ptr->ah) { + /* post EVENT, modify_qp created ah */ + 
xevent.status = 0; + xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH; + xevent.remote_ah.ah = cm_ptr->ah; + xevent.remote_ah.qpn = ntohl(cm_ptr->msg.saddr.ib.qpn); + dapl_os_memcpy(&xevent.remote_ah.ia_addr, + &cm_ptr->msg.daddr.so, + sizeof(union dcm_addr)); + event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED; + } else + event = DAT_IB_UD_CONNECTION_ERROR_EVENT; + } else + event = DAT_IB_UD_CONNECTION_ERROR_EVENT; + + dapl_log(DAPL_DBG_TYPE_CM, + " CONN_RTU: UD AH %p for lid 0x%x qpn 0x%x\n", + cm_ptr->ah, ntohs(cm_ptr->msg.saddr.ib.lid), + ntohl(cm_ptr->msg.saddr.ib.qpn)); + + dapls_evd_post_connection_event_ext( + (DAPL_EVD *) + cm_ptr->ep->param.connect_evd_handle, + event, + (DAT_EP_HANDLE) cm_ptr->ep, + (DAT_COUNT) ntohs(cm_ptr->msg.p_size), + (DAT_PVOID *) cm_ptr->msg.p_data, + (DAT_PVOID *) &xevent); + + /* cleanup and release from local list, still on EP list */ + dapli_cm_free(cm_ptr); + + } else +#endif + { + dapli_ep_check(cm_ptr->ep); + dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp); + } + dapl_log(DAPL_DBG_TYPE_CM_EST, + " SCM PASSIVE CONN: %x <- %s %x\n", + cm_ptr->sp->conn_qual, + inet_ntoa(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_addr), + ntohs(((struct sockaddr_in *) &cm_ptr->msg.daddr.so)->sin_port)); + return; + +bail: +#ifdef DAT_EXTENSIONS + if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD) + goto ud_bail; +#endif + dapl_os_lock(&cm_ptr->lock); + cm_ptr->state = DCM_REJECTED; + dapl_os_unlock(&cm_ptr->lock); + + dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp); + dapli_cm_free(cm_ptr); +} + +/* + * dapls_ib_connect + * + * Initiate a connection with the passive listener on another node + * + * Input: + * ep_handle, + * remote_ia_address, + * remote_conn_qual, + * prd_size size of private data and structure + * prd_prt pointer to private data structure + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INVALID_PARAMETER + * + */ +DAT_RETURN +dapls_ib_connect(IN DAT_EP_HANDLE ep_handle, + 
IN DAT_IA_ADDRESS_PTR remote_ia_address, + IN DAT_CONN_QUAL remote_conn_qual, + IN DAT_COUNT private_data_size, IN void *private_data) +{ + DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " connect(ep_handle %p ....)\n", ep_handle); + + return (dapli_socket_connect(ep_ptr, remote_ia_address, + remote_conn_qual, + private_data_size, private_data)); +} + +/* + * dapls_ib_disconnect + * + * Disconnect an EP + * + * Input: + * ep_handle, + * disconnect_flags + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + */ +DAT_RETURN +dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags) +{ + dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr); + + dapl_os_lock(&ep_ptr->header.lock); + if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED || + ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC || + cm_ptr == NULL) { + dapl_os_unlock(&ep_ptr->header.lock); + return DAT_SUCCESS; + } + dapl_os_unlock(&ep_ptr->header.lock); + return (dapli_socket_disconnect(cm_ptr)); +} + +/* + * dapls_ib_disconnect_clean + * + * Clean up outstanding connection data. This routine is invoked + * after the final disconnect callback has occurred. Only on the + * ACTIVE side of a connection. It is also called if dat_ep_connect + * times out using the consumer supplied timeout value. 
+ * + * Input: + * ep_ptr DAPL_EP + * active Indicates active side of connection + * + * Output: + * none + * + * Returns: + * void + * + */ +void +dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr, + IN DAT_BOOLEAN active, + IN const ib_cm_events_t ib_cm_event) +{ + if (ib_cm_event == IB_CME_TIMEOUT) { + dp_ib_cm_handle_t cm_ptr; + + if ((cm_ptr = dapl_get_cm_from_ep(ep_ptr)) == NULL) + return; + + dapl_log(DAPL_DBG_TYPE_WARN, + "dapls_ib_disc_clean: CONN_TIMEOUT ep %p cm %p %s\n", + ep_ptr, cm_ptr, dapl_cm_state_str(cm_ptr->state)); + + /* schedule release of socket and local resources */ + dapli_cm_free(cm_ptr); + } +} + +/* + * dapl_ib_setup_conn_listener + * + * Have the CM set up a connection listener. + * + * Input: + * ibm_hca_handle HCA handle + * qp_handle QP handle + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INTERNAL_ERROR + * DAT_CONN_QUAL_UNAVAILBLE + * DAT_CONN_QUAL_IN_USE + * + */ +DAT_RETURN +dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr, + IN DAT_UINT64 ServiceID, IN DAPL_SP * sp_ptr) +{ + return (dapli_socket_listen(ia_ptr, ServiceID, sp_ptr)); +} + +/* + * dapl_ib_remove_conn_listener + * + * Have the CM remove a connection listener. 
+ * + * Input: + * ia_handle IA handle + * ServiceID IB Channel Service ID + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INVALID_STATE + * + */ +DAT_RETURN +dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr) +{ + ib_cm_srvc_handle_t cm_ptr = sp_ptr->cm_srvc_handle; + + /* free cm_srvc_handle, release will cleanup */ + if (cm_ptr != NULL) { + /* cr_thread will free */ + sp_ptr->cm_srvc_handle = NULL; + dapli_cm_free(cm_ptr); + } + return DAT_SUCCESS; +} + +/* + * dapls_ib_accept_connection + * + * Perform necessary steps to accept a connection + * + * Input: + * cr_handle + * ep_handle + * private_data_size + * private_data + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INTERNAL_ERROR + * + */ +DAT_RETURN +dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle, + IN DAT_EP_HANDLE ep_handle, + IN DAT_COUNT p_size, IN const DAT_PVOID p_data) +{ + DAPL_CR *cr_ptr; + DAPL_EP *ep_ptr; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + "dapls_ib_accept_connection(cr %p ep %p prd %p,%d)\n", + cr_handle, ep_handle, p_data, p_size); + + cr_ptr = (DAPL_CR *) cr_handle; + ep_ptr = (DAPL_EP *) ep_handle; + + /* allocate and attach a QP if necessary */ + if (ep_ptr->qp_state == DAPL_QP_STATE_UNATTACHED) { + DAT_RETURN status; + status = dapls_ib_qp_alloc(ep_ptr->header.owner_ia, + ep_ptr, ep_ptr); + if (status != DAT_SUCCESS) + return status; + } + return (dapli_socket_accept_usr(ep_ptr, cr_ptr, p_size, p_data)); +} + +/* + * dapls_ib_reject_connection + * + * Reject a connection + * + * Input: + * cr_handle + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INTERNAL_ERROR + * + */ +DAT_RETURN +dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_ptr, + IN int reason, + IN DAT_COUNT psize, IN const DAT_PVOID pdata) +{ + struct iovec iov[2]; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " reject(cm %p reason %x, pdata %p, psize %d)\n", + cm_ptr, reason, pdata, psize); + + if (psize > 
DCM_MAX_PDATA_SIZE) + return DAT_LENGTH_ERROR; + + /* write reject data to indicate reject */ + cm_ptr->msg.op = htons(DCM_REJ_USER); + cm_ptr->msg.p_size = htons(psize); + + iov[0].iov_base = (void *)&cm_ptr->msg; + iov[0].iov_len = sizeof(ib_cm_msg_t) - DCM_MAX_PDATA_SIZE; + if (psize) { + iov[1].iov_base = pdata; + iov[1].iov_len = psize; + writev(cm_ptr->socket, iov, 2); + } else { + writev(cm_ptr->socket, iov, 1); + } + + /* release and cleanup CM object */ + dapli_cm_free(cm_ptr); + return DAT_SUCCESS; +} + +/* + * dapls_ib_cm_remote_addr + * + * Obtain the remote IP address given a connection + * + * Input: + * cr_handle + * + * Output: + * remote_ia_address: where to place the remote address + * + * Returns: + * DAT_SUCCESS + * DAT_INVALID_HANDLE + * + */ +DAT_RETURN +dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, + OUT DAT_SOCK_ADDR6 * remote_ia_address) +{ + DAPL_HEADER *header; + dp_ib_cm_handle_t conn; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n", + dat_handle); + + header = (DAPL_HEADER *) dat_handle; + + if (header->magic == DAPL_MAGIC_EP) + conn = dapl_get_cm_from_ep((DAPL_EP *) dat_handle); + else if (header->magic == DAPL_MAGIC_CR) + conn = ((DAPL_CR *) dat_handle)->ib_cm_handle; + else + return DAT_INVALID_HANDLE; + + dapl_os_memcpy(remote_ia_address, + &conn->msg.daddr.so, sizeof(DAT_SOCK_ADDR6)); + + return DAT_SUCCESS; +} + +int dapls_ib_private_data_size( + IN DAPL_HCA *hca_ptr) +{ + return DCM_MAX_PDATA_SIZE; +} + +/* outbound/inbound CR processing thread to avoid blocking applications */ +void cr_thread(void *arg) +{ + struct dapl_hca *hca_ptr = arg; + dp_ib_cm_handle_t cr, next_cr; + int opt, ret; + socklen_t opt_len; + char rbuf[2]; + struct dapl_fd_set *set; + enum DAPL_FD_EVENTS event; + + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread: ENTER hca %p\n", hca_ptr); + set = dapl_alloc_fd_set(); + if (!set) + goto out; + + dapl_os_lock(&hca_ptr->ib_trans.lock); + hca_ptr->ib_trans.cr_state = 
IB_THREAD_RUN; + + while (1) { + dapl_fd_zero(set); + dapl_fd_set(hca_ptr->ib_trans.scm[0], set, DAPL_FD_READ); + + if (!dapl_llist_is_empty(&hca_ptr->ib_trans.list)) + next_cr = dapl_llist_peek_head(&hca_ptr->ib_trans.list); + else + next_cr = NULL; + + while (next_cr) { + cr = next_cr; + next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list, + (DAPL_LLIST_ENTRY *) + &cr->local_entry); + dapls_cm_acquire(cr); /* hold thread ref */ + dapl_os_lock(&cr->lock); + if (cr->state == DCM_FREE || + hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) { + dapl_log(DAPL_DBG_TYPE_CM, + " CM FREE: %p ep=%p st=%s sck=%d refs=%d\n", + cr, cr->ep, dapl_cm_state_str(cr->state), + cr->socket, cr->ref_count); + + if (cr->socket != DAPL_INVALID_SOCKET) { + shutdown(cr->socket, SHUT_RDWR); + closesocket(cr->socket); + cr->socket = DAPL_INVALID_SOCKET; + } + dapl_os_unlock(&cr->lock); + dapls_cm_release(cr); /* release alloc ref */ + dapli_cm_dequeue(cr); /* release workq ref */ + dapls_cm_release(cr); /* release thread ref */ + continue; + } + + event = (cr->state == DCM_CONN_PENDING) ? 
+ DAPL_FD_WRITE : DAPL_FD_READ; + + if (dapl_fd_set(cr->socket, set, event)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " cr_thread: fd_set ERR st=%d fd %d" + " -> %s\n", cr->state, cr->socket, + inet_ntoa(((struct sockaddr_in *) + &cr->msg.daddr.so)->sin_addr)); + dapl_os_unlock(&cr->lock); + dapls_cm_release(cr); /* release ref */ + continue; + } + dapl_os_unlock(&cr->lock); + dapl_os_unlock(&hca_ptr->ib_trans.lock); + + ret = dapl_poll(cr->socket, event); + + dapl_dbg_log(DAPL_DBG_TYPE_THREAD, + " poll ret=0x%x %s sck=%d\n", + ret, dapl_cm_state_str(cr->state), + cr->socket); + + /* data on listen, qp exchange, and on disc req */ + dapl_os_lock(&cr->lock); + if ((ret == DAPL_FD_READ) || + (cr->state != DCM_CONN_PENDING && ret == DAPL_FD_ERROR)) { + if (cr->socket != DAPL_INVALID_SOCKET) { + switch (cr->state) { + case DCM_LISTEN: + dapl_os_unlock(&cr->lock); + dapli_socket_accept(cr); + break; + case DCM_ACCEPTING: + dapl_os_unlock(&cr->lock); + dapli_socket_accept_data(cr); + break; + case DCM_ACCEPTED: + dapl_os_unlock(&cr->lock); + dapli_socket_accept_rtu(cr); + break; + case DCM_REP_PENDING: + dapl_os_unlock(&cr->lock); + dapli_socket_connect_rtu(cr); + break; + case DCM_CONNECTED: + dapl_os_unlock(&cr->lock); + dapli_socket_disconnect(cr); + break; + case DCM_DISCONNECTED: + cr->state = DCM_FREE; + dapl_os_unlock(&cr->lock); + break; + default: + if (ret == DAPL_FD_ERROR) + cr->state = DCM_FREE; + dapl_os_unlock(&cr->lock); + break; + } + } else + dapl_os_unlock(&cr->lock); + + /* ASYNC connections, writable, readable, error; check status */ + } else if (ret == DAPL_FD_WRITE || + (cr->state == DCM_CONN_PENDING && + ret == DAPL_FD_ERROR)) { + + opt = 0; + opt_len = sizeof(opt); + ret = getsockopt(cr->socket, SOL_SOCKET, + SO_ERROR, (char *)&opt, + &opt_len); + dapl_os_unlock(&cr->lock); + if (!ret && !opt) + dapli_socket_connected(cr, opt); + else + dapli_socket_connected(cr, opt ? 
opt : dapl_socket_errno()); + } else + dapl_os_unlock(&cr->lock); + + dapls_cm_release(cr); /* release ref */ + dapl_os_lock(&hca_ptr->ib_trans.lock); + } + + /* set to exit and all resources destroyed */ + if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) && + (dapl_llist_is_empty(&hca_ptr->ib_trans.list))) + break; + + dapl_os_unlock(&hca_ptr->ib_trans.lock); + dapl_select(set); + + /* if pipe used to wakeup, consume */ + while (dapl_poll(hca_ptr->ib_trans.scm[0], + DAPL_FD_READ) == DAPL_FD_READ) { + if (recv(hca_ptr->ib_trans.scm[0], rbuf, 2, 0) == -1) + dapl_log(DAPL_DBG_TYPE_THREAD, + " cr_thread: read pipe error = %s\n", + strerror(errno)); + } + dapl_os_lock(&hca_ptr->ib_trans.lock); + + /* set to exit and all resources destroyed */ + if ((hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) && + (dapl_llist_is_empty(&hca_ptr->ib_trans.list))) + break; + } + + dapl_os_unlock(&hca_ptr->ib_trans.lock); + dapl_os_free(set, sizeof(struct dapl_fd_set)); +out: + hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT; + dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " cr_thread(hca %p) exit\n", hca_ptr); +} + + +#ifdef DAPL_COUNTERS +/* Debug aid: List all Connections in process and state */ +void dapls_print_cm_list(IN DAPL_IA *ia_ptr) +{ + /* Print in process CR's for this IA, if debug type set */ + int i = 0; + dp_ib_cm_handle_t cr, next_cr; + + dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock); + if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*) + &ia_ptr->hca_ptr->ib_trans.list)) + next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*) + &ia_ptr->hca_ptr->ib_trans.list); + else + next_cr = NULL; + + printf("\n DAPL IA CONNECTIONS IN PROCESS:\n"); + while (next_cr) { + cr = next_cr; + next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*) + &ia_ptr->hca_ptr->ib_trans.list, + (DAPL_LLIST_ENTRY*)&cr->local_entry); + + printf( " CONN[%d]: sp %p ep %p sock %d %s %s %s %s %s %s PORT L-%x R-%x PID L-%x R-%x\n", + i, cr->sp, cr->ep, cr->socket, + cr->msg.saddr.ib.qp_type == IBV_QPT_RC ? 
"RC" : "UD", + dapl_cm_state_str(cr->state), dapl_cm_op_str(ntohs(cr->msg.op)), + ntohs(cr->msg.op) == DCM_REQ ? /* local address */ + inet_ntoa(((struct sockaddr_in *)&cr->msg.daddr.so)->sin_addr) : + inet_ntoa(((struct sockaddr_in *)&cr->addr)->sin_addr), + cr->sp ? "<-" : "->", + ntohs(cr->msg.op) == DCM_REQ ? /* remote address */ + inet_ntoa(((struct sockaddr_in *)&cr->addr)->sin_addr) : + inet_ntoa(((struct sockaddr_in *)&cr->msg.daddr.so)->sin_addr), + + ntohs(cr->msg.op) == DCM_REQ ? /* local port */ + ntohs(((struct sockaddr_in *)&cr->msg.daddr.so)->sin_port) : + ntohs(((struct sockaddr_in *)&cr->addr)->sin_port), + + ntohs(cr->msg.op) == DCM_REQ ? /* remote port */ + ntohs(((struct sockaddr_in *)&cr->addr)->sin_port) : + ntohs(((struct sockaddr_in *)&cr->msg.daddr.so)->sin_port), + + cr->sp ? ntohs(*(uint16_t*)&cr->msg.resv[2]) : ntohs(*(uint16_t*)&cr->msg.resv[0]), + cr->sp ? ntohs(*(uint16_t*)&cr->msg.resv[0]) : ntohs(*(uint16_t*)&cr->msg.resv[2])); + + i++; + } + printf("\n"); + dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock); +} +#endif diff --git a/trunk/ulp/dapl2/dapl/openib_ucm/SOURCES b/trunk/ulp/dapl2/dapl/openib_ucm/SOURCES index 381afa23..1f9d9ebf 100644 --- a/trunk/ulp/dapl2/dapl/openib_ucm/SOURCES +++ b/trunk/ulp/dapl2/dapl/openib_ucm/SOURCES @@ -22,7 +22,7 @@ SOURCES = udapl.rc ..\dapl_common_src.c ..\dapl_udapl_src.c ..\openib_common.c \ INCLUDES = ..\include;..\openib_common\;..\common;windows;..\..\dat\include;\ ..\..\dat\udat\windows;..\udapl\windows;\ ..\..\..\..\inc;..\..\..\..\inc\user;..\..\..\libibverbs\include;\ - ..\..\..\..\inc\user\linux; + ..\..\..\librdmacm\include;..\..\..\..\inc\user\linux; DAPL_OPTS = -DEXPORT_DAPL_SYMBOLS -DDAT_EXTENSIONS -DOPENIB -DCQ_WAIT_OBJECT diff --git a/trunk/ulp/dapl2/dapl/openib_ucm/cm.c b/trunk/ulp/dapl2/dapl/openib_ucm/cm.c index be15c0fe..ed962d57 100644 --- a/trunk/ulp/dapl2/dapl/openib_ucm/cm.c +++ b/trunk/ulp/dapl2/dapl/openib_ucm/cm.c @@ -1,2166 +1,2176 @@ -/* - * Copyright (c) 2009 Intel 
Corporation. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -#include "dapl.h" -#include "dapl_adapter_util.h" -#include "dapl_evd_util.h" -#include "dapl_cr_util.h" -#include "dapl_name_service.h" -#include "dapl_ib_util.h" -#include "dapl_ep_util.h" -#include "dapl_osd.h" - - -#if defined(_WIN32) -#include -#else // _WIN32 -enum DAPL_FD_EVENTS { - DAPL_FD_READ = POLLIN, - DAPL_FD_WRITE = POLLOUT, - DAPL_FD_ERROR = POLLERR -}; - -struct dapl_fd_set { - int index; - struct pollfd set[DAPL_FD_SETSIZE]; -}; - -static struct dapl_fd_set *dapl_alloc_fd_set(void) -{ - return dapl_os_alloc(sizeof(struct dapl_fd_set)); -} - -static void dapl_fd_zero(struct dapl_fd_set *set) -{ - set->index = 0; -} - -static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set, - enum DAPL_FD_EVENTS event) -{ - if (set->index == DAPL_FD_SETSIZE - 1) { - dapl_log(DAPL_DBG_TYPE_ERR, - "SCM ERR: cm_thread exceeded FD_SETSIZE %d\n", - set->index + 1); - return -1; - } - - set->set[set->index].fd = s; - set->set[set->index].revents = 0; - set->set[set->index++].events = event; - return 0; -} - -static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event) -{ - struct pollfd fds; - int ret; - - fds.fd = s; - fds.events = event; - fds.revents = 0; - ret = poll(&fds, 1, 0); - dapl_log(DAPL_DBG_TYPE_CM, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n", - s, ret, fds.revents); - if (ret == 0) - return 0; - else if (fds.revents & (POLLERR | POLLHUP | POLLNVAL)) - return DAPL_FD_ERROR; - else - return fds.revents; -} - -static int dapl_select(struct dapl_fd_set *set, int time_ms) -{ - int ret; - - dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep, fds=%d\n", - set->index); - ret = poll(set->set, set->index, time_ms); - dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup, ret=0x%x\n", ret); - return ret; -} -#endif - -/* forward declarations */ -static int ucm_reply(dp_ib_cm_handle_t cm); -static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg); -static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg); -static void 
ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg); -static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size); -static void ucm_disconnect_final(dp_ib_cm_handle_t cm); -DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm); -DAT_RETURN dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm); - -#define UCM_SND_BURST 50 - -/* Service ids - port space */ -static uint16_t ucm_get_port(ib_hca_transport_t *tp, uint16_t port) -{ - int i = 0; - - dapl_os_lock(&tp->plock); - /* get specific ID */ - if (port) { - if (tp->sid[port] == 0) { - tp->sid[port] = 1; - i = port; - } - goto done; - } - - /* get any free ID */ - for (i = 0xffff; i > 0; i--) { - if (tp->sid[i] == 0) { - tp->sid[i] = 1; - break; - } - } -done: - dapl_os_unlock(&tp->plock); - return i; -} - -static void ucm_free_port(ib_hca_transport_t *tp, uint16_t port) -{ - dapl_os_lock(&tp->plock); - tp->sid[port] = 0; - dapl_os_unlock(&tp->plock); -} - -static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) -{ - DAPL_OS_TIMEVAL time; - - dapl_os_lock(&cm->lock); - dapl_os_get_time(&time); - switch (cm->state) { - case DCM_REP_PENDING: - *timer = cm->hca->ib_trans.cm_timer; - /* wait longer each retry */ - if ((time - cm->timer)/1000 > - (cm->hca->ib_trans.rep_time << cm->retries)) { - dapl_log(DAPL_DBG_TYPE_CM_WARN, - " CM_REQ retry %p %d [lid, port, qpn]:" - " %x %x %x -> %x %x %x Time(ms) %llu > %d\n", - cm, cm->retries+1, ntohs(cm->msg.saddr.ib.lid), - ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn), - ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport), - ntohl(cm->msg.dqpn), (time - cm->timer)/1000, - cm->hca->ib_trans.rep_time << cm->retries); - cm->retries++; - dapl_os_unlock(&cm->lock); - dapli_cm_connect(cm->ep, cm); - return; - } - break; - case DCM_RTU_PENDING: - *timer = cm->hca->ib_trans.cm_timer; - if ((time - cm->timer)/1000 > - (cm->hca->ib_trans.rtu_time << cm->retries)) { - dapl_log(DAPL_DBG_TYPE_CM_WARN, - " CM_REPLY retry %d [lid, port, qpn]:" - " 
%x %x %x -> %x %x %x r_pid %x (%x) Time(ms) %llu > %d\n", - cm->retries+1, - ntohs(cm->msg.saddr.ib.lid), - ntohs(cm->msg.sport), - ntohl(cm->msg.saddr.ib.qpn), - ntohs(cm->msg.daddr.ib.lid), - ntohs(cm->msg.dport), - ntohl(cm->msg.daddr.ib.qpn), - ntohl(*(DAT_UINT32*)cm->msg.resv), - ntohl(*(DAT_UINT32*)cm->msg.resv), - (time - cm->timer)/1000, - cm->hca->ib_trans.rtu_time << cm->retries); - cm->retries++; - dapl_os_unlock(&cm->lock); - ucm_reply(cm); - return; - } - break; - case DCM_DISC_PENDING: - *timer = cm->hca->ib_trans.cm_timer; - /* wait longer each retry */ - if ((time - cm->timer)/1000 > - (cm->hca->ib_trans.rtu_time << cm->retries)) { - dapl_log(DAPL_DBG_TYPE_CM_WARN, - " CM_DREQ retry %d [lid, port, qpn]:" - " %x %x %x -> %x %x %x r_pid %x (%x) Time(ms) %llu > %d\n", - cm->retries+1, - ntohs(cm->msg.saddr.ib.lid), - ntohs(cm->msg.sport), - ntohl(cm->msg.saddr.ib.qpn), - ntohs(cm->msg.daddr.ib.lid), - ntohs(cm->msg.dport), - ntohl(cm->msg.dqpn), - ntohl(*(DAT_UINT32*)cm->msg.resv), - ntohl(*(DAT_UINT32*)cm->msg.resv), - (time - cm->timer)/1000, - cm->hca->ib_trans.rtu_time << cm->retries); - cm->retries++; - dapl_os_unlock(&cm->lock); - dapli_cm_disconnect(cm); - return; - } - break; - default: - break; - } - dapl_os_unlock(&cm->lock); -} - -/* SEND CM MESSAGE PROCESSING */ - -/* Get CM UD message from send queue, called with s_lock held */ -static ib_cm_msg_t *ucm_get_smsg(ib_hca_transport_t *tp) -{ - ib_cm_msg_t *msg = NULL; - int ret, polled = 0, hd = tp->s_hd; - - hd++; -retry: - if (hd == tp->qpe) - hd = 0; - - if (hd == tp->s_tl) - msg = NULL; - else { - msg = &tp->sbuf[hd]; - tp->s_hd = hd; /* new hd */ - } - - /* if empty, process some completions */ - if ((msg == NULL) && (!polled)) { - struct ibv_wc wc; - - /* process completions, based on UCM_SND_BURST */ - ret = ibv_poll_cq(tp->scq, 1, &wc); - if (ret < 0) { - dapl_log(DAPL_DBG_TYPE_WARN, - " get_smsg: cq %p %s\n", - tp->scq, strerror(errno)); - } - /* free up completed sends, update tail 
*/ - if (ret > 0) { - tp->s_tl = (int)wc.wr_id; - dapl_log(DAPL_DBG_TYPE_CM, - " get_smsg: wr_cmp (%d) s_tl=%d\n", - wc.status, tp->s_tl); - } - polled++; - goto retry; - } - return msg; -} - -/* RECEIVE CM MESSAGE PROCESSING */ - -static int ucm_post_rmsg(ib_hca_transport_t *tp, ib_cm_msg_t *msg) -{ - struct ibv_recv_wr recv_wr, *recv_err; - struct ibv_sge sge; - - recv_wr.next = NULL; - recv_wr.sg_list = &sge; - recv_wr.num_sge = 1; - recv_wr.wr_id = (uint64_t)(uintptr_t) msg; - sge.length = sizeof(ib_cm_msg_t) + sizeof(struct ibv_grh); - sge.lkey = tp->mr_rbuf->lkey; - sge.addr = (uintptr_t)((char *)msg - sizeof(struct ibv_grh)); - - return (ibv_post_recv(tp->qp, &recv_wr, &recv_err)); -} - -static int ucm_reject(ib_hca_transport_t *tp, ib_cm_msg_t *msg) -{ - ib_cm_msg_t smsg; - - /* setup op, rearrange the src, dst cm and addr info */ - (void)dapl_os_memzero(&smsg, sizeof(smsg)); - smsg.ver = htons(DCM_VER); - smsg.op = htons(DCM_REJ_CM); - smsg.dport = msg->sport; - smsg.dqpn = msg->sqpn; - smsg.sport = msg->dport; - smsg.sqpn = msg->dqpn; - - dapl_os_memcpy(&smsg.daddr, &msg->saddr, sizeof(union dcm_addr)); - - /* no dst_addr IB info in REQ, init lid, gid, get type from saddr */ - smsg.saddr.ib.lid = tp->addr.ib.lid; - smsg.saddr.ib.qp_type = msg->saddr.ib.qp_type; - dapl_os_memcpy(&smsg.saddr.ib.gid[0], - &tp->addr.ib.gid, 16); - - dapl_os_memcpy(&smsg.saddr, &msg->daddr, sizeof(union dcm_addr)); - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " CM reject -> LID %x, QPN %x PORT %x\n", - ntohs(smsg.daddr.ib.lid), - ntohl(smsg.dqpn), ntohs(smsg.dport)); - - return (ucm_send(tp, &smsg, NULL, 0)); -} - -static void ucm_process_recv(ib_hca_transport_t *tp, - ib_cm_msg_t *msg, - dp_ib_cm_handle_t cm) -{ - dapl_os_lock(&cm->lock); - switch (cm->state) { - case DCM_LISTEN: /* passive */ - dapl_os_unlock(&cm->lock); - ucm_accept(cm, msg); - break; - case DCM_RTU_PENDING: /* passive */ - dapl_os_unlock(&cm->lock); - ucm_accept_rtu(cm, msg); - break; - case DCM_REP_PENDING: /* 
active */ - dapl_os_unlock(&cm->lock); - ucm_connect_rtu(cm, msg); - break; - case DCM_CONNECTED: /* active and passive */ - /* DREQ, change state and process */ - if (ntohs(msg->op) == DCM_DREQ) { - cm->state = DCM_DISC_RECV; - dapl_os_unlock(&cm->lock); - dapli_cm_disconnect(cm); - break; - } - /* active: RTU was dropped, resend */ - if (ntohs(msg->op) == DCM_REP) { - dapl_log(DAPL_DBG_TYPE_CM_WARN, - " RESEND RTU: op %s st %s [lid, port, qpn]:" - " %x %x %x -> %x %x %x\n", - dapl_cm_op_str(ntohs(msg->op)), - dapl_cm_state_str(cm->state), - ntohs(msg->saddr.ib.lid), - ntohs(msg->sport), - ntohl(msg->saddr.ib.qpn), - ntohs(msg->daddr.ib.lid), - ntohs(msg->dport), - ntohl(msg->daddr.ib.qpn)); - - cm->msg.op = htons(DCM_RTU); - ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0); - } - dapl_os_unlock(&cm->lock); - break; - case DCM_DISC_PENDING: /* active and passive */ - /* DREQ or DREP, finalize */ - dapl_os_unlock(&cm->lock); - ucm_disconnect_final(cm); - break; - case DCM_DISCONNECTED: - case DCM_FREE: - /* DREQ dropped, resend */ - if (ntohs(msg->op) == DCM_DREQ) { - dapl_log(DAPL_DBG_TYPE_CM_WARN, - " RESEND DREP: op %s st %s [lid, port, qpn]:" - " %x %x %x -> %x %x %x\n", - dapl_cm_op_str(ntohs(msg->op)), - dapl_cm_state_str(cm->state), - ntohs(msg->saddr.ib.lid), - ntohs(msg->sport), - ntohl(msg->saddr.ib.qpn), - ntohs(msg->daddr.ib.lid), - ntohs(msg->dport), - ntohl(msg->daddr.ib.qpn)); - cm->msg.op = htons(DCM_DREP); - ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0); - - } else if (ntohs(msg->op) != DCM_DREP){ - /* DREP ok to ignore, any other print warning */ - dapl_log(DAPL_DBG_TYPE_WARN, - " ucm_recv: UNEXPECTED MSG on cm %p" - " <- op %s, st %s spsp %x sqpn %x\n", - cm, dapl_cm_op_str(ntohs(msg->op)), - dapl_cm_state_str(cm->state), - ntohs(msg->sport), ntohl(msg->sqpn)); - } - dapl_os_unlock(&cm->lock); - break; - default: - dapl_log(DAPL_DBG_TYPE_WARN, - " ucm_recv: UNKNOWN state" - " <- op %s, %s spsp %x sqpn %x\n", - dapl_cm_op_str(ntohs(msg->op)), - 
dapl_cm_state_str(cm->state), - ntohs(msg->sport), ntohl(msg->sqpn)); - dapl_os_unlock(&cm->lock); - break; - } -} - -/* Find matching CM object for this receive message, return CM reference, timer */ -dp_ib_cm_handle_t ucm_cm_find(ib_hca_transport_t *tp, ib_cm_msg_t *msg) -{ - dp_ib_cm_handle_t cm, next, found = NULL; - struct dapl_llist_entry **list; - DAPL_OS_LOCK *lock; - int listenq = 0; - - /* conn list first, duplicate requests for DCM_REQ */ - list = &tp->list; - lock = &tp->lock; - -retry_listenq: - dapl_os_lock(lock); - if (!dapl_llist_is_empty(list)) - next = dapl_llist_peek_head(list); - else - next = NULL; - - while (next) { - cm = next; - next = dapl_llist_next_entry(list, - (DAPL_LLIST_ENTRY *)&cm->local_entry); - if (cm->state == DCM_DESTROY || cm->state == DCM_FREE) - continue; - - /* CM sPORT + QPN, match is good enough for listenq */ - if (listenq && - cm->msg.sport == msg->dport && - cm->msg.sqpn == msg->dqpn) { - found = cm; - break; - } - /* connectq, check src and dst, check duplicate conn_reqs */ - if (!listenq && - cm->msg.sport == msg->dport && cm->msg.sqpn == msg->dqpn && - cm->msg.dport == msg->sport && cm->msg.dqpn == msg->sqpn && - cm->msg.daddr.ib.lid == msg->saddr.ib.lid) { - if (ntohs(msg->op) != DCM_REQ) { - found = cm; - break; - } else { - /* duplicate; bail and throw away */ - dapl_os_unlock(lock); - dapl_log(DAPL_DBG_TYPE_CM_WARN, - " DUPLICATE: cm %p op %s st %s [lid, port, qpn]:" - " %x %x %x <- %x %x %x\n", cm, - dapl_cm_op_str(ntohs(msg->op)), - dapl_cm_state_str(cm->state), - ntohs(msg->daddr.ib.lid), - ntohs(msg->dport), - ntohl(msg->daddr.ib.qpn), - ntohs(msg->saddr.ib.lid), - ntohs(msg->sport), - ntohl(msg->saddr.ib.qpn)); - - return NULL; - } - } - } - dapl_os_unlock(lock); - - /* no duplicate request on connq, check listenq for new request */ - if (ntohs(msg->op) == DCM_REQ && !listenq && !found) { - listenq = 1; - list = &tp->llist; - lock = &tp->llock; - goto retry_listenq; - } - - /* not match on listenq for valid 
request, send reject */ - if (ntohs(msg->op) == DCM_REQ && !found) { - dapl_log(DAPL_DBG_TYPE_WARN, - " ucm_recv: NO LISTENER for %s %x %x i%x c%x" - " < %x %x %x, sending reject\n", - dapl_cm_op_str(ntohs(msg->op)), - ntohs(msg->daddr.ib.lid), ntohs(msg->dport), - ntohl(msg->daddr.ib.qpn), ntohl(msg->sqpn), - ntohs(msg->saddr.ib.lid), ntohs(msg->sport), - ntohl(msg->saddr.ib.qpn)); - - ucm_reject(tp, msg); - } - - if (!found) { - dapl_log(DAPL_DBG_TYPE_CM, - " ucm_recv: NO MATCH op %s %x %x i%x c%x" - " < %x %x %x\n", - dapl_cm_op_str(ntohs(msg->op)), - ntohs(msg->daddr.ib.lid), ntohs(msg->dport), - ntohl(msg->daddr.ib.qpn), ntohl(msg->sqpn), - ntohs(msg->saddr.ib.lid), ntohs(msg->sport), - ntohl(msg->saddr.ib.qpn)); - } - - return found; -} - -/* Get rmsgs from CM completion queue, 10 at a time */ -static void ucm_recv(ib_hca_transport_t *tp) -{ - struct ibv_wc wc[10]; - ib_cm_msg_t *msg; - dp_ib_cm_handle_t cm; - int i, ret, notify = 0; - struct ibv_cq *ibv_cq = NULL; - DAPL_HCA *hca; - - /* POLLIN on channel FD */ - ret = ibv_get_cq_event(tp->rch, &ibv_cq, (void *)&hca); - if (ret == 0) { - ibv_ack_cq_events(ibv_cq, 1); - } -retry: - ret = ibv_poll_cq(tp->rcq, 10, wc); - if (ret <= 0) { - if (!ret && !notify) { - ibv_req_notify_cq(tp->rcq, 0); - notify = 1; - goto retry; - } - return; - } else - notify = 0; - - for (i = 0; i < ret; i++) { - msg = (ib_cm_msg_t*) (uintptr_t) wc[i].wr_id; - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ucm_recv: wc status=%d, ln=%d id=%p sqp=%x\n", - wc[i].status, wc[i].byte_len, - (void*)wc[i].wr_id, wc[i].src_qp); - - /* validate CM message, version */ - if (ntohs(msg->ver) != DCM_VER) { - dapl_log(DAPL_DBG_TYPE_WARN, - " ucm_recv: UNKNOWN msg %p, ver %d\n", - msg, msg->ver); - ucm_post_rmsg(tp, msg); - continue; - } - if (!(cm = ucm_cm_find(tp, msg))) { - ucm_post_rmsg(tp, msg); - continue; - } - - /* match, process it */ - ucm_process_recv(tp, msg, cm); - ucm_post_rmsg(tp, msg); - } - - /* finished this batch of WC's, poll and rearm */ 
- goto retry; -} - -/* ACTIVE/PASSIVE: build and send CM message out of CM object */ -static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size) -{ - ib_cm_msg_t *smsg = NULL; - struct ibv_send_wr wr, *bad_wr; - struct ibv_sge sge; - int len, ret = -1; - uint16_t dlid = ntohs(msg->daddr.ib.lid); - - /* Get message from send queue, copy data, and send */ - dapl_os_lock(&tp->slock); - if ((smsg = ucm_get_smsg(tp)) == NULL) - goto bail; - - len = (sizeof(*msg) - DCM_MAX_PDATA_SIZE); - dapl_os_memcpy(smsg, msg, len); - if (p_size) { - smsg->p_size = ntohs(p_size); - dapl_os_memcpy(&smsg->p_data, p_data, p_size); - } - - wr.next = NULL; - wr.sg_list = &sge; - wr.num_sge = 1; - wr.opcode = IBV_WR_SEND; - wr.wr_id = (unsigned long)tp->s_hd; - wr.send_flags = (wr.wr_id % UCM_SND_BURST) ? 0 : IBV_SEND_SIGNALED; - if (len <= tp->max_inline_send) - wr.send_flags |= IBV_SEND_INLINE; - - sge.length = len + p_size; - sge.lkey = tp->mr_sbuf->lkey; - sge.addr = (uintptr_t)smsg; - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ucm_send: op %s ln %d lid %x c_qpn %x rport %s\n", - dapl_cm_op_str(ntohs(smsg->op)), - sge.length, htons(smsg->daddr.ib.lid), - htonl(smsg->dqpn), htons(smsg->dport)); - - /* empty slot, then create AH */ - if (!tp->ah[dlid]) { - tp->ah[dlid] = - dapls_create_ah(tp->hca, tp->pd, tp->qp, - htons(dlid), NULL); - if (!tp->ah[dlid]) - goto bail; - } - - wr.wr.ud.ah = tp->ah[dlid]; - wr.wr.ud.remote_qpn = ntohl(smsg->dqpn); - wr.wr.ud.remote_qkey = DAT_UD_QKEY; - - ret = ibv_post_send(tp->qp, &wr, &bad_wr); -bail: - dapl_os_unlock(&tp->slock); - return ret; -} - -/* ACTIVE/PASSIVE: CM objects */ -static void dapli_cm_dealloc(dp_ib_cm_handle_t cm) { - - dapl_os_assert(!cm->ref_count); - dapl_os_lock_destroy(&cm->lock); - dapl_os_wait_object_destroy(&cm->event); - dapl_os_free(cm, sizeof(*cm)); -} - -void dapls_cm_acquire(dp_ib_cm_handle_t cm) -{ - dapl_os_lock(&cm->lock); - cm->ref_count++; - dapl_os_unlock(&cm->lock); -} - -void 
dapls_cm_release(dp_ib_cm_handle_t cm) -{ - dapl_os_lock(&cm->lock); - cm->ref_count--; - if (cm->ref_count) { - dapl_os_unlock(&cm->lock); - return; - } - /* client, release local conn id port */ - if (!cm->sp && cm->msg.sport) - ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport)); - - /* clean up any UD address handles */ - if (cm->ah) { - ibv_destroy_ah(cm->ah); - cm->ah = NULL; - } - dapl_os_unlock(&cm->lock); - dapli_cm_dealloc(cm); -} - -dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep) -{ - dp_ib_cm_handle_t cm; - - /* Allocate CM, init lock, and initialize */ - if ((cm = dapl_os_alloc(sizeof(*cm))) == NULL) - return NULL; - - (void)dapl_os_memzero(cm, sizeof(*cm)); - if (dapl_os_lock_init(&cm->lock)) - goto bail; - - if (dapl_os_wait_object_init(&cm->event)) { - dapl_os_lock_destroy(&cm->lock); - goto bail; - } - dapls_cm_acquire(cm); - - cm->msg.ver = htons(DCM_VER); - *(DAT_UINT32*)cm->msg.resv = htonl(dapl_os_getpid()); /* exchange PID for debugging */ - - /* ACTIVE: init source address QP info from local EP */ - if (ep) { - DAPL_HCA *hca = ep->header.owner_ia->hca_ptr; - - cm->msg.sport = htons(ucm_get_port(&hca->ib_trans, 0)); - if (!cm->msg.sport) { - dapl_os_wait_object_destroy(&cm->event); - dapl_os_lock_destroy(&cm->lock); - goto bail; - } - /* link CM object to EP */ - dapl_ep_link_cm(ep, cm); - cm->hca = hca; - cm->ep = ep; - - /* IB info in network order */ - cm->msg.sqpn = htonl(hca->ib_trans.qp->qp_num); /* ucm */ - cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num); /* ep */ - cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type; - cm->msg.saddr.ib.lid = hca->ib_trans.addr.ib.lid; - dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], - &hca->ib_trans.addr.ib.gid, 16); - } - return cm; -bail: - dapl_os_free(cm, sizeof(*cm)); - return NULL; -} - -/* schedule destruction of CM object */ -void dapli_cm_free(dp_ib_cm_handle_t cm) -{ - dapl_log(DAPL_DBG_TYPE_CM, - " dapli_cm_free: cm %p %s ep %p refs=%d\n", - cm, dapl_cm_state_str(cm->state), - cm->ep, 
cm->ref_count); - - dapl_os_lock(&cm->lock); - cm->state = DCM_FREE; - dapls_thread_signal(&cm->hca->ib_trans.signal); - dapl_os_unlock(&cm->lock); -} - -/* Blocking, ONLY called from dat_ep_free */ -void dapls_cm_free(dp_ib_cm_handle_t cm) -{ - dapl_log(DAPL_DBG_TYPE_CM, - " dapl_cm_free: cm %p %s ep %p refs=%d\n", - cm, dapl_cm_state_str(cm->state), - cm->ep, cm->ref_count); - - /* free from internal workq, wait until EP is last ref */ - dapl_os_lock(&cm->lock); - if (cm->state != DCM_FREE) - cm->state = DCM_FREE; - - while (cm->ref_count != 1) { - dapl_os_unlock(&cm->lock); - dapls_thread_signal(&cm->hca->ib_trans.signal); - dapl_os_sleep_usec(10000); - dapl_os_lock(&cm->lock); - } - dapl_os_unlock(&cm->lock); - - /* unlink, dequeue from EP. Final ref so release will destroy */ - dapl_ep_unlink_cm(cm->ep, cm); -} - -/* ACTIVE/PASSIVE: queue up connection object on CM list */ -static void dapli_queue_conn(dp_ib_cm_handle_t cm) -{ - /* add to work queue, list, for cm thread processing */ - dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm->local_entry); - dapl_os_lock(&cm->hca->ib_trans.lock); - dapls_cm_acquire(cm); - dapl_llist_add_tail(&cm->hca->ib_trans.list, - (DAPL_LLIST_ENTRY *)&cm->local_entry, cm); - dapl_os_unlock(&cm->hca->ib_trans.lock); - dapls_thread_signal(&cm->hca->ib_trans.signal); -} - -/* PASSIVE: queue up listen object on listen list */ -static void dapli_queue_listen(dp_ib_cm_handle_t cm) -{ - /* add to work queue, llist, for cm thread processing */ - dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm->local_entry); - dapl_os_lock(&cm->hca->ib_trans.llock); - dapls_cm_acquire(cm); - dapl_llist_add_tail(&cm->hca->ib_trans.llist, - (DAPL_LLIST_ENTRY *)&cm->local_entry, cm); - dapl_os_unlock(&cm->hca->ib_trans.llock); -} - -static void dapli_dequeue_listen(dp_ib_cm_handle_t cm) -{ - DAPL_HCA *hca = cm->hca; - - dapl_os_lock(&hca->ib_trans.llock); - dapl_llist_remove_entry(&hca->ib_trans.llist, - (DAPL_LLIST_ENTRY *)&cm->local_entry); - 
dapls_cm_release(cm); - dapl_os_unlock(&hca->ib_trans.llock); -} - -/* called with local LIST and CM object lock */ -static void dapli_cm_dequeue(dp_ib_cm_handle_t cm) -{ - /* Remove from work queue, cr thread processing */ - dapl_llist_remove_entry(&cm->hca->ib_trans.list, - (DAPL_LLIST_ENTRY *)&cm->local_entry); - dapls_cm_release(cm); -} - -static void ucm_disconnect_final(dp_ib_cm_handle_t cm) -{ - /* no EP attachment or not RC, nothing to process */ - if (cm->ep == NULL || - cm->ep->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC) - return; - - dapl_os_lock(&cm->lock); - if (cm->state == DCM_DISCONNECTED) { - dapl_os_unlock(&cm->lock); - return; - } - - cm->state = DCM_DISCONNECTED; - dapl_os_unlock(&cm->lock); - - if (cm->sp) - dapls_cr_callback(cm, IB_CME_DISCONNECTED, NULL, 0, cm->sp); - else - dapl_evd_connection_callback(cm, IB_CME_DISCONNECTED, NULL, 0, cm->ep); - -} - -/* - * called from consumer thread via ep_disconnect/ep_free or - * from cm_thread when receiving DREQ - */ -DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm) -{ - int finalize = 1; - int wakeup = 0; - - dapl_os_lock(&cm->lock); - switch (cm->state) { - case DCM_CONNECTED: - /* CONSUMER: move to err state to flush, if not UD */ - if (cm->ep->qp_handle->qp_type != IBV_QPT_UD) - dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0); - - /* send DREQ, event after DREP or DREQ timeout */ - cm->state = DCM_DISC_PENDING; - cm->msg.op = htons(DCM_DREQ); - finalize = 0; /* wait for DREP, wakeup timer after DREQ sent */ - wakeup = 1; - break; - case DCM_DISC_PENDING: - /* DREQ timeout, resend until retries exhausted */ - cm->msg.op = htons(DCM_DREQ); - if (cm->retries >= cm->hca->ib_trans.retries) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CM_DREQ: RETRIES EXHAUSTED:" - " %x %x %x -> %x %x %x\n", - htons(cm->msg.saddr.ib.lid), - htonl(cm->msg.saddr.ib.qpn), - htons(cm->msg.sport), - htons(cm->msg.daddr.ib.lid), - htonl(cm->msg.dqpn), - htons(cm->msg.dport)); - finalize = 1; - } - break; - case 
DCM_DISC_RECV: - /* CM_THREAD: move to err state to flush, if not UD */ - if (cm->ep->qp_handle->qp_type != IBV_QPT_UD) - dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0); - - /* DREQ received, send DREP and schedule event, finalize */ - cm->msg.op = htons(DCM_DREP); - break; - case DCM_DISCONNECTED: - dapl_os_unlock(&cm->lock); - return DAT_SUCCESS; - default: - dapl_log(DAPL_DBG_TYPE_WARN, - " disconnect UNKNOWN state: ep %p cm %p %s %s" - " %x %x %x %s %x %x %x r_pid %x (%x)\n", - cm->ep, cm, - cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? "RC" : "UD", - dapl_cm_state_str(cm->state), - ntohs(cm->msg.saddr.ib.lid), - ntohs(cm->msg.sport), - ntohl(cm->msg.saddr.ib.qpn), - cm->sp ? "<-" : "->", - ntohs(cm->msg.daddr.ib.lid), - ntohs(cm->msg.dport), - ntohl(cm->msg.daddr.ib.qpn), - ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv), - ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv)); - - dapl_os_unlock(&cm->lock); - return DAT_SUCCESS; - } - - dapl_os_get_time(&cm->timer); /* reply expected */ - ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0); - dapl_os_unlock(&cm->lock); - - if (wakeup) - dapls_thread_signal(&cm->hca->ib_trans.signal); - - if (finalize) - ucm_disconnect_final(cm); - - return DAT_SUCCESS; -} - -/* - * ACTIVE: get remote CM SID server info from r_addr. 
- * send, or resend CM msg via UD CM QP - */ -DAT_RETURN -dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm) -{ - dapl_log(DAPL_DBG_TYPE_EP, - " connect: lid %x i_qpn %x lport %x p_sz=%d -> " - " lid %x c_qpn %x rport %x\n", - htons(cm->msg.saddr.ib.lid), htonl(cm->msg.saddr.ib.qpn), - htons(cm->msg.sport), htons(cm->msg.p_size), - htons(cm->msg.daddr.ib.lid), htonl(cm->msg.dqpn), - htons(cm->msg.dport)); - - dapl_os_lock(&cm->lock); - if (cm->state != DCM_REP_PENDING) { - dapl_os_unlock(&cm->lock); - return DAT_INVALID_STATE; - } - - if (cm->retries == cm->hca->ib_trans.retries) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CM_REQ: RETRIES EXHAUSTED:" - " 0x%x %x 0x%x -> 0x%x %x 0x%x\n", - htons(cm->msg.saddr.ib.lid), - htonl(cm->msg.saddr.ib.qpn), - htons(cm->msg.sport), - htons(cm->msg.daddr.ib.lid), - htonl(cm->msg.dqpn), - htons(cm->msg.dport)); - - dapl_os_unlock(&cm->lock); - -#ifdef DAPL_COUNTERS - /* called from check_timers in cm_thread, cm lock held */ - if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST) { - dapl_os_unlock(&cm->hca->ib_trans.lock); - dapls_print_cm_list(ep->header.owner_ia); - dapl_os_lock(&cm->hca->ib_trans.lock); - } -#endif - dapl_evd_connection_callback(cm, - IB_CME_DESTINATION_UNREACHABLE, - NULL, 0, ep); - - return DAT_ERROR(DAT_INVALID_ADDRESS, - DAT_INVALID_ADDRESS_UNREACHABLE); - } - dapl_os_unlock(&cm->lock); - - cm->msg.op = htons(DCM_REQ); - dapl_os_get_time(&cm->timer); /* reply expected */ - if (ucm_send(&cm->hca->ib_trans, &cm->msg, - &cm->msg.p_data, ntohs(cm->msg.p_size))) - goto bail; - - /* first time through, link EP and CM, put on work queue */ - if (!cm->retries) { - dapli_queue_conn(cm); - } - return DAT_SUCCESS; - -bail: - dapl_log(DAPL_DBG_TYPE_WARN, - " connect: ERR %s -> cm_lid %x cm_qpn %x r_psp %x p_sz=%d\n", - strerror(errno), htons(cm->msg.daddr.ib.lid), - htonl(cm->msg.dqpn), htons(cm->msg.dport), - htonl(cm->msg.p_size)); - - dapli_cm_free(cm); - return DAT_INSUFFICIENT_RESOURCES; -} - -/* - * ACTIVE: exchange QP 
information, called from CR thread - */ -static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg) -{ - DAPL_EP *ep = cm->ep; - ib_cm_events_t event = IB_CME_CONNECTED; - - dapl_os_lock(&cm->lock); - if (cm->state != DCM_REP_PENDING) { - dapl_log(DAPL_DBG_TYPE_WARN, - " CONN_RTU: UNEXPECTED state:" - " op %s, st %s <- lid %x sqpn %x sport %x\n", - dapl_cm_op_str(ntohs(msg->op)), - dapl_cm_state_str(cm->state), - ntohs(msg->saddr.ib.lid), ntohl(msg->saddr.ib.qpn), - ntohs(msg->sport)); - dapl_os_unlock(&cm->lock); - return; - } - - /* save remote address information to EP and CM */ - dapl_os_memcpy(&ep->remote_ia_address, - &msg->saddr, sizeof(union dcm_addr)); - dapl_os_memcpy(&cm->msg.daddr, - &msg->saddr, sizeof(union dcm_addr)); - - /* validate private data size, and copy if necessary */ - if (msg->p_size) { - if (ntohs(msg->p_size) > DCM_MAX_PDATA_SIZE) { - dapl_log(DAPL_DBG_TYPE_WARN, - " CONN_RTU: invalid p_size %d:" - " st %s <- lid %x sqpn %x spsp %x\n", - ntohs(msg->p_size), - dapl_cm_state_str(cm->state), - ntohs(msg->saddr.ib.lid), - ntohl(msg->saddr.ib.qpn), - ntohs(msg->sport)); - dapl_os_unlock(&cm->lock); - goto bail; - } - dapl_os_memcpy(cm->msg.p_data, - msg->p_data, ntohs(msg->p_size)); - } - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " CONN_RTU: DST lid=%x," - " iqp=%x, qp_type=%d, port=%x psize=%d\n", - ntohs(cm->msg.daddr.ib.lid), - ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type, - ntohs(msg->sport), ntohs(msg->p_size)); - - if (ntohs(msg->op) == DCM_REP) - event = IB_CME_CONNECTED; - else if (ntohs(msg->op) == DCM_REJ_USER) - event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA; - else - event = IB_CME_DESTINATION_REJECT; - - if (event != IB_CME_CONNECTED) { - dapl_log(DAPL_DBG_TYPE_CM, - " ACTIVE: CM_REQ REJECTED:" - " cm %p op %s, st %s dlid %x iqp %x port %x <-" - " slid %x iqp %x port %x\n", cm, - dapl_cm_op_str(ntohs(msg->op)), - dapl_cm_state_str(cm->state), - ntohs(msg->daddr.ib.lid), ntohl(msg->daddr.ib.qpn), - ntohs(msg->dport), 
ntohs(msg->saddr.ib.lid), - ntohl(msg->saddr.ib.qpn), ntohs(msg->sport)); - - cm->state = DCM_REJECTED; - dapl_os_unlock(&cm->lock); - -#ifdef DAT_EXTENSIONS - if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD) - goto ud_bail; - else -#endif - goto bail; - } - dapl_os_unlock(&cm->lock); - - /* modify QP to RTR and then to RTS with remote info */ - dapl_os_lock(&cm->ep->header.lock); - if (dapls_modify_qp_state(cm->ep->qp_handle, - IBV_QPS_RTR, - cm->msg.daddr.ib.qpn, - cm->msg.daddr.ib.lid, - (ib_gid_handle_t)cm->msg.daddr.ib.gid) != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_RTU: QPS_RTR ERR %s <- lid %x iqp %x\n", - strerror(errno), ntohs(cm->msg.daddr.ib.lid), - ntohl(cm->msg.daddr.ib.qpn)); - dapl_os_unlock(&cm->ep->header.lock); - event = IB_CME_LOCAL_FAILURE; - goto bail; - } - if (dapls_modify_qp_state(cm->ep->qp_handle, - IBV_QPS_RTS, - cm->msg.daddr.ib.qpn, - cm->msg.daddr.ib.lid, - NULL) != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CONN_RTU: QPS_RTS ERR %s <- lid %x iqp %x\n", - strerror(errno), ntohs(cm->msg.daddr.ib.lid), - ntohl(cm->msg.daddr.ib.qpn)); - dapl_os_unlock(&cm->ep->header.lock); - event = IB_CME_LOCAL_FAILURE; - goto bail; - } - dapl_os_unlock(&cm->ep->header.lock); - - /* Send RTU, no private data */ - cm->msg.op = htons(DCM_RTU); - - dapl_os_lock(&cm->lock); - cm->state = DCM_CONNECTED; - dapl_os_unlock(&cm->lock); - - if (ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0)) - goto bail; - - /* init cm_handle and post the event with private data */ - dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n"); - -#ifdef DAT_EXTENSIONS -ud_bail: - if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD) { - DAT_IB_EXTENSION_EVENT_DATA xevent; - uint16_t lid = ntohs(cm->msg.daddr.ib.lid); - - /* post EVENT, modify_qp, AH already created, ucm msg */ - xevent.status = 0; - xevent.type = DAT_IB_UD_REMOTE_AH; - xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn); - xevent.remote_ah.ah = dapls_create_ah(cm->hca, - cm->ep->qp_handle->pd, - 
cm->ep->qp_handle, - htons(lid), - NULL); - if (xevent.remote_ah.ah == NULL) { - dapl_log(DAPL_DBG_TYPE_ERR, - " active UD RTU: ERR create_ah" - " for qpn 0x%x lid 0x%x\n", - xevent.remote_ah.qpn, lid); - event = IB_CME_LOCAL_FAILURE; - goto bail; - } - cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */ - - dapl_os_memcpy(&xevent.remote_ah.ia_addr, - &cm->msg.daddr, - sizeof(union dcm_addr)); - - /* remote ia_addr reference includes ucm qpn, not IB qpn */ - ((union dcm_addr*) - &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " ACTIVE: UD xevent ah %p qpn %x lid %x\n", - xevent.remote_ah.ah, xevent.remote_ah.qpn, lid); - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " ACTIVE: UD xevent ia_addr qp_type %d" - " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n", - ((union dcm_addr*) - &xevent.remote_ah.ia_addr)->ib.qp_type, - ntohs(((union dcm_addr*) - &xevent.remote_ah.ia_addr)->ib.lid), - ntohl(((union dcm_addr*) - &xevent.remote_ah.ia_addr)->ib.qpn), - ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]), - ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[8])); - - if (event == IB_CME_CONNECTED) - event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED; - else { - xevent.type = DAT_IB_UD_CONNECT_REJECT; - event = DAT_IB_UD_CONNECTION_REJECT_EVENT; - } - - dapls_evd_post_connection_event_ext( - (DAPL_EVD *)cm->ep->param.connect_evd_handle, - event, - (DAT_EP_HANDLE)ep, - (DAT_COUNT)ntohs(cm->msg.p_size), - (DAT_PVOID *)cm->msg.p_data, - (DAT_PVOID *)&xevent); - } else -#endif - { - dapl_evd_connection_callback(cm, - IB_CME_CONNECTED, - cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep); - } - dapl_log(DAPL_DBG_TYPE_CM_EST, - " UCM_ACTIVE_CONN %d [lid port qpn] %x %x %x -> %x %x %x\n", - cm->retries, ntohs(cm->msg.saddr.ib.lid), - ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn), - ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport), - ntohl(cm->msg.dqpn)); - return; -bail: - dapl_evd_connection_callback(NULL, event, cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep); - 
dapli_cm_free(cm); -} - -/* - * PASSIVE: Accept on listen CM PSP. - * create new CM object for this CR, - * receive peer QP information, private data, - * and post cr_event - */ -static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg) -{ - dp_ib_cm_handle_t acm; - - /* Allocate accept CM and setup passive references */ - if ((acm = dapls_ib_cm_create(NULL)) == NULL) { - dapl_log(DAPL_DBG_TYPE_WARN, " accept: ERR cm_create\n"); - return; - } - - /* dest CM info from CR msg, source CM info from listen */ - acm->sp = cm->sp; - acm->hca = cm->hca; - acm->msg.op = msg->op; - acm->msg.dport = msg->sport; - acm->msg.dqpn = msg->sqpn; - acm->msg.sport = cm->msg.sport; - acm->msg.sqpn = cm->msg.sqpn; - acm->msg.p_size = msg->p_size; - - /* CR saddr is CM daddr info, need EP for local saddr */ - dapl_os_memcpy(&acm->msg.daddr, &msg->saddr, sizeof(union dcm_addr)); - - dapl_log(DAPL_DBG_TYPE_CM, - " accept: DST port=%x lid=%x, iqp=%x, psize=%d\n", - ntohs(acm->msg.dport), ntohs(acm->msg.daddr.ib.lid), - htonl(acm->msg.daddr.ib.qpn), htons(acm->msg.p_size)); - - /* validate private data size before reading */ - if (ntohs(msg->p_size) > DCM_MAX_PDATA_SIZE) { - dapl_log(DAPL_DBG_TYPE_WARN, " accept: psize (%d) wrong\n", - ntohs(msg->p_size)); - goto bail; - } - - /* read private data into cm_handle if any present */ - if (msg->p_size) - dapl_os_memcpy(acm->msg.p_data, - msg->p_data, ntohs(msg->p_size)); - - acm->state = DCM_ACCEPTING; - dapli_queue_conn(acm); - -#ifdef DAT_EXTENSIONS - if (acm->msg.daddr.ib.qp_type == IBV_QPT_UD) { - DAT_IB_EXTENSION_EVENT_DATA xevent; - - /* post EVENT, modify_qp created ah */ - xevent.status = 0; - xevent.type = DAT_IB_UD_CONNECT_REQUEST; - - dapls_evd_post_cr_event_ext(acm->sp, - DAT_IB_UD_CONNECTION_REQUEST_EVENT, - acm, - (DAT_COUNT)ntohs(acm->msg.p_size), - (DAT_PVOID *)acm->msg.p_data, - (DAT_PVOID *)&xevent); - } else -#endif - /* trigger CR event and return SUCCESS */ - dapls_cr_callback(acm, - 
IB_CME_CONNECTION_REQUEST_PENDING, - acm->msg.p_data, ntohs(msg->p_size), acm->sp); - return; - -bail: - /* schedule work thread cleanup */ - dapli_cm_free(acm); - return; -} - -/* - * PASSIVE: read RTU from active peer, post CONN event - */ -static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg) -{ - dapl_os_lock(&cm->lock); - if ((ntohs(msg->op) != DCM_RTU) || (cm->state != DCM_RTU_PENDING)) { - dapl_log(DAPL_DBG_TYPE_WARN, - " accept_rtu: UNEXPECTED op, state:" - " op %s, st %s <- lid %x iqp %x sport %x\n", - dapl_cm_op_str(ntohs(msg->op)), - dapl_cm_state_str(cm->state), - ntohs(msg->saddr.ib.lid), ntohl(msg->saddr.ib.qpn), - ntohs(msg->sport)); - dapl_os_unlock(&cm->lock); - goto bail; - } - cm->state = DCM_CONNECTED; - dapl_os_unlock(&cm->lock); - - /* final data exchange if remote QP state is good to go */ - dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: connected!\n"); - -#ifdef DAT_EXTENSIONS - if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) { - DAT_IB_EXTENSION_EVENT_DATA xevent; - uint16_t lid = ntohs(cm->msg.daddr.ib.lid); - - /* post EVENT, modify_qp, AH already created, ucm msg */ - xevent.status = 0; - xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH; - xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn); - xevent.remote_ah.ah = dapls_create_ah(cm->hca, - cm->ep->qp_handle->pd, - cm->ep->qp_handle, - htons(lid), - NULL); - if (xevent.remote_ah.ah == NULL) { - dapl_log(DAPL_DBG_TYPE_ERR, - " passive UD RTU: ERR create_ah" - " for qpn 0x%x lid 0x%x\n", - xevent.remote_ah.qpn, lid); - goto bail; - } - cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */ - dapl_os_memcpy(&xevent.remote_ah.ia_addr, - &cm->msg.daddr, - sizeof(union dcm_addr)); - - /* remote ia_addr reference includes ucm qpn, not IB qpn */ - ((union dcm_addr*) - &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " PASSIVE: UD xevent ah %p qpn %x lid %x\n", - xevent.remote_ah.ah, xevent.remote_ah.qpn, lid); - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " PASSIVE: UD 
xevent ia_addr qp_type %d" - " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n", - ((union dcm_addr*) - &xevent.remote_ah.ia_addr)->ib.qp_type, - ntohs(((union dcm_addr*) - &xevent.remote_ah.ia_addr)->ib.lid), - ntohl(((union dcm_addr*) - &xevent.remote_ah.ia_addr)->ib.qpn), - ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]), - ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[8])); - - dapls_evd_post_connection_event_ext( - (DAPL_EVD *)cm->ep->param.connect_evd_handle, - DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED, - (DAT_EP_HANDLE)cm->ep, - (DAT_COUNT)ntohs(cm->msg.p_size), - (DAT_PVOID *)cm->msg.p_data, - (DAT_PVOID *)&xevent); - } else { -#endif - dapls_cr_callback(cm, IB_CME_CONNECTED, NULL, 0, cm->sp); - } - dapl_log(DAPL_DBG_TYPE_CM_EST, - " UCM_PASSIVE_CONN %d [lid port qpn] %x %x %x <- %x %x %x\n", - cm->retries, ntohs(cm->msg.saddr.ib.lid), - ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn), - ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport), - ntohl(cm->msg.dqpn)); - return; -bail: - dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE, NULL, 0, cm->sp); - dapli_cm_free(cm); -} - -/* - * PASSIVE: user accepted, send reply message with pdata - */ -static int ucm_reply(dp_ib_cm_handle_t cm) -{ - dapl_os_lock(&cm->lock); - if (cm->state != DCM_RTU_PENDING) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CM_REPLY: wrong state %s", - dapl_cm_state_str(cm->state)); - dapl_os_unlock(&cm->lock); - return -1; - } - - if (cm->retries == cm->hca->ib_trans.retries) { - dapl_log(DAPL_DBG_TYPE_ERR, - " CM_REPLY: RETRIES EXHAUSTED (lid port qpn)" - " %x %x %x -> %x %x %x\n", - htons(cm->msg.saddr.ib.lid), - htons(cm->msg.sport), - htonl(cm->msg.saddr.ib.qpn), - htons(cm->msg.daddr.ib.lid), - htons(cm->msg.dport), - htonl(cm->msg.daddr.ib.qpn)); - - dapl_os_unlock(&cm->lock); -#ifdef DAPL_COUNTERS - /* called from check_timers in cm_thread, cm lock held */ - if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST) { - dapl_os_unlock(&cm->hca->ib_trans.lock); - 
dapls_print_cm_list(dapl_llist_peek_head(&cm->hca->ia_list_head)); - dapl_os_lock(&cm->hca->ib_trans.lock); - } -#endif -#ifdef DAT_EXTENSIONS - if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) { - DAT_IB_EXTENSION_EVENT_DATA xevent; - - /* post REJECT event with CONN_REQ p_data */ - xevent.status = 0; - xevent.type = DAT_IB_UD_CONNECT_ERROR; - - dapls_evd_post_connection_event_ext( - (DAPL_EVD *)cm->ep->param.connect_evd_handle, - DAT_IB_UD_CONNECTION_ERROR_EVENT, - (DAT_EP_HANDLE)cm->ep, - (DAT_COUNT)ntohs(cm->msg.p_size), - (DAT_PVOID *)cm->msg.p_data, - (DAT_PVOID *)&xevent); - } else -#endif - dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE, - NULL, 0, cm->sp); - return -1; - } - dapl_os_get_time(&cm->timer); /* RTU expected */ - dapl_os_unlock(&cm->lock); - if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) - return -1; - - return 0; -} - - -/* - * PASSIVE: consumer accept, send local QP information, private data, - * queue on work thread to receive RTU information to avoid blocking - * user thread. 
- */ -DAT_RETURN -dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data) -{ - DAPL_IA *ia = ep->header.owner_ia; - dp_ib_cm_handle_t cm = cr->ib_cm_handle; - - if (p_size > DCM_MAX_PDATA_SIZE) - return DAT_LENGTH_ERROR; - - dapl_os_lock(&cm->lock); - if (cm->state != DCM_ACCEPTING) { - dapl_os_unlock(&cm->lock); - return DAT_INVALID_STATE; - } - dapl_os_unlock(&cm->lock); - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ACCEPT_USR: remote lid=%x" - " iqp=%x qp_type %d, psize=%d\n", - ntohs(cm->msg.daddr.ib.lid), - ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type, - p_size); - - dapl_dbg_log(DAPL_DBG_TYPE_CM, - " ACCEPT_USR: remote GID subnet %016llx id %016llx\n", - (unsigned long long) - htonll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]), - (unsigned long long) - htonll(*(uint64_t*)&cm->msg.daddr.ib.gid[8])); - -#ifdef DAT_EXTENSIONS - if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD && - ep->qp_handle->qp_type != IBV_QPT_UD) { - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_USR: ERR remote QP is UD," - ", but local QP is not\n"); - return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP); - } -#endif - - /* modify QP to RTR and then to RTS with remote info already read */ - dapl_os_lock(&ep->header.lock); - if (dapls_modify_qp_state(ep->qp_handle, - IBV_QPS_RTR, - cm->msg.daddr.ib.qpn, - cm->msg.daddr.ib.lid, - (ib_gid_handle_t)cm->msg.daddr.ib.gid) != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_USR: QPS_RTR ERR %s -> lid %x qpn %x\n", - strerror(errno), ntohs(cm->msg.daddr.ib.lid), - ntohl(cm->msg.daddr.ib.qpn)); - dapl_os_unlock(&ep->header.lock); - goto bail; - } - if (dapls_modify_qp_state(ep->qp_handle, - IBV_QPS_RTS, - cm->msg.daddr.ib.qpn, - cm->msg.daddr.ib.lid, - NULL) != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " ACCEPT_USR: QPS_RTS ERR %s -> lid %x qpn %x\n", - strerror(errno), ntohs(cm->msg.daddr.ib.lid), - ntohl(cm->msg.daddr.ib.qpn)); - dapl_os_unlock(&ep->header.lock); - goto bail; - } - dapl_os_unlock(&ep->header.lock); - - /* save 
remote address information */ - dapl_os_memcpy(&ep->remote_ia_address, - &cm->msg.saddr, sizeof(union dcm_addr)); - - /* setup local QP info and type from EP, copy pdata, for reply */ - cm->msg.op = htons(DCM_REP); - cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num); - cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type; - cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid; - dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], - &cm->hca->ib_trans.addr.ib.gid, 16); - - /* - * UD: deliver p_data with REQ and EST event, keep REQ p_data in - * cm->msg.p_data and save REPLY accept data in cm->p_data for retries - */ - cm->p_size = p_size; - dapl_os_memcpy(&cm->p_data, p_data, p_size); - - /* save state and setup valid reference to EP, HCA */ - dapl_ep_link_cm(ep, cm); - cm->ep = ep; - cm->hca = ia->hca_ptr; - - dapl_os_lock(&cm->lock); - dapl_os_get_time(&cm->timer); /* RTU expected */ - cm->state = DCM_RTU_PENDING; - dapl_os_unlock(&cm->lock); - - if (ucm_reply(cm)) { - dapl_ep_unlink_cm(ep, cm); - goto bail; - } - dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: accepted!\n"); - dapls_thread_signal(&cm->hca->ib_trans.signal); - return DAT_SUCCESS; -bail: - dapli_cm_free(cm); - return DAT_INTERNAL_ERROR; -} - - -/* - * dapls_ib_connect - * - * Initiate a connection with the passive listener on another node - * - * Input: - * ep_handle, - * remote_ia_address, - * remote_conn_qual, - * prd_size size of private data and structure - * prd_prt pointer to private data structure - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INVALID_PARAMETER - * - */ -DAT_RETURN -dapls_ib_connect(IN DAT_EP_HANDLE ep_handle, - IN DAT_IA_ADDRESS_PTR r_addr, - IN DAT_CONN_QUAL r_psp, - IN DAT_COUNT p_size, IN void *p_data) -{ - DAPL_EP *ep = (DAPL_EP *)ep_handle; - dp_ib_cm_handle_t cm; - - /* create CM object, initialize SRC info from EP */ - cm = dapls_ib_cm_create(ep); - if (cm == NULL) - return DAT_INSUFFICIENT_RESOURCES; - - /* remote hca and port: lid, 
gid, network order */ - dapl_os_memcpy(&cm->msg.daddr, r_addr, sizeof(union dcm_addr)); - - /* remote uCM information, comes from consumer provider r_addr */ - cm->msg.dport = htons((uint16_t)r_psp); - cm->msg.dqpn = cm->msg.daddr.ib.qpn; - cm->msg.daddr.ib.qpn = 0; /* don't have a remote qpn until reply */ - - if (p_size) { - cm->msg.p_size = htons(p_size); - dapl_os_memcpy(&cm->msg.p_data, p_data, p_size); - } - - cm->state = DCM_REP_PENDING; - - /* build connect request, send to remote CM based on r_addr info */ - return (dapli_cm_connect(ep, cm)); -} - -/* - * dapls_ib_disconnect - * - * Disconnect an EP - * - * Input: - * ep_handle, - * disconnect_flags - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - */ -DAT_RETURN -dapls_ib_disconnect(IN DAPL_EP *ep_ptr, IN DAT_CLOSE_FLAGS close_flags) -{ - dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr); - - dapl_os_lock(&ep_ptr->header.lock); - if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED || - ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC || - cm_ptr == NULL) { - dapl_os_unlock(&ep_ptr->header.lock); - return DAT_SUCCESS; - } - dapl_os_unlock(&ep_ptr->header.lock); - - dapli_cm_disconnect(cm_ptr); - - /* ABRUPT close, wait for callback and DISCONNECTED state */ - if (close_flags == DAT_CLOSE_ABRUPT_FLAG) { - dapl_os_lock(&ep_ptr->header.lock); - while (ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) { - dapl_os_unlock(&ep_ptr->header.lock); - dapl_os_sleep_usec(10000); - dapl_os_lock(&ep_ptr->header.lock); - } - dapl_os_unlock(&ep_ptr->header.lock); - } - - return DAT_SUCCESS; -} - -/* - * dapls_ib_disconnect_clean - * - * Clean up outstanding connection data. This routine is invoked - * after the final disconnect callback has occurred. Only on the - * ACTIVE side of a connection. It is also called if dat_ep_connect - * times out using the consumer supplied timeout value. 
- * - * Input: - * ep_ptr DAPL_EP - * active Indicates active side of connection - * - * Output: - * none - * - * Returns: - * void - * - */ -void -dapls_ib_disconnect_clean(IN DAPL_EP *ep, - IN DAT_BOOLEAN active, - IN const ib_cm_events_t ib_cm_event) -{ - if (ib_cm_event == IB_CME_TIMEOUT) { - dp_ib_cm_handle_t cm_ptr; - - if ((cm_ptr = dapl_get_cm_from_ep(ep)) == NULL) - return; - - dapl_log(DAPL_DBG_TYPE_WARN, - "dapls_ib_disc_clean: CONN_TIMEOUT ep %p cm %p %s\n", - ep, cm_ptr, dapl_cm_state_str(cm_ptr->state)); - - /* schedule release of socket and local resources */ - dapli_cm_free(cm_ptr); - } -} - -/* - * dapl_ib_setup_conn_listener - * - * Have the CM set up a connection listener. - * - * Input: - * ibm_hca_handle HCA handle - * qp_handle QP handle - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INTERNAL_ERROR - * DAT_CONN_QUAL_UNAVAILBLE - * DAT_CONN_QUAL_IN_USE - * - */ -DAT_RETURN -dapls_ib_setup_conn_listener(IN DAPL_IA *ia, - IN DAT_UINT64 sid, - IN DAPL_SP *sp) -{ - ib_cm_srvc_handle_t cm = NULL; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " listen(ia %p ServiceID %x sp %p)\n", - ia, sid, sp); - - /* reserve local port, then allocate CM object */ - if (!ucm_get_port(&ia->hca_ptr->ib_trans, (uint16_t)sid)) { - dapl_dbg_log(DAPL_DBG_TYPE_WARN, - " listen: ERROR %s on conn_qual %x\n", - strerror(errno), sid); - return DAT_CONN_QUAL_IN_USE; - } - - /* cm_create will setup saddr for listen server */ - if ((cm = dapls_ib_cm_create(NULL)) == NULL) - return DAT_INSUFFICIENT_RESOURCES; - - /* LISTEN: init DST address and QP info to local CM server info */ - cm->sp = sp; - cm->hca = ia->hca_ptr; - cm->msg.sport = htons((uint16_t)sid); - cm->msg.sqpn = htonl(ia->hca_ptr->ib_trans.qp->qp_num); - cm->msg.saddr.ib.qp_type = IBV_QPT_UD; - cm->msg.saddr.ib.lid = ia->hca_ptr->ib_trans.addr.ib.lid; - dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], - &cm->hca->ib_trans.addr.ib.gid, 16); - - /* save cm_handle reference in service 
point */ - sp->cm_srvc_handle = cm; - - /* queue up listen socket to process inbound CR's */ - cm->state = DCM_LISTEN; - dapli_queue_listen(cm); - - return DAT_SUCCESS; -} - - -/* - * dapl_ib_remove_conn_listener - * - * Have the CM remove a connection listener. - * - * Input: - * ia_handle IA handle - * ServiceID IB Channel Service ID - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INVALID_STATE - * - */ -DAT_RETURN -dapls_ib_remove_conn_listener(IN DAPL_IA *ia, IN DAPL_SP *sp) -{ - ib_cm_srvc_handle_t cm = sp->cm_srvc_handle; - - /* free cm_srvc_handle and port, and mark CM for cleanup */ - if (cm) { - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " remove_listener(ia %p sp %p cm %p psp=%x)\n", - ia, sp, cm, ntohs(cm->msg.dport)); - - sp->cm_srvc_handle = NULL; - dapli_dequeue_listen(cm); - ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport)); - dapls_cm_release(cm); /* last ref, dealloc */ - } - return DAT_SUCCESS; -} - -/* - * dapls_ib_accept_connection - * - * Perform necessary steps to accept a connection - * - * Input: - * cr_handle - * ep_handle - * private_data_size - * private_data - * - * Output: - * none - * - * Returns: - * DAT_SUCCESS - * DAT_INSUFFICIENT_RESOURCES - * DAT_INTERNAL_ERROR - * - */ -DAT_RETURN -dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle, - IN DAT_EP_HANDLE ep_handle, - IN DAT_COUNT p_size, - IN const DAT_PVOID p_data) -{ - DAPL_CR *cr = (DAPL_CR *)cr_handle; - DAPL_EP *ep = (DAPL_EP *)ep_handle; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " accept_connection(cr %p ep %p prd %p,%d)\n", - cr, ep, p_data, p_size); - - /* allocate and attach a QP if necessary */ - if (ep->qp_state == DAPL_QP_STATE_UNATTACHED) { - DAT_RETURN status; - status = dapls_ib_qp_alloc(ep->header.owner_ia, - ep, ep); - if (status != DAT_SUCCESS) - return status; - } - return (dapli_accept_usr(ep, cr, p_size, p_data)); -} - -/* - * dapls_ib_reject_connection - * - * Reject a connection - * - * Input: - * cr_handle - * - * Output: - * none - * - * Returns: 
- * DAT_SUCCESS - * DAT_INTERNAL_ERROR - * - */ -DAT_RETURN -dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm, - IN int reason, - IN DAT_COUNT psize, IN const DAT_PVOID pdata) -{ - dapl_dbg_log(DAPL_DBG_TYPE_EP, - " reject(cm %p reason %x, pdata %p, psize %d)\n", - cm, reason, pdata, psize); - - if (psize > DCM_MAX_PDATA_SIZE) - return DAT_LENGTH_ERROR; - - /* cr_thread will destroy CR, update saddr lid, gid, qp_type info */ - dapl_os_lock(&cm->lock); - dapl_log(DAPL_DBG_TYPE_CM, - " PASSIVE: REJECTING CM_REQ:" - " cm %p op %s, st %s slid %x iqp %x port %x ->" - " dlid %x iqp %x port %x\n", cm, - dapl_cm_op_str(ntohs(cm->msg.op)), - dapl_cm_state_str(cm->state), - ntohs(cm->hca->ib_trans.addr.ib.lid), - ntohl(cm->msg.saddr.ib.qpn), - ntohs(cm->msg.sport), ntohs(cm->msg.daddr.ib.lid), - ntohl(cm->msg.daddr.ib.qpn), ntohs(cm->msg.dport)); - - cm->state = DCM_REJECTED; - cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid; - cm->msg.saddr.ib.qp_type = cm->msg.daddr.ib.qp_type; - dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], - &cm->hca->ib_trans.addr.ib.gid, 16); - cm->msg.op = htons(DCM_REJ_USER); - - if (ucm_send(&cm->hca->ib_trans, &cm->msg, pdata, psize)) { - dapl_log(DAPL_DBG_TYPE_WARN, - " cm_reject: send ERR: %s\n", strerror(errno)); - dapl_os_unlock(&cm->lock); - return DAT_INTERNAL_ERROR; - } - dapl_os_unlock(&cm->lock); - dapli_cm_free(cm); - return DAT_SUCCESS; -} - -/* - * dapls_ib_cm_remote_addr - * - * Obtain the remote IP address given a connection - * - * Input: - * cr_handle - * - * Output: - * remote_ia_address: where to place the remote address - * - * Returns: - * DAT_SUCCESS - * DAT_INVALID_HANDLE - * - */ -DAT_RETURN -dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, - OUT DAT_SOCK_ADDR6 * remote_ia_address) -{ - DAPL_HEADER *header; - dp_ib_cm_handle_t cm; - - dapl_dbg_log(DAPL_DBG_TYPE_EP, - "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n", - dat_handle); - - header = (DAPL_HEADER *) dat_handle; - - if (header->magic == DAPL_MAGIC_EP) - cm = 
dapl_get_cm_from_ep((DAPL_EP *) dat_handle); - else if (header->magic == DAPL_MAGIC_CR) - cm = ((DAPL_CR *) dat_handle)->ib_cm_handle; - else - return DAT_INVALID_HANDLE; - - dapl_os_memcpy(remote_ia_address, - &cm->msg.daddr, sizeof(DAT_SOCK_ADDR6)); - - return DAT_SUCCESS; -} - -int dapls_ib_private_data_size( - IN DAPL_HCA *hca_ptr) -{ - return DCM_MAX_PDATA_SIZE; -} - -#if defined(_WIN32) || defined(_WIN64) - -void cm_thread(void *arg) -{ - struct dapl_hca *hca = arg; - dp_ib_cm_handle_t cm, next; - DWORD time_ms; - - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread: ENTER hca %p\n", hca); - dapl_os_lock(&hca->ib_trans.lock); - for (hca->ib_trans.cm_state = IB_THREAD_RUN; - hca->ib_trans.cm_state == IB_THREAD_RUN || - !dapl_llist_is_empty(&hca->ib_trans.list); - dapl_os_lock(&hca->ib_trans.lock)) { - - time_ms = INFINITE; - CompSetZero(&hca->ib_trans.signal.set); - CompSetAdd(&hca->ib_hca_handle->channel, &hca->ib_trans.signal.set); - CompSetAdd(&hca->ib_trans.rch->comp_channel, &hca->ib_trans.signal.set); - CompSetAdd(&hca->ib_trans.ib_cq->comp_channel, &hca->ib_trans.signal.set); - - next = dapl_llist_is_empty(&hca->ib_trans.list) ? 
NULL : - dapl_llist_peek_head(&hca->ib_trans.list); - - while (next) { - cm = next; - next = dapl_llist_next_entry(&hca->ib_trans.list, - (DAPL_LLIST_ENTRY *)&cm->local_entry); - dapls_cm_acquire(cm); /* hold thread ref */ - dapl_os_lock(&cm->lock); - if (cm->state == DCM_FREE || - hca->ib_trans.cm_state != IB_THREAD_RUN) { - dapl_os_unlock(&cm->lock); - dapl_log(DAPL_DBG_TYPE_CM, - " CM FREE: %p ep=%p st=%s refs=%d\n", - cm, cm->ep, dapl_cm_state_str(cm->state), - cm->ref_count); - - dapls_cm_release(cm); /* release alloc ref */ - dapli_cm_dequeue(cm); /* release workq ref */ - dapls_cm_release(cm); /* release thread ref */ - continue; - } - dapl_os_unlock(&cm->lock); - ucm_check_timers(cm, &time_ms); - dapls_cm_release(cm); /* release thread ref */ - } - - dapl_os_unlock(&hca->ib_trans.lock); - - hca->ib_hca_handle->channel.Milliseconds = time_ms; - hca->ib_trans.rch->comp_channel.Milliseconds = time_ms; - hca->ib_trans.ib_cq->comp_channel.Milliseconds = time_ms; - CompSetPoll(&hca->ib_trans.signal.set, time_ms); - - hca->ib_hca_handle->channel.Milliseconds = 0; - hca->ib_trans.rch->comp_channel.Milliseconds = 0; - hca->ib_trans.ib_cq->comp_channel.Milliseconds = 0; - - ucm_recv(&hca->ib_trans); - ucm_async_event(hca); - dapli_cq_event_cb(&hca->ib_trans); - } - - dapl_os_unlock(&hca->ib_trans.lock); - hca->ib_trans.cm_state = IB_THREAD_EXIT; - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread(hca %p) exit\n", hca); -} - -#else // _WIN32 || _WIN64 - -void cm_thread(void *arg) -{ - struct dapl_hca *hca = arg; - dp_ib_cm_handle_t cm, next; - struct dapl_fd_set *set; - char rbuf[2]; - int time_ms; - - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread: ENTER hca %p\n", hca); - set = dapl_alloc_fd_set(); - if (!set) - goto out; - - dapl_os_lock(&hca->ib_trans.lock); - hca->ib_trans.cm_state = IB_THREAD_RUN; - - while (1) { - time_ms = -1; /* reset to blocking */ - dapl_fd_zero(set); - dapl_fd_set(hca->ib_trans.signal.scm[0], set, DAPL_FD_READ); - 
dapl_fd_set(hca->ib_hca_handle->async_fd, set, DAPL_FD_READ); - dapl_fd_set(hca->ib_trans.rch->fd, set, DAPL_FD_READ); - dapl_fd_set(hca->ib_trans.ib_cq->fd, set, DAPL_FD_READ); - - if (!dapl_llist_is_empty(&hca->ib_trans.list)) - next = dapl_llist_peek_head(&hca->ib_trans.list); - else - next = NULL; - - while (next) { - cm = next; - next = dapl_llist_next_entry( - &hca->ib_trans.list, - (DAPL_LLIST_ENTRY *)&cm->local_entry); - dapls_cm_acquire(cm); /* hold thread ref */ - dapl_os_lock(&cm->lock); - if (cm->state == DCM_FREE || - hca->ib_trans.cm_state != IB_THREAD_RUN) { - dapl_os_unlock(&cm->lock); - dapl_log(DAPL_DBG_TYPE_CM, - " CM FREE: %p ep=%p st=%s refs=%d\n", - cm, cm->ep, dapl_cm_state_str(cm->state), - cm->ref_count); - - dapls_cm_release(cm); /* release alloc ref */ - dapli_cm_dequeue(cm); /* release workq ref */ - dapls_cm_release(cm); /* release thread ref */ - continue; - } - dapl_os_unlock(&cm->lock); - ucm_check_timers(cm, &time_ms); - dapls_cm_release(cm); /* release thread ref */ - } - - /* set to exit and all resources destroyed */ - if ((hca->ib_trans.cm_state != IB_THREAD_RUN) && - (dapl_llist_is_empty(&hca->ib_trans.list))) - break; - - dapl_os_unlock(&hca->ib_trans.lock); - dapl_select(set, time_ms); - - /* Process events: CM, ASYNC, NOTIFY THREAD */ - if (dapl_poll(hca->ib_trans.rch->fd, - DAPL_FD_READ) == DAPL_FD_READ) { - ucm_recv(&hca->ib_trans); - } - if (dapl_poll(hca->ib_hca_handle->async_fd, - DAPL_FD_READ) == DAPL_FD_READ) { - ucm_async_event(hca); - } - if (dapl_poll(hca->ib_trans.ib_cq->fd, - DAPL_FD_READ) == DAPL_FD_READ) { - dapli_cq_event_cb(&hca->ib_trans); - } - while (dapl_poll(hca->ib_trans.signal.scm[0], - DAPL_FD_READ) == DAPL_FD_READ) { - recv(hca->ib_trans.signal.scm[0], rbuf, 2, 0); - } - dapl_os_lock(&hca->ib_trans.lock); - - /* set to exit and all resources destroyed */ - if ((hca->ib_trans.cm_state != IB_THREAD_RUN) && - (dapl_llist_is_empty(&hca->ib_trans.list))) - break; - } - - 
dapl_os_unlock(&hca->ib_trans.lock); - free(set); -out: - hca->ib_trans.cm_state = IB_THREAD_EXIT; - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread(hca %p) exit\n", hca); -} -#endif - -#ifdef DAPL_COUNTERS -static char _ctr_host_[128]; -/* Debug aid: List all Connections in process and state */ -void dapls_print_cm_list(IN DAPL_IA *ia_ptr) -{ - /* Print in process CM's for this IA, if debug type set */ - int i = 0; - dp_ib_cm_handle_t cm, next_cm; - struct dapl_llist_entry **list; - DAPL_OS_LOCK *lock; - - /* LISTEN LIST */ - list = &ia_ptr->hca_ptr->ib_trans.llist; - lock = &ia_ptr->hca_ptr->ib_trans.llock; - - dapl_os_lock(lock); - if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list)) - next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list); - else - next_cm = NULL; - - gethostname(_ctr_host_, sizeof(_ctr_host_)); - printf("\n [%s:%x] DAPL IA LISTEN/CONNECTIONS IN PROCESS:\n", - _ctr_host_ , dapl_os_getpid()); - - while (next_cm) { - cm = next_cm; - next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list, - (DAPL_LLIST_ENTRY*)&cm->local_entry); - - printf( " LISTEN[%d]: sp %p %s uCM_QP: %x %x %x l_pid %x (%x)\n", - i, cm->sp, dapl_cm_state_str(cm->state), - ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport), - ntohl(cm->msg.sqpn), ntohl(*(DAT_UINT32*)cm->msg.resv), - ntohl(*(DAT_UINT32*)cm->msg.resv)); - i++; - } - dapl_os_unlock(lock); - - /* CONNECTION LIST */ - list = &ia_ptr->hca_ptr->ib_trans.list; - lock = &ia_ptr->hca_ptr->ib_trans.lock; - - dapl_os_lock(lock); - if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list)) - next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list); - else - next_cm = NULL; - - while (next_cm) { - cm = next_cm; - next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list, - (DAPL_LLIST_ENTRY*)&cm->local_entry); - - printf( " CONN[%d]: ep %p cm %p %s %s" - " %x %x %x %s %x %x %x r_pid %x (%x)\n", - i, cm->ep, cm, - cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? 
"RC" : "UD", - dapl_cm_state_str(cm->state), - ntohs(cm->msg.saddr.ib.lid), - ntohs(cm->msg.sport), - ntohl(cm->msg.saddr.ib.qpn), - cm->sp ? "<-" : "->", - ntohs(cm->msg.daddr.ib.lid), - ntohs(cm->msg.dport), - ntohl(cm->msg.daddr.ib.qpn), - ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv), - ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv)); - i++; - } - printf("\n"); - dapl_os_unlock(lock); -} -#endif +/* + * Copyright (c) 2009 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +#include "dapl.h" +#include "dapl_adapter_util.h" +#include "dapl_evd_util.h" +#include "dapl_cr_util.h" +#include "dapl_name_service.h" +#include "dapl_ib_util.h" +#include "dapl_ep_util.h" +#include "dapl_osd.h" + + +#if defined(_WIN32) +#include +#else // _WIN32 +enum DAPL_FD_EVENTS { + DAPL_FD_READ = POLLIN, + DAPL_FD_WRITE = POLLOUT, + DAPL_FD_ERROR = POLLERR +}; + +struct dapl_fd_set { + int index; + struct pollfd set[DAPL_FD_SETSIZE]; +}; + +static struct dapl_fd_set *dapl_alloc_fd_set(void) +{ + return dapl_os_alloc(sizeof(struct dapl_fd_set)); +} + +static void dapl_fd_zero(struct dapl_fd_set *set) +{ + set->index = 0; +} + +static int dapl_fd_set(DAPL_SOCKET s, struct dapl_fd_set *set, + enum DAPL_FD_EVENTS event) +{ + if (set->index == DAPL_FD_SETSIZE - 1) { + dapl_log(DAPL_DBG_TYPE_ERR, + "SCM ERR: cm_thread exceeded FD_SETSIZE %d\n", + set->index + 1); + return -1; + } + + set->set[set->index].fd = s; + set->set[set->index].revents = 0; + set->set[set->index++].events = event; + return 0; +} + +static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event) +{ + struct pollfd fds; + int ret; + + fds.fd = s; + fds.events = event; + fds.revents = 0; + ret = poll(&fds, 1, 0); + dapl_log(DAPL_DBG_TYPE_CM, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n", + s, ret, fds.revents); + if (ret == 0) + return 0; + else if (fds.revents & (POLLERR | POLLHUP | POLLNVAL)) + return DAPL_FD_ERROR; + else + return fds.revents; +} + +static int dapl_select(struct dapl_fd_set *set, int time_ms) +{ + int ret; + + dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep, fds=%d\n", + set->index); + ret = poll(set->set, set->index, time_ms); + dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup, ret=0x%x\n", ret); + return ret; +} +#endif + +/* forward declarations */ +static int ucm_reply(dp_ib_cm_handle_t cm); +static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg); +static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg); +static void 
ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg); +static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size); +static void ucm_disconnect_final(dp_ib_cm_handle_t cm); +DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm); +DAT_RETURN dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm); + +/* Service ids - port space */ +static uint16_t ucm_get_port(ib_hca_transport_t *tp, uint16_t port) +{ + int i = 0; + + dapl_os_lock(&tp->plock); + /* get specific ID */ + if (port) { + if (tp->sid[port] == 0) { + tp->sid[port] = 1; + i = port; + } + goto done; + } + + /* get any free ID */ + for (i = 0xffff; i > 0; i--) { + if (tp->sid[i] == 0) { + tp->sid[i] = 1; + break; + } + } +done: + dapl_os_unlock(&tp->plock); + return i; +} + +static void ucm_free_port(ib_hca_transport_t *tp, uint16_t port) +{ + dapl_os_lock(&tp->plock); + tp->sid[port] = 0; + dapl_os_unlock(&tp->plock); +} + +static void ucm_check_timers(dp_ib_cm_handle_t cm, int *timer) +{ + DAPL_OS_TIMEVAL time; + + dapl_os_lock(&cm->lock); + dapl_os_get_time(&time); + switch (cm->state) { + case DCM_REP_PENDING: + *timer = cm->hca->ib_trans.cm_timer; + /* wait longer each retry */ + if ((time - cm->timer)/1000 > + (cm->hca->ib_trans.rep_time << cm->retries)) { + dapl_log(DAPL_DBG_TYPE_CM_WARN, + " CM_REQ retry %p %d [lid, port, qpn]:" + " %x %x %x -> %x %x %x Time(ms) %llu > %d\n", + cm, cm->retries+1, ntohs(cm->msg.saddr.ib.lid), + ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn), + ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport), + ntohl(cm->msg.dqpn), (time - cm->timer)/1000, + cm->hca->ib_trans.rep_time << cm->retries); + cm->retries++; + dapl_os_unlock(&cm->lock); + dapli_cm_connect(cm->ep, cm); + return; + } + break; + case DCM_RTU_PENDING: + *timer = cm->hca->ib_trans.cm_timer; + if ((time - cm->timer)/1000 > + (cm->hca->ib_trans.rtu_time << cm->retries)) { + dapl_log(DAPL_DBG_TYPE_CM_WARN, + " CM_REPLY retry %d [lid, port, qpn]:" + " %x %x %x -> %x %x %x r_pid 
%x (%x) Time(ms) %llu > %d\n", + cm->retries+1, + ntohs(cm->msg.saddr.ib.lid), + ntohs(cm->msg.sport), + ntohl(cm->msg.saddr.ib.qpn), + ntohs(cm->msg.daddr.ib.lid), + ntohs(cm->msg.dport), + ntohl(cm->msg.daddr.ib.qpn), + ntohl(*(DAT_UINT32*)cm->msg.resv), + ntohl(*(DAT_UINT32*)cm->msg.resv), + (time - cm->timer)/1000, + cm->hca->ib_trans.rtu_time << cm->retries); + cm->retries++; + dapl_os_unlock(&cm->lock); + ucm_reply(cm); + return; + } + break; + case DCM_DISC_PENDING: + *timer = cm->hca->ib_trans.cm_timer; + /* wait longer each retry */ + if ((time - cm->timer)/1000 > + (cm->hca->ib_trans.rtu_time << cm->retries)) { + dapl_log(DAPL_DBG_TYPE_CM_WARN, + " CM_DREQ retry %d [lid, port, qpn]:" + " %x %x %x -> %x %x %x r_pid %x (%x) Time(ms) %llu > %d\n", + cm->retries+1, + ntohs(cm->msg.saddr.ib.lid), + ntohs(cm->msg.sport), + ntohl(cm->msg.saddr.ib.qpn), + ntohs(cm->msg.daddr.ib.lid), + ntohs(cm->msg.dport), + ntohl(cm->msg.dqpn), + ntohl(*(DAT_UINT32*)cm->msg.resv), + ntohl(*(DAT_UINT32*)cm->msg.resv), + (time - cm->timer)/1000, + cm->hca->ib_trans.rtu_time << cm->retries); + cm->retries++; + dapl_os_unlock(&cm->lock); + dapli_cm_disconnect(cm); + return; + } + break; + default: + break; + } + dapl_os_unlock(&cm->lock); +} + +/* SEND CM MESSAGE PROCESSING */ + +/* Get CM UD message from send queue, called with s_lock held */ +static ib_cm_msg_t *ucm_get_smsg(ib_hca_transport_t *tp) +{ + ib_cm_msg_t *msg = NULL; + int ret, polled = 0, hd = tp->s_hd; + + hd++; + + if (hd == tp->qpe) + hd = 0; +retry: + if (hd == tp->s_tl) + msg = NULL; + else { + msg = &tp->sbuf[hd]; + tp->s_hd = hd; /* new hd */ + } + + /* if empty, process some completions */ + if ((msg == NULL) && (!polled)) { + struct ibv_wc wc; + + /* process completions, based on UCM_TX_BURST */ + ret = ibv_poll_cq(tp->scq, 1, &wc); + if (ret < 0) { + dapl_log(DAPL_DBG_TYPE_WARN, + " get_smsg: cq %p %s\n", + tp->scq, strerror(errno)); + } + /* free up completed sends, update tail */ + if (ret > 0) { + 
tp->s_tl = (int)wc.wr_id; + dapl_log(DAPL_DBG_TYPE_CM, + " get_smsg: wr_cmp (%d) s_tl=%d\n", + wc.status, tp->s_tl); + } + polled++; + goto retry; + } + return msg; +} + +/* RECEIVE CM MESSAGE PROCESSING */ + +static int ucm_post_rmsg(ib_hca_transport_t *tp, ib_cm_msg_t *msg) +{ + struct ibv_recv_wr recv_wr, *recv_err; + struct ibv_sge sge; + + recv_wr.next = NULL; + recv_wr.sg_list = &sge; + recv_wr.num_sge = 1; + recv_wr.wr_id = (uint64_t)(uintptr_t) msg; + sge.length = sizeof(ib_cm_msg_t) + sizeof(struct ibv_grh); + sge.lkey = tp->mr_rbuf->lkey; + sge.addr = (uintptr_t)((char *)msg - sizeof(struct ibv_grh)); + + return (ibv_post_recv(tp->qp, &recv_wr, &recv_err)); +} + +static int ucm_reject(ib_hca_transport_t *tp, ib_cm_msg_t *msg) +{ + ib_cm_msg_t smsg; + + /* setup op, rearrange the src, dst cm and addr info */ + (void)dapl_os_memzero(&smsg, sizeof(smsg)); + smsg.ver = htons(DCM_VER); + smsg.op = htons(DCM_REJ_CM); + smsg.dport = msg->sport; + smsg.dqpn = msg->sqpn; + smsg.sport = msg->dport; + smsg.sqpn = msg->dqpn; + + dapl_os_memcpy(&smsg.daddr, &msg->saddr, sizeof(union dcm_addr)); + + /* no dst_addr IB info in REQ, init lid, gid, get type from saddr */ + smsg.saddr.ib.lid = tp->addr.ib.lid; + smsg.saddr.ib.qp_type = msg->saddr.ib.qp_type; + dapl_os_memcpy(&smsg.saddr.ib.gid[0], + &tp->addr.ib.gid, 16); + + dapl_os_memcpy(&smsg.saddr, &msg->daddr, sizeof(union dcm_addr)); + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " CM reject -> LID %x, QPN %x PORT %x\n", + ntohs(smsg.daddr.ib.lid), + ntohl(smsg.dqpn), ntohs(smsg.dport)); + + return (ucm_send(tp, &smsg, NULL, 0)); +} + +static void ucm_process_recv(ib_hca_transport_t *tp, + ib_cm_msg_t *msg, + dp_ib_cm_handle_t cm) +{ + dapl_os_lock(&cm->lock); + switch (cm->state) { + case DCM_LISTEN: /* passive */ + dapl_os_unlock(&cm->lock); + ucm_accept(cm, msg); + break; + case DCM_RTU_PENDING: /* passive */ + dapl_os_unlock(&cm->lock); + ucm_accept_rtu(cm, msg); + break; + case DCM_REP_PENDING: /* active */ + 
dapl_os_unlock(&cm->lock); + ucm_connect_rtu(cm, msg); + break; + case DCM_CONNECTED: /* active and passive */ + /* DREQ, change state and process */ + if (ntohs(msg->op) == DCM_DREQ) { + cm->state = DCM_DISC_RECV; + dapl_os_unlock(&cm->lock); + dapli_cm_disconnect(cm); + break; + } + /* active: RTU was dropped, resend */ + if (ntohs(msg->op) == DCM_REP) { + dapl_log(DAPL_DBG_TYPE_CM_WARN, + " RESEND RTU: op %s st %s [lid, port, qpn]:" + " %x %x %x -> %x %x %x\n", + dapl_cm_op_str(ntohs(msg->op)), + dapl_cm_state_str(cm->state), + ntohs(msg->saddr.ib.lid), + ntohs(msg->sport), + ntohl(msg->saddr.ib.qpn), + ntohs(msg->daddr.ib.lid), + ntohs(msg->dport), + ntohl(msg->daddr.ib.qpn)); + + cm->msg.op = htons(DCM_RTU); + ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0); + } + dapl_os_unlock(&cm->lock); + break; + case DCM_DISC_PENDING: /* active and passive */ + /* DREQ or DREP, finalize */ + dapl_os_unlock(&cm->lock); + ucm_disconnect_final(cm); + break; + case DCM_DISCONNECTED: + case DCM_FREE: + /* DREQ dropped, resend */ + if (ntohs(msg->op) == DCM_DREQ) { + dapl_log(DAPL_DBG_TYPE_CM_WARN, + " RESEND DREP: op %s st %s [lid, port, qpn]:" + " %x %x %x -> %x %x %x\n", + dapl_cm_op_str(ntohs(msg->op)), + dapl_cm_state_str(cm->state), + ntohs(msg->saddr.ib.lid), + ntohs(msg->sport), + ntohl(msg->saddr.ib.qpn), + ntohs(msg->daddr.ib.lid), + ntohs(msg->dport), + ntohl(msg->daddr.ib.qpn)); + cm->msg.op = htons(DCM_DREP); + ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0); + + } else if (ntohs(msg->op) != DCM_DREP){ + /* DREP ok to ignore, any other print warning */ + dapl_log(DAPL_DBG_TYPE_WARN, + " ucm_recv: UNEXPECTED MSG on cm %p" + " <- op %s, st %s spsp %x sqpn %x\n", + cm, dapl_cm_op_str(ntohs(msg->op)), + dapl_cm_state_str(cm->state), + ntohs(msg->sport), ntohl(msg->sqpn)); + } + dapl_os_unlock(&cm->lock); + break; + default: + dapl_log(DAPL_DBG_TYPE_WARN, + " ucm_recv: UNKNOWN state" + " <- op %s, %s spsp %x sqpn %x\n", + dapl_cm_op_str(ntohs(msg->op)), + 
dapl_cm_state_str(cm->state), + ntohs(msg->sport), ntohl(msg->sqpn)); + dapl_os_unlock(&cm->lock); + break; + } +} + +/* Find matching CM object for this receive message, return CM reference, timer */ +dp_ib_cm_handle_t ucm_cm_find(ib_hca_transport_t *tp, ib_cm_msg_t *msg) +{ + dp_ib_cm_handle_t cm, next, found = NULL; + struct dapl_llist_entry **list; + DAPL_OS_LOCK *lock; + int listenq = 0; + + /* conn list first, duplicate requests for DCM_REQ */ + list = &tp->list; + lock = &tp->lock; + +retry_listenq: + dapl_os_lock(lock); + if (!dapl_llist_is_empty(list)) + next = dapl_llist_peek_head(list); + else + next = NULL; + + while (next) { + cm = next; + next = dapl_llist_next_entry(list, + (DAPL_LLIST_ENTRY *)&cm->local_entry); + if (cm->state == DCM_DESTROY || cm->state == DCM_FREE) + continue; + + /* CM sPORT + QPN, match is good enough for listenq */ + if (listenq && + cm->msg.sport == msg->dport && + cm->msg.sqpn == msg->dqpn) { + found = cm; + break; + } + /* connectq, check src and dst, check duplicate conn_reqs */ + if (!listenq && + cm->msg.sport == msg->dport && cm->msg.sqpn == msg->dqpn && + cm->msg.dport == msg->sport && cm->msg.dqpn == msg->sqpn && + cm->msg.daddr.ib.lid == msg->saddr.ib.lid) { + if (ntohs(msg->op) != DCM_REQ) { + found = cm; + break; + } else { + /* duplicate; bail and throw away */ + dapl_os_unlock(lock); + dapl_log(DAPL_DBG_TYPE_CM_WARN, + " DUPLICATE: cm %p op %s st %s [lid, port, qpn]:" + " %x %x %x <- %x %x %x\n", cm, + dapl_cm_op_str(ntohs(msg->op)), + dapl_cm_state_str(cm->state), + ntohs(msg->daddr.ib.lid), + ntohs(msg->dport), + ntohl(msg->daddr.ib.qpn), + ntohs(msg->saddr.ib.lid), + ntohs(msg->sport), + ntohl(msg->saddr.ib.qpn)); + + return NULL; + } + } + } + dapl_os_unlock(lock); + + /* no duplicate request on connq, check listenq for new request */ + if (ntohs(msg->op) == DCM_REQ && !listenq && !found) { + listenq = 1; + list = &tp->llist; + lock = &tp->llock; + goto retry_listenq; + } + + /* not match on listenq for valid 
request, send reject */ + if (ntohs(msg->op) == DCM_REQ && !found) { + dapl_log(DAPL_DBG_TYPE_WARN, + " ucm_recv: NO LISTENER for %s %x %x i%x c%x" + " < %x %x %x, sending reject\n", + dapl_cm_op_str(ntohs(msg->op)), + ntohs(msg->daddr.ib.lid), ntohs(msg->dport), + ntohl(msg->daddr.ib.qpn), ntohl(msg->sqpn), + ntohs(msg->saddr.ib.lid), ntohs(msg->sport), + ntohl(msg->saddr.ib.qpn)); + + ucm_reject(tp, msg); + } + + if (!found) { + dapl_log(DAPL_DBG_TYPE_CM, + " ucm_recv: NO MATCH op %s %x %x i%x c%x" + " < %x %x %x\n", + dapl_cm_op_str(ntohs(msg->op)), + ntohs(msg->daddr.ib.lid), ntohs(msg->dport), + ntohl(msg->daddr.ib.qpn), ntohl(msg->sqpn), + ntohs(msg->saddr.ib.lid), ntohs(msg->sport), + ntohl(msg->saddr.ib.qpn)); + } + + return found; +} + +/* Get rmsgs from CM completion queue, 10 at a time */ +static void ucm_recv(ib_hca_transport_t *tp) +{ + struct ibv_wc wc[10]; + ib_cm_msg_t *msg; + dp_ib_cm_handle_t cm; + int i, ret, notify = 0; + struct ibv_cq *ibv_cq = NULL; + DAPL_HCA *hca; + + /* POLLIN on channel FD */ + ret = ibv_get_cq_event(tp->rch, &ibv_cq, (void *)&hca); + if (ret == 0) { + ibv_ack_cq_events(ibv_cq, 1); + } +retry: + ret = ibv_poll_cq(tp->rcq, 10, wc); + if (ret <= 0) { + if (!ret && !notify) { + ibv_req_notify_cq(tp->rcq, 0); + notify = 1; + goto retry; + } + return; + } else + notify = 0; + + for (i = 0; i < ret; i++) { + msg = (ib_cm_msg_t*) (uintptr_t) wc[i].wr_id; + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " ucm_recv: stat=%d op=%s ln=%d id=%p sqp=%x\n", + wc[i].status, dapl_cm_op_str(ntohs(msg->op)), + wc[i].byte_len, + (void*)wc[i].wr_id, wc[i].src_qp); + + /* validate CM message, version */ + if (ntohs(msg->ver) != DCM_VER) { + dapl_log(DAPL_DBG_TYPE_WARN, + " ucm_recv: UNKNOWN msg %p, ver %d\n", + msg, msg->ver); + ucm_post_rmsg(tp, msg); + continue; + } + if (!(cm = ucm_cm_find(tp, msg))) { + ucm_post_rmsg(tp, msg); + continue; + } + + /* match, process it */ + ucm_process_recv(tp, msg, cm); + ucm_post_rmsg(tp, msg); + } + + /* finished 
this batch of WC's, poll and rearm */ + goto retry; +} + +/* ACTIVE/PASSIVE: build and send CM message out of CM object */ +static int ucm_send(ib_hca_transport_t *tp, ib_cm_msg_t *msg, DAT_PVOID p_data, DAT_COUNT p_size) +{ + ib_cm_msg_t *smsg = NULL; + struct ibv_send_wr wr, *bad_wr; + struct ibv_sge sge; + int len, ret = -1; + uint16_t dlid = ntohs(msg->daddr.ib.lid); + + /* Get message from send queue, copy data, and send */ + dapl_os_lock(&tp->slock); + if ((smsg = ucm_get_smsg(tp)) == NULL) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ucm_send ERR: get_smsg(hd=%d,tl=%d) \n", + tp->s_hd, tp->s_tl); + goto bail; + } + + len = (sizeof(*msg) - DCM_MAX_PDATA_SIZE); + dapl_os_memcpy(smsg, msg, len); + if (p_size) { + smsg->p_size = ntohs(p_size); + dapl_os_memcpy(&smsg->p_data, p_data, p_size); + } + + wr.next = NULL; + wr.sg_list = &sge; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND; + wr.wr_id = (unsigned long)tp->s_hd; + wr.send_flags = (wr.wr_id % tp->burst) ? 0 : IBV_SEND_SIGNALED; + if (len <= tp->max_inline_send) + wr.send_flags |= IBV_SEND_INLINE; + + sge.length = len + p_size; + sge.lkey = tp->mr_sbuf->lkey; + sge.addr = (uintptr_t)smsg; + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " ucm_send: op %s ln %d lid %x c_qpn %x rport %x\n", + dapl_cm_op_str(ntohs(smsg->op)), + sge.length, htons(smsg->daddr.ib.lid), + htonl(smsg->dqpn), htons(smsg->dport)); + + /* empty slot, then create AH */ + if (!tp->ah[dlid]) { + tp->ah[dlid] = + dapls_create_ah(tp->hca, tp->pd, tp->qp, + htons(dlid), NULL); + if (!tp->ah[dlid]) + goto bail; + } + + wr.wr.ud.ah = tp->ah[dlid]; + wr.wr.ud.remote_qpn = ntohl(smsg->dqpn); + wr.wr.ud.remote_qkey = DAT_UD_QKEY; + + ret = ibv_post_send(tp->qp, &wr, &bad_wr); + if (ret) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ucm_send ERR: post_send() %s\n", + strerror(errno) ); + } + +bail: + dapl_os_unlock(&tp->slock); + return ret; +} + +/* ACTIVE/PASSIVE: CM objects */ +static void dapli_cm_dealloc(dp_ib_cm_handle_t cm) { + + dapl_os_assert(!cm->ref_count); + 
dapl_os_lock_destroy(&cm->lock); + dapl_os_wait_object_destroy(&cm->event); + dapl_os_free(cm, sizeof(*cm)); +} + +void dapls_cm_acquire(dp_ib_cm_handle_t cm) +{ + dapl_os_lock(&cm->lock); + cm->ref_count++; + dapl_os_unlock(&cm->lock); +} + +void dapls_cm_release(dp_ib_cm_handle_t cm) +{ + dapl_os_lock(&cm->lock); + cm->ref_count--; + if (cm->ref_count) { + dapl_os_unlock(&cm->lock); + return; + } + /* client, release local conn id port */ + if (!cm->sp && cm->msg.sport) + ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport)); + + /* clean up any UD address handles */ + if (cm->ah) { + ibv_destroy_ah(cm->ah); + cm->ah = NULL; + } + dapl_os_unlock(&cm->lock); + dapli_cm_dealloc(cm); +} + +dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep) +{ + dp_ib_cm_handle_t cm; + + /* Allocate CM, init lock, and initialize */ + if ((cm = dapl_os_alloc(sizeof(*cm))) == NULL) + return NULL; + + (void)dapl_os_memzero(cm, sizeof(*cm)); + if (dapl_os_lock_init(&cm->lock)) + goto bail; + + if (dapl_os_wait_object_init(&cm->event)) { + dapl_os_lock_destroy(&cm->lock); + goto bail; + } + dapls_cm_acquire(cm); + + cm->msg.ver = htons(DCM_VER); + *(DAT_UINT32*)cm->msg.resv = htonl(dapl_os_getpid()); /* exchange PID for debugging */ + + /* ACTIVE: init source address QP info from local EP */ + if (ep) { + DAPL_HCA *hca = ep->header.owner_ia->hca_ptr; + + cm->msg.sport = htons(ucm_get_port(&hca->ib_trans, 0)); + if (!cm->msg.sport) { + dapl_os_wait_object_destroy(&cm->event); + dapl_os_lock_destroy(&cm->lock); + goto bail; + } + /* link CM object to EP */ + dapl_ep_link_cm(ep, cm); + cm->hca = hca; + cm->ep = ep; + + /* IB info in network order */ + cm->msg.sqpn = htonl(hca->ib_trans.qp->qp_num); /* ucm */ + cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num); /* ep */ + cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type; + cm->msg.saddr.ib.lid = hca->ib_trans.addr.ib.lid; + dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], + &hca->ib_trans.addr.ib.gid, 16); + } + return cm; +bail: + 
dapl_os_free(cm, sizeof(*cm)); + return NULL; +} + +/* schedule destruction of CM object */ +void dapli_cm_free(dp_ib_cm_handle_t cm) +{ + dapl_log(DAPL_DBG_TYPE_CM, + " dapli_cm_free: cm %p %s ep %p refs=%d\n", + cm, dapl_cm_state_str(cm->state), + cm->ep, cm->ref_count); + + dapl_os_lock(&cm->lock); + cm->state = DCM_FREE; + dapls_thread_signal(&cm->hca->ib_trans.signal); + dapl_os_unlock(&cm->lock); +} + +/* Blocking, ONLY called from dat_ep_free */ +void dapls_cm_free(dp_ib_cm_handle_t cm) +{ + dapl_log(DAPL_DBG_TYPE_CM, + " dapl_cm_free: cm %p %s ep %p refs=%d\n", + cm, dapl_cm_state_str(cm->state), + cm->ep, cm->ref_count); + + /* free from internal workq, wait until EP is last ref */ + dapl_os_lock(&cm->lock); + if (cm->state != DCM_FREE) + cm->state = DCM_FREE; + + while (cm->ref_count != 1) { + dapl_os_unlock(&cm->lock); + dapls_thread_signal(&cm->hca->ib_trans.signal); + dapl_os_sleep_usec(10000); + dapl_os_lock(&cm->lock); + } + dapl_os_unlock(&cm->lock); + + /* unlink, dequeue from EP. 
Final ref so release will destroy */ + dapl_ep_unlink_cm(cm->ep, cm); +} + +/* ACTIVE/PASSIVE: queue up connection object on CM list */ +static void dapli_queue_conn(dp_ib_cm_handle_t cm) +{ + /* add to work queue, list, for cm thread processing */ + dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm->local_entry); + dapl_os_lock(&cm->hca->ib_trans.lock); + dapls_cm_acquire(cm); + dapl_llist_add_tail(&cm->hca->ib_trans.list, + (DAPL_LLIST_ENTRY *)&cm->local_entry, cm); + dapl_os_unlock(&cm->hca->ib_trans.lock); + dapls_thread_signal(&cm->hca->ib_trans.signal); +} + +/* PASSIVE: queue up listen object on listen list */ +static void dapli_queue_listen(dp_ib_cm_handle_t cm) +{ + /* add to work queue, llist, for cm thread processing */ + dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm->local_entry); + dapl_os_lock(&cm->hca->ib_trans.llock); + dapls_cm_acquire(cm); + dapl_llist_add_tail(&cm->hca->ib_trans.llist, + (DAPL_LLIST_ENTRY *)&cm->local_entry, cm); + dapl_os_unlock(&cm->hca->ib_trans.llock); +} + +static void dapli_dequeue_listen(dp_ib_cm_handle_t cm) +{ + DAPL_HCA *hca = cm->hca; + + dapl_os_lock(&hca->ib_trans.llock); + dapl_llist_remove_entry(&hca->ib_trans.llist, + (DAPL_LLIST_ENTRY *)&cm->local_entry); + dapls_cm_release(cm); + dapl_os_unlock(&hca->ib_trans.llock); +} + +/* called with local LIST and CM object lock */ +static void dapli_cm_dequeue(dp_ib_cm_handle_t cm) +{ + /* Remove from work queue, cr thread processing */ + dapl_llist_remove_entry(&cm->hca->ib_trans.list, + (DAPL_LLIST_ENTRY *)&cm->local_entry); + dapls_cm_release(cm); +} + +static void ucm_disconnect_final(dp_ib_cm_handle_t cm) +{ + /* no EP attachment or not RC, nothing to process */ + if (cm->ep == NULL || + cm->ep->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC) + return; + + dapl_os_lock(&cm->lock); + if ((cm->state == DCM_DISCONNECTED) || (cm->state == DCM_FREE)) { + dapl_os_unlock(&cm->lock); + return; + } + + cm->state = DCM_DISCONNECTED; + dapl_os_unlock(&cm->lock); + + if (cm->sp) + 
dapls_cr_callback(cm, IB_CME_DISCONNECTED, NULL, 0, cm->sp); + else + dapl_evd_connection_callback(cm, IB_CME_DISCONNECTED, NULL, 0, cm->ep); + +} + +/* + * called from consumer thread via ep_disconnect/ep_free or + * from cm_thread when receiving DREQ + */ +DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm) +{ + int finalize = 1; + int wakeup = 0; + + dapl_os_lock(&cm->lock); + switch (cm->state) { + case DCM_CONNECTED: + /* CONSUMER: move to err state to flush, if not UD */ + if (cm->ep->qp_handle->qp_type != IBV_QPT_UD) + dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0); + + /* send DREQ, event after DREP or DREQ timeout */ + cm->state = DCM_DISC_PENDING; + cm->msg.op = htons(DCM_DREQ); + finalize = 0; /* wait for DREP, wakeup timer after DREQ sent */ + wakeup = 1; + break; + case DCM_DISC_PENDING: + /* DREQ timeout, resend until retries exhausted */ + cm->msg.op = htons(DCM_DREQ); + if (cm->retries >= cm->hca->ib_trans.retries) { + dapl_log(DAPL_DBG_TYPE_ERR, + " CM_DREQ: RETRIES EXHAUSTED:" + " %x %x %x -> %x %x %x\n", + htons(cm->msg.saddr.ib.lid), + htonl(cm->msg.saddr.ib.qpn), + htons(cm->msg.sport), + htons(cm->msg.daddr.ib.lid), + htonl(cm->msg.dqpn), + htons(cm->msg.dport)); + finalize = 1; + } + break; + case DCM_DISC_RECV: + /* CM_THREAD: move to err state to flush, if not UD */ + if (cm->ep->qp_handle->qp_type != IBV_QPT_UD) + dapls_modify_qp_state(cm->ep->qp_handle, IBV_QPS_ERR,0,0,0); + + /* DREQ received, send DREP and schedule event, finalize */ + cm->msg.op = htons(DCM_DREP); + break; + case DCM_DISCONNECTED: + dapl_os_unlock(&cm->lock); + return DAT_SUCCESS; + default: + dapl_log(DAPL_DBG_TYPE_WARN, + " disconnect UNKNOWN state: ep %p cm %p %s %s" + " %x %x %x %s %x %x %x r_pid %x (%x)\n", + cm->ep, cm, + cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? "RC" : "UD", + dapl_cm_state_str(cm->state), + ntohs(cm->msg.saddr.ib.lid), + ntohs(cm->msg.sport), + ntohl(cm->msg.saddr.ib.qpn), + cm->sp ? 
"<-" : "->", + ntohs(cm->msg.daddr.ib.lid), + ntohs(cm->msg.dport), + ntohl(cm->msg.daddr.ib.qpn), + ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv), + ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv)); + + dapl_os_unlock(&cm->lock); + return DAT_SUCCESS; + } + + dapl_os_get_time(&cm->timer); /* reply expected */ + ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0); + dapl_os_unlock(&cm->lock); + + if (wakeup) + dapls_thread_signal(&cm->hca->ib_trans.signal); + + if (finalize) + ucm_disconnect_final(cm); + + return DAT_SUCCESS; +} + +/* + * ACTIVE: get remote CM SID server info from r_addr. + * send, or resend CM msg via UD CM QP + */ +DAT_RETURN +dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm) +{ + dapl_log(DAPL_DBG_TYPE_EP, + " connect: lid %x i_qpn %x lport %x p_sz=%d -> " + " lid %x c_qpn %x rport %x\n", + htons(cm->msg.saddr.ib.lid), htonl(cm->msg.saddr.ib.qpn), + htons(cm->msg.sport), htons(cm->msg.p_size), + htons(cm->msg.daddr.ib.lid), htonl(cm->msg.dqpn), + htons(cm->msg.dport)); + + dapl_os_lock(&cm->lock); + if (cm->state != DCM_REP_PENDING) { + dapl_os_unlock(&cm->lock); + return DAT_INVALID_STATE; + } + + if (cm->retries == cm->hca->ib_trans.retries) { + dapl_log(DAPL_DBG_TYPE_ERR, + " CM_REQ: RETRIES EXHAUSTED:" + " 0x%x %x 0x%x -> 0x%x %x 0x%x\n", + htons(cm->msg.saddr.ib.lid), + htonl(cm->msg.saddr.ib.qpn), + htons(cm->msg.sport), + htons(cm->msg.daddr.ib.lid), + htonl(cm->msg.dqpn), + htons(cm->msg.dport)); + + dapl_os_unlock(&cm->lock); + +#ifdef DAPL_COUNTERS + /* called from check_timers in cm_thread, cm lock held */ + if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST) { + dapl_os_unlock(&cm->hca->ib_trans.lock); + dapls_print_cm_list(ep->header.owner_ia); + dapl_os_lock(&cm->hca->ib_trans.lock); + } +#endif + dapl_evd_connection_callback(cm, + IB_CME_DESTINATION_UNREACHABLE, + NULL, 0, ep); + + return DAT_ERROR(DAT_INVALID_ADDRESS, + DAT_INVALID_ADDRESS_UNREACHABLE); + } + dapl_os_unlock(&cm->lock); + + 
cm->msg.op = htons(DCM_REQ); + dapl_os_get_time(&cm->timer); /* reply expected */ + if (ucm_send(&cm->hca->ib_trans, &cm->msg, + &cm->msg.p_data, ntohs(cm->msg.p_size))) + goto bail; + + /* first time through, link EP and CM, put on work queue */ + if (!cm->retries) { + dapli_queue_conn(cm); + } + return DAT_SUCCESS; + +bail: + dapl_log(DAPL_DBG_TYPE_WARN, + " connect: ERR %s -> cm_lid %x cm_qpn %x r_psp %x p_sz=%d\n", + strerror(errno), htons(cm->msg.daddr.ib.lid), + htonl(cm->msg.dqpn), htons(cm->msg.dport), + htonl(cm->msg.p_size)); + + dapli_cm_free(cm); + return DAT_INSUFFICIENT_RESOURCES; +} + +/* + * ACTIVE: exchange QP information, called from CR thread + */ +static void ucm_connect_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg) +{ + DAPL_EP *ep = cm->ep; + ib_cm_events_t event = IB_CME_CONNECTED; + + dapl_os_lock(&cm->lock); + if (cm->state != DCM_REP_PENDING) { + dapl_log(DAPL_DBG_TYPE_WARN, + " CONN_RTU: UNEXPECTED state:" + " op %s, st %s <- lid %x sqpn %x sport %x\n", + dapl_cm_op_str(ntohs(msg->op)), + dapl_cm_state_str(cm->state), + ntohs(msg->saddr.ib.lid), ntohl(msg->saddr.ib.qpn), + ntohs(msg->sport)); + dapl_os_unlock(&cm->lock); + return; + } + + /* save remote address information to EP and CM */ + dapl_os_memcpy(&ep->remote_ia_address, + &msg->saddr, sizeof(union dcm_addr)); + dapl_os_memcpy(&cm->msg.daddr, + &msg->saddr, sizeof(union dcm_addr)); + + /* validate private data size, and copy if necessary */ + if (msg->p_size) { + if (ntohs(msg->p_size) > DCM_MAX_PDATA_SIZE) { + dapl_log(DAPL_DBG_TYPE_WARN, + " CONN_RTU: invalid p_size %d:" + " st %s <- lid %x sqpn %x spsp %x\n", + ntohs(msg->p_size), + dapl_cm_state_str(cm->state), + ntohs(msg->saddr.ib.lid), + ntohl(msg->saddr.ib.qpn), + ntohs(msg->sport)); + dapl_os_unlock(&cm->lock); + goto bail; + } + dapl_os_memcpy(cm->msg.p_data, + msg->p_data, ntohs(msg->p_size)); + } + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " CONN_RTU: DST lid=%x," + " iqp=%x, qp_type=%d, port=%x psize=%d\n", + 
ntohs(cm->msg.daddr.ib.lid), + ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type, + ntohs(msg->sport), ntohs(msg->p_size)); + + if (ntohs(msg->op) == DCM_REP) + event = IB_CME_CONNECTED; + else if (ntohs(msg->op) == DCM_REJ_USER) + event = IB_CME_DESTINATION_REJECT_PRIVATE_DATA; + else + event = IB_CME_DESTINATION_REJECT; + + if (event != IB_CME_CONNECTED) { + dapl_log(DAPL_DBG_TYPE_CM, + " ACTIVE: CM_REQ REJECTED:" + " cm %p op %s, st %s dlid %x iqp %x port %x <-" + " slid %x iqp %x port %x\n", cm, + dapl_cm_op_str(ntohs(msg->op)), + dapl_cm_state_str(cm->state), + ntohs(msg->daddr.ib.lid), ntohl(msg->daddr.ib.qpn), + ntohs(msg->dport), ntohs(msg->saddr.ib.lid), + ntohl(msg->saddr.ib.qpn), ntohs(msg->sport)); + + cm->state = DCM_REJECTED; + dapl_os_unlock(&cm->lock); + +#ifdef DAT_EXTENSIONS + if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD) + goto ud_bail; + else +#endif + goto bail; + } + dapl_os_unlock(&cm->lock); + + /* modify QP to RTR and then to RTS with remote info */ + dapl_os_lock(&cm->ep->header.lock); + if (dapls_modify_qp_state(cm->ep->qp_handle, + IBV_QPS_RTR, + cm->msg.daddr.ib.qpn, + cm->msg.daddr.ib.lid, + (ib_gid_handle_t)cm->msg.daddr.ib.gid) != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " CONN_RTU: QPS_RTR ERR %s <- lid %x iqp %x\n", + strerror(errno), ntohs(cm->msg.daddr.ib.lid), + ntohl(cm->msg.daddr.ib.qpn)); + dapl_os_unlock(&cm->ep->header.lock); + event = IB_CME_LOCAL_FAILURE; + goto bail; + } + if (dapls_modify_qp_state(cm->ep->qp_handle, + IBV_QPS_RTS, + cm->msg.daddr.ib.qpn, + cm->msg.daddr.ib.lid, + NULL) != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " CONN_RTU: QPS_RTS ERR %s <- lid %x iqp %x\n", + strerror(errno), ntohs(cm->msg.daddr.ib.lid), + ntohl(cm->msg.daddr.ib.qpn)); + dapl_os_unlock(&cm->ep->header.lock); + event = IB_CME_LOCAL_FAILURE; + goto bail; + } + dapl_os_unlock(&cm->ep->header.lock); + + /* Send RTU, no private data */ + cm->msg.op = htons(DCM_RTU); + + dapl_os_lock(&cm->lock); + cm->state = DCM_CONNECTED; + 
dapl_os_unlock(&cm->lock); + + if (ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0)) + goto bail; + + /* init cm_handle and post the event with private data */ + dapl_dbg_log(DAPL_DBG_TYPE_EP, " ACTIVE: connected!\n"); + +#ifdef DAT_EXTENSIONS +ud_bail: + if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD) { + DAT_IB_EXTENSION_EVENT_DATA xevent; + uint16_t lid = ntohs(cm->msg.daddr.ib.lid); + + /* post EVENT, modify_qp, AH already created, ucm msg */ + xevent.status = 0; + xevent.type = DAT_IB_UD_REMOTE_AH; + xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn); + xevent.remote_ah.ah = dapls_create_ah(cm->hca, + cm->ep->qp_handle->pd, + cm->ep->qp_handle, + htons(lid), + NULL); + if (xevent.remote_ah.ah == NULL) { + dapl_log(DAPL_DBG_TYPE_ERR, + " active UD RTU: ERR create_ah" + " for qpn 0x%x lid 0x%x\n", + xevent.remote_ah.qpn, lid); + event = IB_CME_LOCAL_FAILURE; + goto bail; + } + cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */ + + dapl_os_memcpy(&xevent.remote_ah.ia_addr, + &cm->msg.daddr, + sizeof(union dcm_addr)); + + /* remote ia_addr reference includes ucm qpn, not IB qpn */ + ((union dcm_addr*) + &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " ACTIVE: UD xevent ah %p qpn %x lid %x\n", + xevent.remote_ah.ah, xevent.remote_ah.qpn, lid); + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " ACTIVE: UD xevent ia_addr qp_type %d" + " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n", + ((union dcm_addr*) + &xevent.remote_ah.ia_addr)->ib.qp_type, + ntohs(((union dcm_addr*) + &xevent.remote_ah.ia_addr)->ib.lid), + ntohl(((union dcm_addr*) + &xevent.remote_ah.ia_addr)->ib.qpn), + ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]), + ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[8])); + + if (event == IB_CME_CONNECTED) + event = DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED; + else { + xevent.type = DAT_IB_UD_CONNECT_REJECT; + event = DAT_IB_UD_CONNECTION_REJECT_EVENT; + } + + dapls_evd_post_connection_event_ext( + (DAPL_EVD *)cm->ep->param.connect_evd_handle, + 
event, + (DAT_EP_HANDLE)ep, + (DAT_COUNT)ntohs(cm->msg.p_size), + (DAT_PVOID *)cm->msg.p_data, + (DAT_PVOID *)&xevent); + } else +#endif + { + dapl_evd_connection_callback(cm, + IB_CME_CONNECTED, + cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep); + } + dapl_log(DAPL_DBG_TYPE_CM_EST, + " UCM_ACTIVE_CONN %d [lid port qpn] %x %x %x -> %x %x %x\n", + cm->retries, ntohs(cm->msg.saddr.ib.lid), + ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn), + ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport), + ntohl(cm->msg.dqpn)); + return; +bail: + dapl_evd_connection_callback(NULL, event, cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep); + dapli_cm_free(cm); +} + +/* + * PASSIVE: Accept on listen CM PSP. + * create new CM object for this CR, + * receive peer QP information, private data, + * and post cr_event + */ +static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg) +{ + dp_ib_cm_handle_t acm; + + /* Allocate accept CM and setup passive references */ + if ((acm = dapls_ib_cm_create(NULL)) == NULL) { + dapl_log(DAPL_DBG_TYPE_WARN, " accept: ERR cm_create\n"); + return; + } + + /* dest CM info from CR msg, source CM info from listen */ + acm->sp = cm->sp; + acm->hca = cm->hca; + acm->msg.op = msg->op; + acm->msg.dport = msg->sport; + acm->msg.dqpn = msg->sqpn; + acm->msg.sport = cm->msg.sport; + acm->msg.sqpn = cm->msg.sqpn; + acm->msg.p_size = msg->p_size; + + /* CR saddr is CM daddr info, need EP for local saddr */ + dapl_os_memcpy(&acm->msg.daddr, &msg->saddr, sizeof(union dcm_addr)); + + dapl_log(DAPL_DBG_TYPE_CM, + " accept: DST port=%x lid=%x, iqp=%x, psize=%d\n", + ntohs(acm->msg.dport), ntohs(acm->msg.daddr.ib.lid), + htonl(acm->msg.daddr.ib.qpn), htons(acm->msg.p_size)); + + /* validate private data size before reading */ + if (ntohs(msg->p_size) > DCM_MAX_PDATA_SIZE) { + dapl_log(DAPL_DBG_TYPE_WARN, " accept: psize (%d) wrong\n", + ntohs(msg->p_size)); + goto bail; + } + + /* read private data into cm_handle if any present */ + if (msg->p_size) + 
dapl_os_memcpy(acm->msg.p_data, + msg->p_data, ntohs(msg->p_size)); + + acm->state = DCM_ACCEPTING; + dapli_queue_conn(acm); + +#ifdef DAT_EXTENSIONS + if (acm->msg.daddr.ib.qp_type == IBV_QPT_UD) { + DAT_IB_EXTENSION_EVENT_DATA xevent; + + /* post EVENT, modify_qp created ah */ + xevent.status = 0; + xevent.type = DAT_IB_UD_CONNECT_REQUEST; + + dapls_evd_post_cr_event_ext(acm->sp, + DAT_IB_UD_CONNECTION_REQUEST_EVENT, + acm, + (DAT_COUNT)ntohs(acm->msg.p_size), + (DAT_PVOID *)acm->msg.p_data, + (DAT_PVOID *)&xevent); + } else +#endif + /* trigger CR event and return SUCCESS */ + dapls_cr_callback(acm, + IB_CME_CONNECTION_REQUEST_PENDING, + acm->msg.p_data, ntohs(msg->p_size), acm->sp); + return; + +bail: + /* schedule work thread cleanup */ + dapli_cm_free(acm); + return; +} + +/* + * PASSIVE: read RTU from active peer, post CONN event + */ +static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg) +{ + dapl_os_lock(&cm->lock); + if ((ntohs(msg->op) != DCM_RTU) || (cm->state != DCM_RTU_PENDING)) { + dapl_log(DAPL_DBG_TYPE_WARN, + " accept_rtu: UNEXPECTED op, state:" + " op %s, st %s <- lid %x iqp %x sport %x\n", + dapl_cm_op_str(ntohs(msg->op)), + dapl_cm_state_str(cm->state), + ntohs(msg->saddr.ib.lid), ntohl(msg->saddr.ib.qpn), + ntohs(msg->sport)); + dapl_os_unlock(&cm->lock); + goto bail; + } + cm->state = DCM_CONNECTED; + dapl_os_unlock(&cm->lock); + + /* final data exchange if remote QP state is good to go */ + dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: connected!\n"); + +#ifdef DAT_EXTENSIONS + if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) { + DAT_IB_EXTENSION_EVENT_DATA xevent; + uint16_t lid = ntohs(cm->msg.daddr.ib.lid); + + /* post EVENT, modify_qp, AH already created, ucm msg */ + xevent.status = 0; + xevent.type = DAT_IB_UD_PASSIVE_REMOTE_AH; + xevent.remote_ah.qpn = ntohl(cm->msg.daddr.ib.qpn); + xevent.remote_ah.ah = dapls_create_ah(cm->hca, + cm->ep->qp_handle->pd, + cm->ep->qp_handle, + htons(lid), + NULL); + if (xevent.remote_ah.ah == NULL) 
{ + dapl_log(DAPL_DBG_TYPE_ERR, + " passive UD RTU: ERR create_ah" + " for qpn 0x%x lid 0x%x\n", + xevent.remote_ah.qpn, lid); + goto bail; + } + cm->ah = xevent.remote_ah.ah; /* keep ref to destroy */ + dapl_os_memcpy(&xevent.remote_ah.ia_addr, + &cm->msg.daddr, + sizeof(union dcm_addr)); + + /* remote ia_addr reference includes ucm qpn, not IB qpn */ + ((union dcm_addr*) + &xevent.remote_ah.ia_addr)->ib.qpn = cm->msg.dqpn; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " PASSIVE: UD xevent ah %p qpn %x lid %x\n", + xevent.remote_ah.ah, xevent.remote_ah.qpn, lid); + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " PASSIVE: UD xevent ia_addr qp_type %d" + " lid 0x%x qpn 0x%x gid 0x"F64x" 0x"F64x" \n", + ((union dcm_addr*) + &xevent.remote_ah.ia_addr)->ib.qp_type, + ntohs(((union dcm_addr*) + &xevent.remote_ah.ia_addr)->ib.lid), + ntohl(((union dcm_addr*) + &xevent.remote_ah.ia_addr)->ib.qpn), + ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]), + ntohll(*(uint64_t*)&cm->msg.daddr.ib.gid[8])); + + dapls_evd_post_connection_event_ext( + (DAPL_EVD *)cm->ep->param.connect_evd_handle, + DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED, + (DAT_EP_HANDLE)cm->ep, + (DAT_COUNT)ntohs(cm->msg.p_size), + (DAT_PVOID *)cm->msg.p_data, + (DAT_PVOID *)&xevent); + } else { +#endif + dapls_cr_callback(cm, IB_CME_CONNECTED, NULL, 0, cm->sp); + } + dapl_log(DAPL_DBG_TYPE_CM_EST, + " UCM_PASSIVE_CONN %d [lid port qpn] %x %x %x <- %x %x %x\n", + cm->retries, ntohs(cm->msg.saddr.ib.lid), + ntohs(cm->msg.sport), ntohl(cm->msg.saddr.ib.qpn), + ntohs(cm->msg.daddr.ib.lid), ntohs(cm->msg.dport), + ntohl(cm->msg.dqpn)); + return; +bail: + dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE, NULL, 0, cm->sp); + dapli_cm_free(cm); +} + +/* + * PASSIVE: user accepted, send reply message with pdata + */ +static int ucm_reply(dp_ib_cm_handle_t cm) +{ + dapl_os_lock(&cm->lock); + if (cm->state != DCM_RTU_PENDING) { + dapl_log(DAPL_DBG_TYPE_ERR, + " CM_REPLY: wrong state %s", + dapl_cm_state_str(cm->state)); + dapl_os_unlock(&cm->lock); + return 
-1; + } + + if (cm->retries == cm->hca->ib_trans.retries) { + dapl_log(DAPL_DBG_TYPE_ERR, + " CM_REPLY: RETRIES EXHAUSTED (lid port qpn)" + " %x %x %x -> %x %x %x\n", + htons(cm->msg.saddr.ib.lid), + htons(cm->msg.sport), + htonl(cm->msg.saddr.ib.qpn), + htons(cm->msg.daddr.ib.lid), + htons(cm->msg.dport), + htonl(cm->msg.daddr.ib.qpn)); + + dapl_os_unlock(&cm->lock); +#ifdef DAPL_COUNTERS + /* called from check_timers in cm_thread, cm lock held */ + if (g_dapl_dbg_type & DAPL_DBG_TYPE_CM_LIST) { + dapl_os_unlock(&cm->hca->ib_trans.lock); + dapls_print_cm_list(dapl_llist_peek_head(&cm->hca->ia_list_head)); + dapl_os_lock(&cm->hca->ib_trans.lock); + } +#endif +#ifdef DAT_EXTENSIONS + if (cm->msg.saddr.ib.qp_type == IBV_QPT_UD) { + DAT_IB_EXTENSION_EVENT_DATA xevent; + + /* post REJECT event with CONN_REQ p_data */ + xevent.status = 0; + xevent.type = DAT_IB_UD_CONNECT_ERROR; + + dapls_evd_post_connection_event_ext( + (DAPL_EVD *)cm->ep->param.connect_evd_handle, + DAT_IB_UD_CONNECTION_ERROR_EVENT, + (DAT_EP_HANDLE)cm->ep, + (DAT_COUNT)ntohs(cm->msg.p_size), + (DAT_PVOID *)cm->msg.p_data, + (DAT_PVOID *)&xevent); + } else +#endif + dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE, + NULL, 0, cm->sp); + return -1; + } + dapl_os_get_time(&cm->timer); /* RTU expected */ + dapl_os_unlock(&cm->lock); + if (ucm_send(&cm->hca->ib_trans, &cm->msg, cm->p_data, cm->p_size)) { + dapl_log(DAPL_DBG_TYPE_ERR," accept ERR: ucm reply send()\n"); + return -1; + } + return 0; +} + + +/* + * PASSIVE: consumer accept, send local QP information, private data, + * queue on work thread to receive RTU information to avoid blocking + * user thread. 
+ */ +DAT_RETURN +dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data) +{ + DAPL_IA *ia = ep->header.owner_ia; + dp_ib_cm_handle_t cm = cr->ib_cm_handle; + + if (p_size > DCM_MAX_PDATA_SIZE) + return DAT_LENGTH_ERROR; + + dapl_os_lock(&cm->lock); + if (cm->state != DCM_ACCEPTING) { + dapl_os_unlock(&cm->lock); + return DAT_INVALID_STATE; + } + dapl_os_unlock(&cm->lock); + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " ACCEPT_USR: remote lid=%x" + " iqp=%x qp_type %d, psize=%d\n", + ntohs(cm->msg.daddr.ib.lid), + ntohl(cm->msg.daddr.ib.qpn), cm->msg.daddr.ib.qp_type, + p_size); + + dapl_dbg_log(DAPL_DBG_TYPE_CM, + " ACCEPT_USR: remote GID subnet %016llx id %016llx\n", + (unsigned long long) + htonll(*(uint64_t*)&cm->msg.daddr.ib.gid[0]), + (unsigned long long) + htonll(*(uint64_t*)&cm->msg.daddr.ib.gid[8])); + +#ifdef DAT_EXTENSIONS + if (cm->msg.daddr.ib.qp_type == IBV_QPT_UD && + ep->qp_handle->qp_type != IBV_QPT_UD) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT_USR: ERR remote QP is UD," + ", but local QP is not\n"); + return (DAT_INVALID_HANDLE | DAT_INVALID_HANDLE_EP); + } +#endif + + /* modify QP to RTR and then to RTS with remote info already read */ + dapl_os_lock(&ep->header.lock); + if (dapls_modify_qp_state(ep->qp_handle, + IBV_QPS_RTR, + cm->msg.daddr.ib.qpn, + cm->msg.daddr.ib.lid, + (ib_gid_handle_t)cm->msg.daddr.ib.gid) != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT_USR: QPS_RTR ERR %s -> lid %x qpn %x\n", + strerror(errno), ntohs(cm->msg.daddr.ib.lid), + ntohl(cm->msg.daddr.ib.qpn)); + dapl_os_unlock(&ep->header.lock); + goto bail; + } + if (dapls_modify_qp_state(ep->qp_handle, + IBV_QPS_RTS, + cm->msg.daddr.ib.qpn, + cm->msg.daddr.ib.lid, + NULL) != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " ACCEPT_USR: QPS_RTS ERR %s -> lid %x qpn %x\n", + strerror(errno), ntohs(cm->msg.daddr.ib.lid), + ntohl(cm->msg.daddr.ib.qpn)); + dapl_os_unlock(&ep->header.lock); + goto bail; + } + dapl_os_unlock(&ep->header.lock); + + /* save 
remote address information */ + dapl_os_memcpy(&ep->remote_ia_address, + &cm->msg.saddr, sizeof(union dcm_addr)); + + /* setup local QP info and type from EP, copy pdata, for reply */ + cm->msg.op = htons(DCM_REP); + cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num); + cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type; + cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid; + dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], + &cm->hca->ib_trans.addr.ib.gid, 16); + + /* + * UD: deliver p_data with REQ and EST event, keep REQ p_data in + * cm->msg.p_data and save REPLY accept data in cm->p_data for retries + */ + cm->p_size = p_size; + dapl_os_memcpy(&cm->p_data, p_data, p_size); + + /* save state and setup valid reference to EP, HCA */ + dapl_ep_link_cm(ep, cm); + cm->ep = ep; + cm->hca = ia->hca_ptr; + + dapl_os_lock(&cm->lock); + dapl_os_get_time(&cm->timer); /* RTU expected */ + cm->state = DCM_RTU_PENDING; + dapl_os_unlock(&cm->lock); + + if (ucm_reply(cm)) { + dapl_ep_unlink_cm(ep, cm); + goto bail; + } + dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: accepted!\n"); + dapls_thread_signal(&cm->hca->ib_trans.signal); + return DAT_SUCCESS; +bail: + dapli_cm_free(cm); + return DAT_INTERNAL_ERROR; +} + + +/* + * dapls_ib_connect + * + * Initiate a connection with the passive listener on another node + * + * Input: + * ep_handle, + * remote_ia_address, + * remote_conn_qual, + * prd_size size of private data and structure + * prd_prt pointer to private data structure + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INVALID_PARAMETER + * + */ +DAT_RETURN +dapls_ib_connect(IN DAT_EP_HANDLE ep_handle, + IN DAT_IA_ADDRESS_PTR r_addr, + IN DAT_CONN_QUAL r_psp, + IN DAT_COUNT p_size, IN void *p_data) +{ + DAPL_EP *ep = (DAPL_EP *)ep_handle; + dp_ib_cm_handle_t cm; + + /* create CM object, initialize SRC info from EP */ + cm = dapls_ib_cm_create(ep); + if (cm == NULL) + return DAT_INSUFFICIENT_RESOURCES; + + /* remote hca and port: lid, 
gid, network order */ + dapl_os_memcpy(&cm->msg.daddr, r_addr, sizeof(union dcm_addr)); + + /* remote uCM information, comes from consumer provider r_addr */ + cm->msg.dport = htons((uint16_t)r_psp); + cm->msg.dqpn = cm->msg.daddr.ib.qpn; + cm->msg.daddr.ib.qpn = 0; /* don't have a remote qpn until reply */ + + if (p_size) { + cm->msg.p_size = htons(p_size); + dapl_os_memcpy(&cm->msg.p_data, p_data, p_size); + } + + cm->state = DCM_REP_PENDING; + + /* build connect request, send to remote CM based on r_addr info */ + return (dapli_cm_connect(ep, cm)); +} + +/* + * dapls_ib_disconnect + * + * Disconnect an EP + * + * Input: + * ep_handle, + * disconnect_flags + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + */ +DAT_RETURN +dapls_ib_disconnect(IN DAPL_EP *ep_ptr, IN DAT_CLOSE_FLAGS close_flags) +{ + dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr); + + dapl_os_lock(&ep_ptr->header.lock); + if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED || + ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC || + cm_ptr == NULL) { + dapl_os_unlock(&ep_ptr->header.lock); + return DAT_SUCCESS; + } + dapl_os_unlock(&ep_ptr->header.lock); + + dapli_cm_disconnect(cm_ptr); + + /* ABRUPT close, wait for callback and DISCONNECTED state */ + if (close_flags == DAT_CLOSE_ABRUPT_FLAG) { + dapl_os_lock(&ep_ptr->header.lock); + while (ep_ptr->param.ep_state != DAT_EP_STATE_DISCONNECTED) { + dapl_os_unlock(&ep_ptr->header.lock); + dapl_os_sleep_usec(10000); + dapl_os_lock(&ep_ptr->header.lock); + } + dapl_os_unlock(&ep_ptr->header.lock); + } + + return DAT_SUCCESS; +} + +/* + * dapls_ib_disconnect_clean + * + * Clean up outstanding connection data. This routine is invoked + * after the final disconnect callback has occurred. Only on the + * ACTIVE side of a connection. It is also called if dat_ep_connect + * times out using the consumer supplied timeout value. 
+ * + * Input: + * ep_ptr DAPL_EP + * active Indicates active side of connection + * + * Output: + * none + * + * Returns: + * void + * + */ +void +dapls_ib_disconnect_clean(IN DAPL_EP *ep, + IN DAT_BOOLEAN active, + IN const ib_cm_events_t ib_cm_event) +{ + if (ib_cm_event == IB_CME_TIMEOUT) { + dp_ib_cm_handle_t cm_ptr; + + if ((cm_ptr = dapl_get_cm_from_ep(ep)) == NULL) + return; + + dapl_log(DAPL_DBG_TYPE_WARN, + "dapls_ib_disc_clean: CONN_TIMEOUT ep %p cm %p %s\n", + ep, cm_ptr, dapl_cm_state_str(cm_ptr->state)); + + /* schedule release of socket and local resources */ + dapli_cm_free(cm_ptr); + } +} + +/* + * dapl_ib_setup_conn_listener + * + * Have the CM set up a connection listener. + * + * Input: + * ibm_hca_handle HCA handle + * qp_handle QP handle + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INTERNAL_ERROR + * DAT_CONN_QUAL_UNAVAILBLE + * DAT_CONN_QUAL_IN_USE + * + */ +DAT_RETURN +dapls_ib_setup_conn_listener(IN DAPL_IA *ia, + IN DAT_UINT64 sid, + IN DAPL_SP *sp) +{ + ib_cm_srvc_handle_t cm = NULL; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " listen(ia %p ServiceID %x sp %p)\n", + ia, sid, sp); + + /* reserve local port, then allocate CM object */ + if (!ucm_get_port(&ia->hca_ptr->ib_trans, (uint16_t)sid)) { + dapl_dbg_log(DAPL_DBG_TYPE_WARN, + " listen: ERROR %s on conn_qual %x\n", + strerror(errno), sid); + return DAT_CONN_QUAL_IN_USE; + } + + /* cm_create will setup saddr for listen server */ + if ((cm = dapls_ib_cm_create(NULL)) == NULL) + return DAT_INSUFFICIENT_RESOURCES; + + /* LISTEN: init DST address and QP info to local CM server info */ + cm->sp = sp; + cm->hca = ia->hca_ptr; + cm->msg.sport = htons((uint16_t)sid); + cm->msg.sqpn = htonl(ia->hca_ptr->ib_trans.qp->qp_num); + cm->msg.saddr.ib.qp_type = IBV_QPT_UD; + cm->msg.saddr.ib.lid = ia->hca_ptr->ib_trans.addr.ib.lid; + dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], + &cm->hca->ib_trans.addr.ib.gid, 16); + + /* save cm_handle reference in service 
point */ + sp->cm_srvc_handle = cm; + + /* queue up listen socket to process inbound CR's */ + cm->state = DCM_LISTEN; + dapli_queue_listen(cm); + + return DAT_SUCCESS; +} + + +/* + * dapl_ib_remove_conn_listener + * + * Have the CM remove a connection listener. + * + * Input: + * ia_handle IA handle + * ServiceID IB Channel Service ID + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INVALID_STATE + * + */ +DAT_RETURN +dapls_ib_remove_conn_listener(IN DAPL_IA *ia, IN DAPL_SP *sp) +{ + ib_cm_srvc_handle_t cm = sp->cm_srvc_handle; + + /* free cm_srvc_handle and port, and mark CM for cleanup */ + if (cm) { + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " remove_listener(ia %p sp %p cm %p psp=%x)\n", + ia, sp, cm, ntohs(cm->msg.dport)); + + sp->cm_srvc_handle = NULL; + dapli_dequeue_listen(cm); + ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport)); + dapls_cm_release(cm); /* last ref, dealloc */ + } + return DAT_SUCCESS; +} + +/* + * dapls_ib_accept_connection + * + * Perform necessary steps to accept a connection + * + * Input: + * cr_handle + * ep_handle + * private_data_size + * private_data + * + * Output: + * none + * + * Returns: + * DAT_SUCCESS + * DAT_INSUFFICIENT_RESOURCES + * DAT_INTERNAL_ERROR + * + */ +DAT_RETURN +dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle, + IN DAT_EP_HANDLE ep_handle, + IN DAT_COUNT p_size, + IN const DAT_PVOID p_data) +{ + DAPL_CR *cr = (DAPL_CR *)cr_handle; + DAPL_EP *ep = (DAPL_EP *)ep_handle; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " accept_connection(cr %p ep %p prd %p,%d)\n", + cr, ep, p_data, p_size); + + /* allocate and attach a QP if necessary */ + if (ep->qp_state == DAPL_QP_STATE_UNATTACHED) { + DAT_RETURN status; + status = dapls_ib_qp_alloc(ep->header.owner_ia, + ep, ep); + if (status != DAT_SUCCESS) + return status; + } + return (dapli_accept_usr(ep, cr, p_size, p_data)); +} + +/* + * dapls_ib_reject_connection + * + * Reject a connection + * + * Input: + * cr_handle + * + * Output: + * none + * + * Returns: 
+ * DAT_SUCCESS + * DAT_INTERNAL_ERROR + * + */ +DAT_RETURN +dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm, + IN int reason, + IN DAT_COUNT psize, IN const DAT_PVOID pdata) +{ + dapl_dbg_log(DAPL_DBG_TYPE_EP, + " reject(cm %p reason %x, pdata %p, psize %d)\n", + cm, reason, pdata, psize); + + if (psize > DCM_MAX_PDATA_SIZE) + return DAT_LENGTH_ERROR; + + /* cr_thread will destroy CR, update saddr lid, gid, qp_type info */ + dapl_os_lock(&cm->lock); + dapl_log(DAPL_DBG_TYPE_CM, + " PASSIVE: REJECTING CM_REQ:" + " cm %p op %s, st %s slid %x iqp %x port %x ->" + " dlid %x iqp %x port %x\n", cm, + dapl_cm_op_str(ntohs(cm->msg.op)), + dapl_cm_state_str(cm->state), + ntohs(cm->hca->ib_trans.addr.ib.lid), + ntohl(cm->msg.saddr.ib.qpn), + ntohs(cm->msg.sport), ntohs(cm->msg.daddr.ib.lid), + ntohl(cm->msg.daddr.ib.qpn), ntohs(cm->msg.dport)); + + cm->state = DCM_REJECTED; + cm->msg.saddr.ib.lid = cm->hca->ib_trans.addr.ib.lid; + cm->msg.saddr.ib.qp_type = cm->msg.daddr.ib.qp_type; + dapl_os_memcpy(&cm->msg.saddr.ib.gid[0], + &cm->hca->ib_trans.addr.ib.gid, 16); + cm->msg.op = htons(DCM_REJ_USER); + + if (ucm_send(&cm->hca->ib_trans, &cm->msg, pdata, psize)) { + dapl_log(DAPL_DBG_TYPE_WARN, + " cm_reject: send ERR: %s\n", strerror(errno)); + dapl_os_unlock(&cm->lock); + return DAT_INTERNAL_ERROR; + } + dapl_os_unlock(&cm->lock); + dapli_cm_free(cm); + return DAT_SUCCESS; +} + +/* + * dapls_ib_cm_remote_addr + * + * Obtain the remote IP address given a connection + * + * Input: + * cr_handle + * + * Output: + * remote_ia_address: where to place the remote address + * + * Returns: + * DAT_SUCCESS + * DAT_INVALID_HANDLE + * + */ +DAT_RETURN +dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, + OUT DAT_SOCK_ADDR6 * remote_ia_address) +{ + DAPL_HEADER *header; + dp_ib_cm_handle_t cm; + + dapl_dbg_log(DAPL_DBG_TYPE_EP, + "dapls_ib_cm_remote_addr(dat_handle %p, ....)\n", + dat_handle); + + header = (DAPL_HEADER *) dat_handle; + + if (header->magic == DAPL_MAGIC_EP) + cm = 
dapl_get_cm_from_ep((DAPL_EP *) dat_handle); + else if (header->magic == DAPL_MAGIC_CR) + cm = ((DAPL_CR *) dat_handle)->ib_cm_handle; + else + return DAT_INVALID_HANDLE; + + dapl_os_memcpy(remote_ia_address, + &cm->msg.daddr, sizeof(DAT_SOCK_ADDR6)); + + return DAT_SUCCESS; +} + +int dapls_ib_private_data_size( + IN DAPL_HCA *hca_ptr) +{ + return DCM_MAX_PDATA_SIZE; +} + +#if defined(_WIN32) || defined(_WIN64) + +void cm_thread(void *arg) +{ + struct dapl_hca *hca = arg; + dp_ib_cm_handle_t cm, next; + DWORD time_ms; + + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread: ENTER hca %p\n", hca); + dapl_os_lock(&hca->ib_trans.lock); + for (hca->ib_trans.cm_state = IB_THREAD_RUN; + hca->ib_trans.cm_state == IB_THREAD_RUN || + !dapl_llist_is_empty(&hca->ib_trans.list); + dapl_os_lock(&hca->ib_trans.lock)) { + + time_ms = INFINITE; + CompSetZero(&hca->ib_trans.signal.set); + CompSetAdd(&hca->ib_hca_handle->channel, &hca->ib_trans.signal.set); + CompSetAdd(&hca->ib_trans.rch->comp_channel, &hca->ib_trans.signal.set); + CompSetAdd(&hca->ib_trans.ib_cq->comp_channel, &hca->ib_trans.signal.set); + + next = dapl_llist_is_empty(&hca->ib_trans.list) ? 
NULL : + dapl_llist_peek_head(&hca->ib_trans.list); + + while (next) { + cm = next; + next = dapl_llist_next_entry(&hca->ib_trans.list, + (DAPL_LLIST_ENTRY *)&cm->local_entry); + dapls_cm_acquire(cm); /* hold thread ref */ + dapl_os_lock(&cm->lock); + if (cm->state == DCM_FREE || + hca->ib_trans.cm_state != IB_THREAD_RUN) { + dapl_os_unlock(&cm->lock); + dapl_log(DAPL_DBG_TYPE_CM, + " CM FREE: %p ep=%p st=%s refs=%d\n", + cm, cm->ep, dapl_cm_state_str(cm->state), + cm->ref_count); + + dapls_cm_release(cm); /* release alloc ref */ + dapli_cm_dequeue(cm); /* release workq ref */ + dapls_cm_release(cm); /* release thread ref */ + continue; + } + dapl_os_unlock(&cm->lock); + ucm_check_timers(cm, &time_ms); + dapls_cm_release(cm); /* release thread ref */ + } + + dapl_os_unlock(&hca->ib_trans.lock); + + hca->ib_hca_handle->channel.Milliseconds = time_ms; + hca->ib_trans.rch->comp_channel.Milliseconds = time_ms; + hca->ib_trans.ib_cq->comp_channel.Milliseconds = time_ms; + CompSetPoll(&hca->ib_trans.signal.set, time_ms); + + hca->ib_hca_handle->channel.Milliseconds = 0; + hca->ib_trans.rch->comp_channel.Milliseconds = 0; + hca->ib_trans.ib_cq->comp_channel.Milliseconds = 0; + + ucm_recv(&hca->ib_trans); + ucm_async_event(hca); + dapli_cq_event_cb(&hca->ib_trans); + } + + dapl_os_unlock(&hca->ib_trans.lock); + hca->ib_trans.cm_state = IB_THREAD_EXIT; + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread(hca %p) exit\n", hca); +} + +#else // _WIN32 || _WIN64 + +void cm_thread(void *arg) +{ + struct dapl_hca *hca = arg; + dp_ib_cm_handle_t cm, next; + struct dapl_fd_set *set; + char rbuf[2]; + int time_ms; + + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread: ENTER hca %p\n", hca); + set = dapl_alloc_fd_set(); + if (!set) + goto out; + + dapl_os_lock(&hca->ib_trans.lock); + hca->ib_trans.cm_state = IB_THREAD_RUN; + + while (1) { + time_ms = -1; /* reset to blocking */ + dapl_fd_zero(set); + dapl_fd_set(hca->ib_trans.signal.scm[0], set, DAPL_FD_READ); + 
dapl_fd_set(hca->ib_hca_handle->async_fd, set, DAPL_FD_READ); + dapl_fd_set(hca->ib_trans.rch->fd, set, DAPL_FD_READ); + dapl_fd_set(hca->ib_trans.ib_cq->fd, set, DAPL_FD_READ); + + if (!dapl_llist_is_empty(&hca->ib_trans.list)) + next = dapl_llist_peek_head(&hca->ib_trans.list); + else + next = NULL; + + while (next) { + cm = next; + next = dapl_llist_next_entry( + &hca->ib_trans.list, + (DAPL_LLIST_ENTRY *)&cm->local_entry); + dapls_cm_acquire(cm); /* hold thread ref */ + dapl_os_lock(&cm->lock); + if (cm->state == DCM_FREE || + hca->ib_trans.cm_state != IB_THREAD_RUN) { + dapl_os_unlock(&cm->lock); + dapl_log(DAPL_DBG_TYPE_CM, + " CM FREE: %p ep=%p st=%s refs=%d\n", + cm, cm->ep, dapl_cm_state_str(cm->state), + cm->ref_count); + + dapls_cm_release(cm); /* release alloc ref */ + dapli_cm_dequeue(cm); /* release workq ref */ + dapls_cm_release(cm); /* release thread ref */ + continue; + } + dapl_os_unlock(&cm->lock); + ucm_check_timers(cm, &time_ms); + dapls_cm_release(cm); /* release thread ref */ + } + + /* set to exit and all resources destroyed */ + if ((hca->ib_trans.cm_state != IB_THREAD_RUN) && + (dapl_llist_is_empty(&hca->ib_trans.list))) + break; + + dapl_os_unlock(&hca->ib_trans.lock); + dapl_select(set, time_ms); + + /* Process events: CM, ASYNC, NOTIFY THREAD */ + if (dapl_poll(hca->ib_trans.rch->fd, + DAPL_FD_READ) == DAPL_FD_READ) { + ucm_recv(&hca->ib_trans); + } + if (dapl_poll(hca->ib_hca_handle->async_fd, + DAPL_FD_READ) == DAPL_FD_READ) { + ucm_async_event(hca); + } + if (dapl_poll(hca->ib_trans.ib_cq->fd, + DAPL_FD_READ) == DAPL_FD_READ) { + dapli_cq_event_cb(&hca->ib_trans); + } + while (dapl_poll(hca->ib_trans.signal.scm[0], + DAPL_FD_READ) == DAPL_FD_READ) { + recv(hca->ib_trans.signal.scm[0], rbuf, 2, 0); + } + dapl_os_lock(&hca->ib_trans.lock); + + /* set to exit and all resources destroyed */ + if ((hca->ib_trans.cm_state != IB_THREAD_RUN) && + (dapl_llist_is_empty(&hca->ib_trans.list))) + break; + } + + 
dapl_os_unlock(&hca->ib_trans.lock); + free(set); +out: + hca->ib_trans.cm_state = IB_THREAD_EXIT; + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cm_thread(hca %p) exit\n", hca); +} +#endif + +#ifdef DAPL_COUNTERS +static char _ctr_host_[128]; +/* Debug aid: List all Connections in process and state */ +void dapls_print_cm_list(IN DAPL_IA *ia_ptr) +{ + /* Print in process CM's for this IA, if debug type set */ + int i = 0; + dp_ib_cm_handle_t cm, next_cm; + struct dapl_llist_entry **list; + DAPL_OS_LOCK *lock; + + /* LISTEN LIST */ + list = &ia_ptr->hca_ptr->ib_trans.llist; + lock = &ia_ptr->hca_ptr->ib_trans.llock; + + dapl_os_lock(lock); + if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list)) + next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list); + else + next_cm = NULL; + + gethostname(_ctr_host_, sizeof(_ctr_host_)); + printf("\n [%s:%x] DAPL IA LISTEN/CONNECTIONS IN PROCESS:\n", + _ctr_host_ , dapl_os_getpid()); + + while (next_cm) { + cm = next_cm; + next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list, + (DAPL_LLIST_ENTRY*)&cm->local_entry); + + printf( " LISTEN[%d]: sp %p %s uCM_QP: %x %x %x l_pid %x (%x)\n", + i, cm->sp, dapl_cm_state_str(cm->state), + ntohs(cm->msg.saddr.ib.lid), ntohs(cm->msg.sport), + ntohl(cm->msg.sqpn), ntohl(*(DAT_UINT32*)cm->msg.resv), + ntohl(*(DAT_UINT32*)cm->msg.resv)); + i++; + } + dapl_os_unlock(lock); + + /* CONNECTION LIST */ + list = &ia_ptr->hca_ptr->ib_trans.list; + lock = &ia_ptr->hca_ptr->ib_trans.lock; + + dapl_os_lock(lock); + if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)list)) + next_cm = dapl_llist_peek_head((DAPL_LLIST_HEAD*)list); + else + next_cm = NULL; + + while (next_cm) { + cm = next_cm; + next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list, + (DAPL_LLIST_ENTRY*)&cm->local_entry); + + printf( " CONN[%d]: ep %p cm %p %s %s" + " %x %x %x %s %x %x %x r_pid %x (%x)\n", + i, cm->ep, cm, + cm->msg.saddr.ib.qp_type == IBV_QPT_RC ? 
"RC" : "UD", + dapl_cm_state_str(cm->state), + ntohs(cm->msg.saddr.ib.lid), + ntohs(cm->msg.sport), + ntohl(cm->msg.saddr.ib.qpn), + cm->sp ? "<-" : "->", + ntohs(cm->msg.daddr.ib.lid), + ntohs(cm->msg.dport), + ntohl(cm->msg.daddr.ib.qpn), + ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv), + ntohs(cm->msg.op) == DCM_REQ ? 0 : ntohl(*(DAT_UINT32*)cm->msg.resv)); + i++; + } + printf("\n"); + dapl_os_unlock(lock); +} +#endif diff --git a/trunk/ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h b/trunk/ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h index 25ce963e..920b6c45 100644 --- a/trunk/ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h +++ b/trunk/ulp/dapl2/dapl/openib_ucm/dapl_ib_util.h @@ -1,136 +1,138 @@ -/* - * Copyright (c) 2009 Intel Corporation. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. 
- */ - -#ifndef _DAPL_IB_UTIL_H_ -#define _DAPL_IB_UTIL_H_ -#define _OPENIB_SCM_ - -#include -#include "openib_osd.h" -#include "dapl_ib_common.h" - -/* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */ -struct ib_cm_handle -{ - struct dapl_llist_entry list_entry; - struct dapl_llist_entry local_entry; - DAPL_OS_WAIT_OBJECT event; - DAPL_OS_LOCK lock; - DAPL_OS_TIMEVAL timer; - int ref_count; - int state; - int retries; - struct dapl_hca *hca; - struct dapl_sp *sp; - struct dapl_ep *ep; - struct ibv_ah *ah; - uint16_t p_size; /* accept p_data, for retries */ - uint8_t p_data[DCM_MAX_PDATA_SIZE]; - ib_cm_msg_t msg; -}; - -typedef struct ib_cm_handle *dp_ib_cm_handle_t; -typedef dp_ib_cm_handle_t ib_cm_srvc_handle_t; - -/* Definitions */ -#define IB_INVALID_HANDLE NULL - -/* ib_hca_transport_t, specific to this implementation */ -typedef struct _ib_hca_transport -{ - struct ibv_device *ib_dev; - struct dapl_hca *hca; - struct ibv_context *ib_ctx; - struct ibv_comp_channel *ib_cq; - ib_cq_handle_t ib_cq_empty; - int destroy; - int cm_state; - DAPL_OS_THREAD thread; - DAPL_OS_LOCK lock; /* connect list */ - struct dapl_llist_entry *list; - DAPL_OS_LOCK llock; /* listen list */ - struct dapl_llist_entry *llist; - ib_async_handler_t async_unafiliated; - void *async_un_ctx; - ib_async_cq_handler_t async_cq_error; - ib_async_dto_handler_t async_cq; - ib_async_qp_handler_t async_qp_error; - union dcm_addr addr; /* lid, port, qp_num, gid */ - int max_inline_send; - int rd_atom_in; - int rd_atom_out; - uint8_t ack_timer; - uint8_t ack_retry; - uint8_t rnr_timer; - uint8_t rnr_retry; - uint8_t global; - uint8_t hop_limit; - uint8_t tclass; - uint8_t mtu; - DAT_NAMED_ATTR named_attr; - struct dapl_thread_signal signal; - int cqe; - int qpe; - int retries; - int cm_timer; - int rep_time; - int rtu_time; - DAPL_OS_LOCK slock; - int s_hd; - int s_tl; - struct ibv_pd *pd; - struct ibv_cq *scq; - struct ibv_cq *rcq; - struct ibv_qp *qp; - struct ibv_mr 
*mr_rbuf; - struct ibv_mr *mr_sbuf; - ib_cm_msg_t *sbuf; - ib_cm_msg_t *rbuf; - struct ibv_comp_channel *rch; - struct ibv_ah **ah; - DAPL_OS_LOCK plock; - uint8_t *sid; /* Sevice IDs, port space, bitarray? */ - uint8_t sl; - uint16_t pkey; - int pkey_idx; - -} ib_hca_transport_t; - -/* prototypes */ -void cm_thread(void *arg); -void ucm_async_event(struct dapl_hca *hca); -void dapli_cq_event_cb(struct _ib_hca_transport *tp); -void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr); -void dapls_cm_release(dp_ib_cm_handle_t cm_ptr); -void dapls_cm_free(dp_ib_cm_handle_t cm_ptr); - -#ifdef DAPL_COUNTERS -void dapls_print_cm_list(IN DAPL_IA *ia_ptr); -#endif - -#endif /* _DAPL_IB_UTIL_H_ */ - +/* + * Copyright (c) 2009 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +#ifndef _DAPL_IB_UTIL_H_ +#define _DAPL_IB_UTIL_H_ +#define _OPENIB_SCM_ + +#include +#include "openib_osd.h" +#include "dapl_ib_common.h" + +/* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */ +struct ib_cm_handle +{ + struct dapl_llist_entry list_entry; + struct dapl_llist_entry local_entry; + DAPL_OS_WAIT_OBJECT event; + DAPL_OS_LOCK lock; + DAPL_OS_TIMEVAL timer; + int ref_count; + int state; + int retries; + struct dapl_hca *hca; + struct dapl_sp *sp; + struct dapl_ep *ep; + struct ibv_ah *ah; + uint16_t p_size; /* accept p_data, for retries */ + uint8_t p_data[DCM_MAX_PDATA_SIZE]; + ib_cm_msg_t msg; +}; + +typedef struct ib_cm_handle *dp_ib_cm_handle_t; +typedef dp_ib_cm_handle_t ib_cm_srvc_handle_t; + +/* Definitions */ +#define IB_INVALID_HANDLE NULL + +/* ib_hca_transport_t, specific to this implementation */ +typedef struct _ib_hca_transport +{ + struct ibv_device *ib_dev; + struct dapl_hca *hca; + struct ibv_context *ib_ctx; + struct ibv_comp_channel *ib_cq; + ib_cq_handle_t ib_cq_empty; + int destroy; + int cm_state; + DAPL_OS_THREAD thread; + DAPL_OS_LOCK lock; /* connect list */ + struct dapl_llist_entry *list; + DAPL_OS_LOCK llock; /* listen list */ + struct dapl_llist_entry *llist; + ib_async_handler_t async_unafiliated; + void *async_un_ctx; + ib_async_cq_handler_t async_cq_error; + ib_async_dto_handler_t async_cq; + ib_async_qp_handler_t async_qp_error; + union dcm_addr addr; /* lid, port, qp_num, gid */ + int max_inline_send; + int rd_atom_in; + int rd_atom_out; + uint8_t ack_timer; + uint8_t ack_retry; + uint8_t rnr_timer; + uint8_t rnr_retry; + uint8_t global; + uint8_t hop_limit; + uint8_t tclass; + uint8_t mtu; + DAT_NAMED_ATTR named_attr; + struct dapl_thread_signal signal; + int cqe; + int qpe; + int burst; + int retries; + int cm_timer; + int rep_time; + int rtu_time; + DAPL_OS_LOCK slock; + int s_hd; + int s_tl; + struct ibv_pd *pd; + struct ibv_cq *scq; + struct ibv_cq *rcq; + struct ibv_qp *qp; + 
struct ibv_mr *mr_rbuf; + struct ibv_mr *mr_sbuf; + ib_cm_msg_t *sbuf; + ib_cm_msg_t *rbuf; + struct ibv_comp_channel *rch; + struct ibv_ah **ah; + DAPL_OS_LOCK plock; + uint16_t lid; + uint8_t *sid; /* Sevice IDs, port space, bitarray? */ + uint8_t sl; + uint16_t pkey; + int pkey_idx; + +} ib_hca_transport_t; + +/* prototypes */ +void cm_thread(void *arg); +void ucm_async_event(struct dapl_hca *hca); +void dapli_cq_event_cb(struct _ib_hca_transport *tp); +void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr); +void dapls_cm_release(dp_ib_cm_handle_t cm_ptr); +void dapls_cm_free(dp_ib_cm_handle_t cm_ptr); + +#ifdef DAPL_COUNTERS +void dapls_print_cm_list(IN DAPL_IA *ia_ptr); +#endif + +#endif /* _DAPL_IB_UTIL_H_ */ + diff --git a/trunk/ulp/dapl2/dapl/openib_ucm/device.c b/trunk/ulp/dapl2/dapl/openib_ucm/device.c index b7d9efd3..ff038c6a 100644 --- a/trunk/ulp/dapl2/dapl/openib_ucm/device.c +++ b/trunk/ulp/dapl2/dapl/openib_ucm/device.c @@ -1,670 +1,673 @@ -/* - * Copyright (c) 2009 Intel Corporation. All rights reserved. - * - * This Software is licensed under one of the following licenses: - * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. - * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. - * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. - * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. 
- * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. - */ - -#include "openib_osd.h" -#include "dapl.h" -#include "dapl_adapter_util.h" -#include "dapl_ib_util.h" -#include "dapl_osd.h" - -#include - -static void ucm_service_destroy(IN DAPL_HCA *hca); -static int ucm_service_create(IN DAPL_HCA *hca); - -#if defined (_WIN32) -#include - -static int32_t create_os_signal(IN DAPL_HCA * hca_ptr) -{ - return CompSetInit(&hca_ptr->ib_trans.signal.set); -} - -static void destroy_os_signal(IN DAPL_HCA * hca_ptr) -{ - CompSetCleanup(&hca_ptr->ib_trans.signal.set); -} - -static int dapls_config_verbs(struct ibv_context *verbs) -{ - verbs->channel.Milliseconds = 0; - return 0; -} - -static int dapls_config_comp_channel(struct ibv_comp_channel *channel) -{ - channel->comp_channel.Milliseconds = 0; - return 0; -} - -#else // _WIN32 - -static int32_t create_os_signal(IN DAPL_HCA * hca_ptr) -{ - DAPL_SOCKET listen_socket; - struct sockaddr_in addr; - socklen_t addrlen = sizeof(addr); - int ret; - - listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - if (listen_socket == DAPL_INVALID_SOCKET) - return 1; - - memset(&addr, 0, sizeof addr); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(0x7f000001); - ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr); - if (ret) - goto err1; - - ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen); - if (ret) - goto err1; - - ret = listen(listen_socket, 0); - if (ret) - goto err1; - - hca_ptr->ib_trans.signal.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - if (hca_ptr->ib_trans.signal.scm[1] == DAPL_INVALID_SOCKET) - goto err1; - - ret = connect(hca_ptr->ib_trans.signal.scm[1], - (struct sockaddr *)&addr, sizeof(addr)); - if (ret) - goto err2; - - hca_ptr->ib_trans.signal.scm[0] = accept(listen_socket, NULL, NULL); - if 
(hca_ptr->ib_trans.signal.scm[0] == DAPL_INVALID_SOCKET) - goto err2; - - closesocket(listen_socket); - return 0; - - err2: - closesocket(hca_ptr->ib_trans.signal.scm[1]); - err1: - closesocket(listen_socket); - return 1; -} - -static void destroy_os_signal(IN DAPL_HCA * hca_ptr) -{ - closesocket(hca_ptr->ib_trans.signal.scm[0]); - closesocket(hca_ptr->ib_trans.signal.scm[1]); -} - -static int dapls_config_fd(int fd) -{ - int opts; - - opts = fcntl(fd, F_GETFL); - if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) { - dapl_log(DAPL_DBG_TYPE_ERR, - " dapls_config_fd: fcntl on fd %d ERR %d %s\n", - fd, opts, strerror(errno)); - return errno; - } - - return 0; -} - -static int dapls_config_verbs(struct ibv_context *verbs) -{ - return dapls_config_fd(verbs->async_fd); -} - -static int dapls_config_comp_channel(struct ibv_comp_channel *channel) -{ - return dapls_config_fd(channel->fd); -} - -#endif - -/* - * dapls_ib_init, dapls_ib_release - * - * Initialize Verb related items for device open - * - * Input: - * none - * - * Output: - * none - * - * Returns: - * 0 success, -1 error - * - */ -int32_t dapls_ib_init(void) -{ - return 0; -} - -int32_t dapls_ib_release(void) -{ - return 0; -} - -/* - * dapls_ib_open_hca - * - * Open HCA - * - * Input: - * *hca_name pointer to provider device name - * *ib_hca_handle_p pointer to provide HCA handle - * - * Output: - * none - * - * Return: - * DAT_SUCCESS - * dapl_convert_errno - * - */ -DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr) -{ - struct ibv_device **dev_list; - struct ibv_port_attr port_attr; - int i; - DAT_RETURN dat_status; - - /* Get list of all IB devices, find match, open */ - dev_list = ibv_get_device_list(NULL); - if (!dev_list) { - dapl_dbg_log(DAPL_DBG_TYPE_ERR, - " open_hca: ibv_get_device_list() failed\n", - hca_name); - return DAT_INTERNAL_ERROR; - } - - for (i = 0; dev_list[i]; ++i) { - hca_ptr->ib_trans.ib_dev = dev_list[i]; - if 
(!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - hca_name)) - goto found; - } - - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: device %s not found\n", hca_name); - goto err; - -found: - - hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev); - if (!hca_ptr->ib_hca_handle) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: dev open failed for %s, err=%s\n", - ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - strerror(errno)); - goto err; - } - hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle; - dapls_config_verbs(hca_ptr->ib_hca_handle); - - /* get lid for this hca-port, network order */ - if (ibv_query_port(hca_ptr->ib_hca_handle, - (uint8_t)hca_ptr->port_num, &port_attr)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: get lid ERR for %s, err=%s\n", - ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - strerror(errno)); - goto err; - } else { - hca_ptr->ib_trans.addr.ib.lid = htons(port_attr.lid); - } - - /* get gid for this hca-port, network order */ - if (ibv_query_gid(hca_ptr->ib_hca_handle, - (uint8_t) hca_ptr->port_num, 0, - (union ibv_gid *)&hca_ptr->ib_trans.addr.ib.gid)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: query GID ERR for %s, err=%s\n", - ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - strerror(errno)); - goto err; - } - - /* set RC tunables via enviroment or default */ - hca_ptr->ib_trans.max_inline_send = - dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_IB_DEFAULT); - hca_ptr->ib_trans.ack_retry = - dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY); - hca_ptr->ib_trans.ack_timer = - dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER); - hca_ptr->ib_trans.rnr_retry = - dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY); - hca_ptr->ib_trans.rnr_timer = - dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER); - hca_ptr->ib_trans.global = - dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL); - hca_ptr->ib_trans.hop_limit = - dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT); - hca_ptr->ib_trans.tclass = - 
dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS); - hca_ptr->ib_trans.mtu = - dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU)); - - /* initialize CM list, LISTEN, SND queue, PSP array, locks */ - if ((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS) - goto err; - - if ((dapl_os_lock_init(&hca_ptr->ib_trans.llock)) != DAT_SUCCESS) - goto err; - - if ((dapl_os_lock_init(&hca_ptr->ib_trans.slock)) != DAT_SUCCESS) - goto err; - - if ((dapl_os_lock_init(&hca_ptr->ib_trans.plock)) != DAT_SUCCESS) - goto err; - - /* EVD events without direct CQ channels, CNO support */ - hca_ptr->ib_trans.ib_cq = - ibv_create_comp_channel(hca_ptr->ib_hca_handle); - if (hca_ptr->ib_trans.ib_cq == NULL) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: ibv_create_comp_channel ERR %s\n", - strerror(errno)); - goto bail; - } - dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq); - - /* initialize CM and listen lists on this HCA uCM QP */ - dapl_llist_init_head(&hca_ptr->ib_trans.list); - dapl_llist_init_head(&hca_ptr->ib_trans.llist); - - /* create uCM qp services */ - if (ucm_service_create(hca_ptr)) - goto bail; - - if (create_os_signal(hca_ptr)) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: failed to init cr pipe - %s\n", - strerror(errno)); - goto bail; - } - - /* create thread to process inbound connect request */ - hca_ptr->ib_trans.cm_state = IB_THREAD_INIT; - dat_status = dapl_os_thread_create(cm_thread, - (void *)hca_ptr, - &hca_ptr->ib_trans.thread); - if (dat_status != DAT_SUCCESS) { - dapl_log(DAPL_DBG_TYPE_ERR, - " open_hca: failed to create thread\n"); - goto bail; - } - - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, - " open_hca: devname %s, ctx %p port %d, hostname_IP %s\n", - ibv_get_device_name(hca_ptr->ib_trans.ib_dev), - hca_ptr->ib_hca_handle, - hca_ptr->port_num, - inet_ntoa(((struct sockaddr_in *) - &hca_ptr->hca_address)->sin_addr)); - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, - " open_hca: QPN 0x%x LID 0x%x GID Subnet 0x" F64x "" - " ID 0x" F64x "\n", - 
ntohl(hca_ptr->ib_trans.addr.ib.qpn), - ntohs(hca_ptr->ib_trans.addr.ib.lid), - (unsigned long long) - ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[0]), - (unsigned long long) - ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[8])); - - /* save LID, GID, QPN, PORT address information, for ia_queries */ - /* Set AF_INET6 to insure callee address storage of 28 bytes */ - hca_ptr->ib_trans.hca = hca_ptr; - hca_ptr->ib_trans.addr.ib.family = AF_INET6; - hca_ptr->ib_trans.addr.ib.qp_type = IBV_QPT_UD; - memcpy(&hca_ptr->hca_address, - &hca_ptr->ib_trans.addr, - sizeof(union dcm_addr)); - - ibv_free_device_list(dev_list); - - /* wait for cm_thread */ - while (hca_ptr->ib_trans.cm_state != IB_THREAD_RUN) - dapl_os_sleep_usec(1000); - - return dat_status; - -bail: - ucm_service_destroy(hca_ptr); - ibv_close_device(hca_ptr->ib_hca_handle); - hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; - -err: - ibv_free_device_list(dev_list); - return DAT_INTERNAL_ERROR; -} - -/* - * dapls_ib_close_hca - * - * Open HCA - * - * Input: - * DAPL_HCA provide CA handle - * - * Output: - * none - * - * Return: - * DAT_SUCCESS - * dapl_convert_errno - * - */ -DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr) -{ - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr); - - if (hca_ptr->ib_trans.cm_state == IB_THREAD_RUN) { - hca_ptr->ib_trans.cm_state = IB_THREAD_CANCEL; - dapls_thread_signal(&hca_ptr->ib_trans.signal); - while (hca_ptr->ib_trans.cm_state != IB_THREAD_EXIT) { - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, - " close_hca: waiting for cr_thread\n"); - dapls_thread_signal(&hca_ptr->ib_trans.signal); - dapl_os_sleep_usec(1000); - } - } - - dapl_os_lock_destroy(&hca_ptr->ib_trans.lock); - dapl_os_lock_destroy(&hca_ptr->ib_trans.llock); - destroy_os_signal(hca_ptr); - ucm_service_destroy(hca_ptr); - - if (hca_ptr->ib_trans.ib_cq) - ibv_destroy_comp_channel(hca_ptr->ib_trans.ib_cq); - - if (hca_ptr->ib_trans.ib_cq_empty) { - struct ibv_comp_channel *channel; - channel = 
hca_ptr->ib_trans.ib_cq_empty->channel; - ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty); - ibv_destroy_comp_channel(channel); - } - - if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) { - if (ibv_close_device(hca_ptr->ib_hca_handle)) - return (dapl_convert_errno(errno, "ib_close_device")); - hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; - } - - return (DAT_SUCCESS); -} - -/* Create uCM endpoint services, allocate remote_ah's array */ -static void ucm_service_destroy(IN DAPL_HCA *hca) -{ - ib_hca_transport_t *tp = &hca->ib_trans; - int msg_size = sizeof(ib_cm_msg_t); - - if (tp->mr_sbuf) - ibv_dereg_mr(tp->mr_sbuf); - - if (tp->mr_rbuf) - ibv_dereg_mr(tp->mr_rbuf); - - if (tp->qp) - ibv_destroy_qp(tp->qp); - - if (tp->scq) - ibv_destroy_cq(tp->scq); - - if (tp->rcq) - ibv_destroy_cq(tp->rcq); - - if (tp->rch) - ibv_destroy_comp_channel(tp->rch); - - if (tp->ah) { - int i; - - for (i = 0;i < 0xffff; i++) { - if (tp->ah[i]) - ibv_destroy_ah(tp->ah[i]); - } - dapl_os_free(tp->ah, (sizeof(*tp->ah) * 0xffff)); - } - - if (tp->pd) - ibv_dealloc_pd(tp->pd); - - if (tp->sid) - dapl_os_free(tp->sid, (sizeof(*tp->sid) * 0xffff)); - - if (tp->rbuf) - dapl_os_free(tp->rbuf, (msg_size * tp->qpe)); - - if (tp->sbuf) - dapl_os_free(tp->sbuf, (msg_size * tp->qpe)); -} - -static int ucm_service_create(IN DAPL_HCA *hca) -{ - struct ibv_qp_init_attr qp_create; - ib_hca_transport_t *tp = &hca->ib_trans; - struct ibv_recv_wr recv_wr, *recv_err; - struct ibv_sge sge; - int i, mlen = sizeof(ib_cm_msg_t); - int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */ - - dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ucm_create: \n"); - - /* setup CM timers and queue sizes */ - tp->retries = dapl_os_get_env_val("DAPL_UCM_RETRY", DCM_RETRY_CNT); - tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME); - tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME); - tp->cm_timer = DAPL_MIN(tp->rep_time,tp->rtu_time); - tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", 
DCM_QP_SIZE); - tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE); - tp->pd = ibv_alloc_pd(hca->ib_hca_handle); - if (!tp->pd) - goto bail; - - dapl_log(DAPL_DBG_TYPE_UTIL, - " create_service: pd %p ctx %p handle 0x%x\n", - tp->pd, tp->pd->context, tp->pd->handle); - - tp->rch = ibv_create_comp_channel(hca->ib_hca_handle); - if (!tp->rch) - goto bail; - - tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0); - if (!tp->scq) - goto bail; - - tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0); - if (!tp->rcq) - goto bail; - - if(ibv_req_notify_cq(tp->rcq, 0)) - goto bail; - - dapl_os_memzero((void *)&qp_create, sizeof(qp_create)); - qp_create.qp_type = IBV_QPT_UD; - qp_create.send_cq = tp->scq; - qp_create.recv_cq = tp->rcq; - qp_create.cap.max_send_wr = qp_create.cap.max_recv_wr = tp->qpe; - qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1; - qp_create.cap.max_inline_data = tp->max_inline_send; - qp_create.qp_context = (void *)hca; - - tp->qp = ibv_create_qp(tp->pd, &qp_create); - if (!tp->qp) - goto bail; - - tp->ah = (ib_ah_handle_t*) dapl_os_alloc(sizeof(ib_ah_handle_t) * 0xffff); - tp->sid = (uint8_t*) dapl_os_alloc(sizeof(uint8_t) * 0xffff); - tp->rbuf = (void*) dapl_os_alloc((mlen + hlen) * tp->qpe); - tp->sbuf = (void*) dapl_os_alloc(mlen * tp->qpe); - - if (!tp->ah || !tp->rbuf || !tp->sbuf || !tp->sid) - goto bail; - - (void)dapl_os_memzero(tp->ah, (sizeof(ib_ah_handle_t) * 0xffff)); - (void)dapl_os_memzero(tp->sid, (sizeof(uint8_t) * 0xffff)); - tp->sid[0] = 1; /* resv slot 0, 0 == no ports available */ - (void)dapl_os_memzero(tp->rbuf, ((mlen + hlen) * tp->qpe)); - (void)dapl_os_memzero(tp->sbuf, (mlen * tp->qpe)); - - tp->mr_sbuf = ibv_reg_mr(tp->pd, tp->sbuf, - (mlen * tp->qpe), - IBV_ACCESS_LOCAL_WRITE); - if (!tp->mr_sbuf) - goto bail; - - tp->mr_rbuf = ibv_reg_mr(tp->pd, tp->rbuf, - ((mlen + hlen) * tp->qpe), - IBV_ACCESS_LOCAL_WRITE); - if (!tp->mr_rbuf) - goto bail; - - /* modify UD QP: init, 
rtr, rts */ - if ((dapls_modify_qp_ud(hca, tp->qp)) != DAT_SUCCESS) - goto bail; - - /* post receive buffers, setup head, tail pointers */ - recv_wr.next = NULL; - recv_wr.sg_list = &sge; - recv_wr.num_sge = 1; - sge.length = mlen + hlen; - sge.lkey = tp->mr_rbuf->lkey; - - for (i = 0; i < tp->qpe; i++) { - recv_wr.wr_id = - (uintptr_t)((char *)&tp->rbuf[i] + - sizeof(struct ibv_grh)); - sge.addr = (uintptr_t) &tp->rbuf[i]; - if (ibv_post_recv(tp->qp, &recv_wr, &recv_err)) - goto bail; - } - - /* save qp_num as part of ia_address, network order */ - tp->addr.ib.qpn = htonl(tp->qp->qp_num); - return 0; -bail: - dapl_log(DAPL_DBG_TYPE_ERR, - " ucm_create_services: ERR %s\n", strerror(errno)); - ucm_service_destroy(hca); - return -1; -} - -void ucm_async_event(struct dapl_hca *hca) -{ - struct ibv_async_event event; - struct _ib_hca_transport *tp = &hca->ib_trans; - - if (!ibv_get_async_event(hca->ib_hca_handle, &event)) { - - switch (event.event_type) { - case IBV_EVENT_CQ_ERR: - { - struct dapl_ep *evd_ptr = - event.element.cq->cq_context; - - dapl_log(DAPL_DBG_TYPE_ERR, - "dapl async_event CQ (%p) ERR %d\n", - evd_ptr, event.event_type); - - /* report up if async callback still setup */ - if (tp->async_cq_error) - tp->async_cq_error(hca->ib_hca_handle, - event.element.cq, - &event, (void *)evd_ptr); - break; - } - case IBV_EVENT_COMM_EST: - { - /* Received msgs on connected QP before RTU */ - dapl_log(DAPL_DBG_TYPE_UTIL, - " async_event COMM_EST(%p) rdata beat RTU\n", - event.element.qp); - - break; - } - case IBV_EVENT_QP_FATAL: - case IBV_EVENT_QP_REQ_ERR: - case IBV_EVENT_QP_ACCESS_ERR: - case IBV_EVENT_QP_LAST_WQE_REACHED: - case IBV_EVENT_SRQ_ERR: - case IBV_EVENT_SRQ_LIMIT_REACHED: - case IBV_EVENT_SQ_DRAINED: - { - struct dapl_ep *ep_ptr = - event.element.qp->qp_context; - - dapl_log(DAPL_DBG_TYPE_ERR, - "dapl async_event QP (%p) ERR %d\n", - ep_ptr, event.event_type); - - /* report up if async callback still setup */ - if (tp->async_qp_error) - 
tp->async_qp_error(hca->ib_hca_handle, - ep_ptr->qp_handle, - &event, (void *)ep_ptr); - break; - } - case IBV_EVENT_PATH_MIG: - case IBV_EVENT_PATH_MIG_ERR: - case IBV_EVENT_DEVICE_FATAL: - case IBV_EVENT_PORT_ACTIVE: - case IBV_EVENT_PORT_ERR: - case IBV_EVENT_LID_CHANGE: - case IBV_EVENT_PKEY_CHANGE: - case IBV_EVENT_SM_CHANGE: - { - dapl_log(DAPL_DBG_TYPE_WARN, - "dapl async_event: DEV ERR %d\n", - event.event_type); - - /* report up if async callback still setup */ - if (tp->async_unafiliated) - tp->async_unafiliated(hca->ib_hca_handle, - &event, - tp->async_un_ctx); - break; - } - case IBV_EVENT_CLIENT_REREGISTER: - /* no need to report this event this time */ - dapl_log(DAPL_DBG_TYPE_UTIL, - " async_event: IBV_CLIENT_REREGISTER\n"); - break; - - default: - dapl_log(DAPL_DBG_TYPE_WARN, - "dapl async_event: %d UNKNOWN\n", - event.event_type); - break; - - } - ibv_ack_async_event(&event); - } -} - +/* + * Copyright (c) 2009 Intel Corporation. All rights reserved. + * + * This Software is licensed under one of the following licenses: + * + * 1) under the terms of the "Common Public License 1.0" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/cpl.php. + * + * 2) under the terms of the "The BSD License" a copy of which is + * available from the Open Source Initiative, see + * http://www.opensource.org/licenses/bsd-license.php. + * + * 3) under the terms of the "GNU General Public License (GPL) Version 2" a + * copy of which is available from the Open Source Initiative, see + * http://www.opensource.org/licenses/gpl-license.php. + * + * Licensee has the right to choose one of the above licenses. + * + * Redistributions of source code must retain the above copyright + * notice and one of the license notices. + * + * Redistributions in binary form must reproduce both the above copyright + * notice, one of the license notices in the documentation + * and/or other materials provided with the distribution. 
+ */ + +#include "openib_osd.h" +#include "dapl.h" +#include "dapl_adapter_util.h" +#include "dapl_ib_util.h" +#include "dapl_osd.h" + +#include + +static void ucm_service_destroy(IN DAPL_HCA *hca); +static int ucm_service_create(IN DAPL_HCA *hca); + +#if defined (_WIN32) +#include + +static int32_t create_os_signal(IN DAPL_HCA * hca_ptr) +{ + return CompSetInit(&hca_ptr->ib_trans.signal.set); +} + +static void destroy_os_signal(IN DAPL_HCA * hca_ptr) +{ + CompSetCleanup(&hca_ptr->ib_trans.signal.set); +} + +static int dapls_config_verbs(struct ibv_context *verbs) +{ + verbs->channel.Milliseconds = 0; + return 0; +} + +static int dapls_config_comp_channel(struct ibv_comp_channel *channel) +{ + channel->comp_channel.Milliseconds = 0; + return 0; +} + +#else // _WIN32 + +static int32_t create_os_signal(IN DAPL_HCA * hca_ptr) +{ + DAPL_SOCKET listen_socket; + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + int ret; + + listen_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (listen_socket == DAPL_INVALID_SOCKET) + return 1; + + memset(&addr, 0, sizeof addr); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(0x7f000001); + ret = bind(listen_socket, (struct sockaddr *)&addr, sizeof addr); + if (ret) + goto err1; + + ret = getsockname(listen_socket, (struct sockaddr *)&addr, &addrlen); + if (ret) + goto err1; + + ret = listen(listen_socket, 0); + if (ret) + goto err1; + + hca_ptr->ib_trans.signal.scm[1] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (hca_ptr->ib_trans.signal.scm[1] == DAPL_INVALID_SOCKET) + goto err1; + + ret = connect(hca_ptr->ib_trans.signal.scm[1], + (struct sockaddr *)&addr, sizeof(addr)); + if (ret) + goto err2; + + hca_ptr->ib_trans.signal.scm[0] = accept(listen_socket, NULL, NULL); + if (hca_ptr->ib_trans.signal.scm[0] == DAPL_INVALID_SOCKET) + goto err2; + + closesocket(listen_socket); + return 0; + + err2: + closesocket(hca_ptr->ib_trans.signal.scm[1]); + err1: + closesocket(listen_socket); + return 1; +} + 
+static void destroy_os_signal(IN DAPL_HCA * hca_ptr) +{ + closesocket(hca_ptr->ib_trans.signal.scm[0]); + closesocket(hca_ptr->ib_trans.signal.scm[1]); +} + +static int dapls_config_fd(int fd) +{ + int opts; + + opts = fcntl(fd, F_GETFL); + if (opts < 0 || fcntl(fd, F_SETFL, opts | O_NONBLOCK) < 0) { + dapl_log(DAPL_DBG_TYPE_ERR, + " dapls_config_fd: fcntl on fd %d ERR %d %s\n", + fd, opts, strerror(errno)); + return errno; + } + + return 0; +} + +static int dapls_config_verbs(struct ibv_context *verbs) +{ + return dapls_config_fd(verbs->async_fd); +} + +static int dapls_config_comp_channel(struct ibv_comp_channel *channel) +{ + return dapls_config_fd(channel->fd); +} + +#endif + +/* + * dapls_ib_init, dapls_ib_release + * + * Initialize Verb related items for device open + * + * Input: + * none + * + * Output: + * none + * + * Returns: + * 0 success, -1 error + * + */ +int32_t dapls_ib_init(void) +{ + return 0; +} + +int32_t dapls_ib_release(void) +{ + return 0; +} + +/* + * dapls_ib_open_hca + * + * Open HCA + * + * Input: + * *hca_name pointer to provider device name + * *ib_hca_handle_p pointer to provide HCA handle + * + * Output: + * none + * + * Return: + * DAT_SUCCESS + * dapl_convert_errno + * + */ +DAT_RETURN dapls_ib_open_hca(IN IB_HCA_NAME hca_name, IN DAPL_HCA * hca_ptr) +{ + struct ibv_device **dev_list; + struct ibv_port_attr port_attr; + int i; + DAT_RETURN dat_status; + + /* Get list of all IB devices, find match, open */ + dev_list = ibv_get_device_list(NULL); + if (!dev_list) { + dapl_dbg_log(DAPL_DBG_TYPE_ERR, + " open_hca: ibv_get_device_list() failed\n", + hca_name); + return DAT_INTERNAL_ERROR; + } + + for (i = 0; dev_list[i]; ++i) { + hca_ptr->ib_trans.ib_dev = dev_list[i]; + if (!strcmp(ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + hca_name)) + goto found; + } + + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: device %s not found\n", hca_name); + goto err; + +found: + + hca_ptr->ib_hca_handle = ibv_open_device(hca_ptr->ib_trans.ib_dev); + if 
(!hca_ptr->ib_hca_handle) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: dev open failed for %s, err=%s\n", + ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + strerror(errno)); + goto err; + } + hca_ptr->ib_trans.ib_ctx = hca_ptr->ib_hca_handle; + dapls_config_verbs(hca_ptr->ib_hca_handle); + + /* get lid for this hca-port, network order */ + if (ibv_query_port(hca_ptr->ib_hca_handle, + (uint8_t)hca_ptr->port_num, &port_attr)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: get lid ERR for %s, err=%s\n", + ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + strerror(errno)); + goto err; + } else { + hca_ptr->ib_trans.addr.ib.lid = htons(port_attr.lid); + hca_ptr->ib_trans.lid = htons(port_attr.lid); + } + + /* get gid for this hca-port, network order */ + if (ibv_query_gid(hca_ptr->ib_hca_handle, + (uint8_t) hca_ptr->port_num, 0, + (union ibv_gid *)&hca_ptr->ib_trans.addr.ib.gid)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: query GID ERR for %s, err=%s\n", + ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + strerror(errno)); + goto err; + } + + /* set RC tunables via enviroment or default */ + hca_ptr->ib_trans.max_inline_send = + dapl_os_get_env_val("DAPL_MAX_INLINE", INLINE_SEND_IB_DEFAULT); + hca_ptr->ib_trans.ack_retry = + dapl_os_get_env_val("DAPL_ACK_RETRY", DCM_ACK_RETRY); + hca_ptr->ib_trans.ack_timer = + dapl_os_get_env_val("DAPL_ACK_TIMER", DCM_ACK_TIMER); + hca_ptr->ib_trans.rnr_retry = + dapl_os_get_env_val("DAPL_RNR_RETRY", DCM_RNR_RETRY); + hca_ptr->ib_trans.rnr_timer = + dapl_os_get_env_val("DAPL_RNR_TIMER", DCM_RNR_TIMER); + hca_ptr->ib_trans.global = + dapl_os_get_env_val("DAPL_GLOBAL_ROUTING", DCM_GLOBAL); + hca_ptr->ib_trans.hop_limit = + dapl_os_get_env_val("DAPL_HOP_LIMIT", DCM_HOP_LIMIT); + hca_ptr->ib_trans.tclass = + dapl_os_get_env_val("DAPL_TCLASS", DCM_TCLASS); + hca_ptr->ib_trans.mtu = + dapl_ib_mtu(dapl_os_get_env_val("DAPL_IB_MTU", DCM_IB_MTU)); + + /* initialize CM list, LISTEN, SND queue, PSP array, locks */ + if 
((dapl_os_lock_init(&hca_ptr->ib_trans.lock)) != DAT_SUCCESS) + goto err; + + if ((dapl_os_lock_init(&hca_ptr->ib_trans.llock)) != DAT_SUCCESS) + goto err; + + if ((dapl_os_lock_init(&hca_ptr->ib_trans.slock)) != DAT_SUCCESS) + goto err; + + if ((dapl_os_lock_init(&hca_ptr->ib_trans.plock)) != DAT_SUCCESS) + goto err; + + /* EVD events without direct CQ channels, CNO support */ + hca_ptr->ib_trans.ib_cq = + ibv_create_comp_channel(hca_ptr->ib_hca_handle); + if (hca_ptr->ib_trans.ib_cq == NULL) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: ibv_create_comp_channel ERR %s\n", + strerror(errno)); + goto bail; + } + dapls_config_comp_channel(hca_ptr->ib_trans.ib_cq); + + /* initialize CM and listen lists on this HCA uCM QP */ + dapl_llist_init_head(&hca_ptr->ib_trans.list); + dapl_llist_init_head(&hca_ptr->ib_trans.llist); + + /* create uCM qp services */ + if (ucm_service_create(hca_ptr)) + goto bail; + + if (create_os_signal(hca_ptr)) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: failed to init cr pipe - %s\n", + strerror(errno)); + goto bail; + } + + /* create thread to process inbound connect request */ + hca_ptr->ib_trans.cm_state = IB_THREAD_INIT; + dat_status = dapl_os_thread_create(cm_thread, + (void *)hca_ptr, + &hca_ptr->ib_trans.thread); + if (dat_status != DAT_SUCCESS) { + dapl_log(DAPL_DBG_TYPE_ERR, + " open_hca: failed to create thread\n"); + goto bail; + } + + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, + " open_hca: devname %s, ctx %p port %d, hostname_IP %s\n", + ibv_get_device_name(hca_ptr->ib_trans.ib_dev), + hca_ptr->ib_hca_handle, + hca_ptr->port_num, + inet_ntoa(((struct sockaddr_in *) + &hca_ptr->hca_address)->sin_addr)); + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, + " open_hca: QPN 0x%x LID 0x%x GID Subnet 0x" F64x "" + " ID 0x" F64x "\n", + ntohl(hca_ptr->ib_trans.addr.ib.qpn), + ntohs(hca_ptr->ib_trans.addr.ib.lid), + (unsigned long long) + ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[0]), + (unsigned long long) + 
ntohll(*(uint64_t*)&hca_ptr->ib_trans.addr.ib.gid[8])); + + /* save LID, GID, QPN, PORT address information, for ia_queries */ + /* Set AF_INET6 to insure callee address storage of 28 bytes */ + hca_ptr->ib_trans.hca = hca_ptr; + hca_ptr->ib_trans.addr.ib.family = AF_INET6; + hca_ptr->ib_trans.addr.ib.qp_type = IBV_QPT_UD; + memcpy(&hca_ptr->hca_address, + &hca_ptr->ib_trans.addr, + sizeof(union dcm_addr)); + + ibv_free_device_list(dev_list); + + /* wait for cm_thread */ + while (hca_ptr->ib_trans.cm_state != IB_THREAD_RUN) + dapl_os_sleep_usec(1000); + + return dat_status; + +bail: + ucm_service_destroy(hca_ptr); + ibv_close_device(hca_ptr->ib_hca_handle); + hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; + +err: + ibv_free_device_list(dev_list); + return DAT_INTERNAL_ERROR; +} + +/* + * dapls_ib_close_hca + * + * Open HCA + * + * Input: + * DAPL_HCA provide CA handle + * + * Output: + * none + * + * Return: + * DAT_SUCCESS + * dapl_convert_errno + * + */ +DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA * hca_ptr) +{ + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " close_hca: %p\n", hca_ptr); + + if (hca_ptr->ib_trans.cm_state == IB_THREAD_RUN) { + hca_ptr->ib_trans.cm_state = IB_THREAD_CANCEL; + dapls_thread_signal(&hca_ptr->ib_trans.signal); + while (hca_ptr->ib_trans.cm_state != IB_THREAD_EXIT) { + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, + " close_hca: waiting for cr_thread\n"); + dapls_thread_signal(&hca_ptr->ib_trans.signal); + dapl_os_sleep_usec(1000); + } + } + + dapl_os_lock_destroy(&hca_ptr->ib_trans.lock); + dapl_os_lock_destroy(&hca_ptr->ib_trans.llock); + destroy_os_signal(hca_ptr); + ucm_service_destroy(hca_ptr); + + if (hca_ptr->ib_trans.ib_cq) + ibv_destroy_comp_channel(hca_ptr->ib_trans.ib_cq); + + if (hca_ptr->ib_trans.ib_cq_empty) { + struct ibv_comp_channel *channel; + channel = hca_ptr->ib_trans.ib_cq_empty->channel; + ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty); + ibv_destroy_comp_channel(channel); + } + + if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) { + if 
(ibv_close_device(hca_ptr->ib_hca_handle)) + return (dapl_convert_errno(errno, "ib_close_device")); + hca_ptr->ib_hca_handle = IB_INVALID_HANDLE; + } + + return (DAT_SUCCESS); +} + +/* Create uCM endpoint services, allocate remote_ah's array */ +static void ucm_service_destroy(IN DAPL_HCA *hca) +{ + ib_hca_transport_t *tp = &hca->ib_trans; + int msg_size = sizeof(ib_cm_msg_t); + + if (tp->mr_sbuf) + ibv_dereg_mr(tp->mr_sbuf); + + if (tp->mr_rbuf) + ibv_dereg_mr(tp->mr_rbuf); + + if (tp->qp) + ibv_destroy_qp(tp->qp); + + if (tp->scq) + ibv_destroy_cq(tp->scq); + + if (tp->rcq) + ibv_destroy_cq(tp->rcq); + + if (tp->rch) + ibv_destroy_comp_channel(tp->rch); + + if (tp->ah) { + int i; + + for (i = 0;i < 0xffff; i++) { + if (tp->ah[i]) + ibv_destroy_ah(tp->ah[i]); + } + dapl_os_free(tp->ah, (sizeof(*tp->ah) * 0xffff)); + } + + if (tp->pd) + ibv_dealloc_pd(tp->pd); + + if (tp->sid) + dapl_os_free(tp->sid, (sizeof(*tp->sid) * 0xffff)); + + if (tp->rbuf) + dapl_os_free(tp->rbuf, (msg_size * tp->qpe)); + + if (tp->sbuf) + dapl_os_free(tp->sbuf, (msg_size * tp->qpe)); +} + +static int ucm_service_create(IN DAPL_HCA *hca) +{ + struct ibv_qp_init_attr qp_create; + ib_hca_transport_t *tp = &hca->ib_trans; + struct ibv_recv_wr recv_wr, *recv_err; + struct ibv_sge sge; + int i, mlen = sizeof(ib_cm_msg_t); + int hlen = sizeof(struct ibv_grh); /* hdr included with UD recv */ + + dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " ucm_create: \n"); + + /* setup CM timers and queue sizes */ + tp->retries = dapl_os_get_env_val("DAPL_UCM_RETRY", DCM_RETRY_CNT); + tp->rep_time = dapl_os_get_env_val("DAPL_UCM_REP_TIME", DCM_REP_TIME); + tp->rtu_time = dapl_os_get_env_val("DAPL_UCM_RTU_TIME", DCM_RTU_TIME); + tp->cm_timer = DAPL_MIN(tp->rep_time,tp->rtu_time); + tp->qpe = dapl_os_get_env_val("DAPL_UCM_QP_SIZE", DCM_QP_SIZE); + tp->cqe = dapl_os_get_env_val("DAPL_UCM_CQ_SIZE", DCM_CQ_SIZE); + tp->burst = dapl_os_get_env_val("DAPL_UCM_TX_BURST", DCM_TX_BURST); + tp->pd = ibv_alloc_pd(hca->ib_hca_handle); + 
if (!tp->pd) + goto bail; + + dapl_log(DAPL_DBG_TYPE_UTIL, + " create_service: pd %p ctx %p handle 0x%x\n", + tp->pd, tp->pd->context, tp->pd->handle); + + tp->rch = ibv_create_comp_channel(hca->ib_hca_handle); + if (!tp->rch) + goto bail; + + tp->scq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, NULL, 0); + if (!tp->scq) + goto bail; + + tp->rcq = ibv_create_cq(hca->ib_hca_handle, tp->cqe, hca, tp->rch, 0); + if (!tp->rcq) + goto bail; + + if(ibv_req_notify_cq(tp->rcq, 0)) + goto bail; + + dapl_os_memzero((void *)&qp_create, sizeof(qp_create)); + qp_create.qp_type = IBV_QPT_UD; + qp_create.send_cq = tp->scq; + qp_create.recv_cq = tp->rcq; + qp_create.cap.max_send_wr = qp_create.cap.max_recv_wr = tp->qpe; + qp_create.cap.max_send_sge = qp_create.cap.max_recv_sge = 1; + qp_create.cap.max_inline_data = tp->max_inline_send; + qp_create.qp_context = (void *)hca; + + tp->qp = ibv_create_qp(tp->pd, &qp_create); + if (!tp->qp) + goto bail; + + tp->ah = (ib_ah_handle_t*) dapl_os_alloc(sizeof(ib_ah_handle_t) * 0xffff); + tp->sid = (uint8_t*) dapl_os_alloc(sizeof(uint8_t) * 0xffff); + tp->rbuf = (void*) dapl_os_alloc((mlen + hlen) * tp->qpe); + tp->sbuf = (void*) dapl_os_alloc(mlen * tp->qpe); + tp->s_hd = tp->s_tl = 0; + + if (!tp->ah || !tp->rbuf || !tp->sbuf || !tp->sid) + goto bail; + + (void)dapl_os_memzero(tp->ah, (sizeof(ib_ah_handle_t) * 0xffff)); + (void)dapl_os_memzero(tp->sid, (sizeof(uint8_t) * 0xffff)); + tp->sid[0] = 1; /* resv slot 0, 0 == no ports available */ + (void)dapl_os_memzero(tp->rbuf, ((mlen + hlen) * tp->qpe)); + (void)dapl_os_memzero(tp->sbuf, (mlen * tp->qpe)); + + tp->mr_sbuf = ibv_reg_mr(tp->pd, tp->sbuf, + (mlen * tp->qpe), + IBV_ACCESS_LOCAL_WRITE); + if (!tp->mr_sbuf) + goto bail; + + tp->mr_rbuf = ibv_reg_mr(tp->pd, tp->rbuf, + ((mlen + hlen) * tp->qpe), + IBV_ACCESS_LOCAL_WRITE); + if (!tp->mr_rbuf) + goto bail; + + /* modify UD QP: init, rtr, rts */ + if ((dapls_modify_qp_ud(hca, tp->qp)) != DAT_SUCCESS) + goto bail; + + /* post 
receive buffers, setup head, tail pointers */ + recv_wr.next = NULL; + recv_wr.sg_list = &sge; + recv_wr.num_sge = 1; + sge.length = mlen + hlen; + sge.lkey = tp->mr_rbuf->lkey; + + for (i = 0; i < tp->qpe; i++) { + recv_wr.wr_id = + (uintptr_t)((char *)&tp->rbuf[i] + + sizeof(struct ibv_grh)); + sge.addr = (uintptr_t) &tp->rbuf[i]; + if (ibv_post_recv(tp->qp, &recv_wr, &recv_err)) + goto bail; + } + + /* save qp_num as part of ia_address, network order */ + tp->addr.ib.qpn = htonl(tp->qp->qp_num); + return 0; +bail: + dapl_log(DAPL_DBG_TYPE_ERR, + " ucm_create_services: ERR %s\n", strerror(errno)); + ucm_service_destroy(hca); + return -1; +} + +void ucm_async_event(struct dapl_hca *hca) +{ + struct ibv_async_event event; + struct _ib_hca_transport *tp = &hca->ib_trans; + + if (!ibv_get_async_event(hca->ib_hca_handle, &event)) { + + switch (event.event_type) { + case IBV_EVENT_CQ_ERR: + { + struct dapl_ep *evd_ptr = + event.element.cq->cq_context; + + dapl_log(DAPL_DBG_TYPE_ERR, + "dapl async_event CQ (%p) ERR %d\n", + evd_ptr, event.event_type); + + /* report up if async callback still setup */ + if (tp->async_cq_error) + tp->async_cq_error(hca->ib_hca_handle, + event.element.cq, + &event, (void *)evd_ptr); + break; + } + case IBV_EVENT_COMM_EST: + { + /* Received msgs on connected QP before RTU */ + dapl_log(DAPL_DBG_TYPE_UTIL, + " async_event COMM_EST(%p) rdata beat RTU\n", + event.element.qp); + + break; + } + case IBV_EVENT_QP_FATAL: + case IBV_EVENT_QP_REQ_ERR: + case IBV_EVENT_QP_ACCESS_ERR: + case IBV_EVENT_QP_LAST_WQE_REACHED: + case IBV_EVENT_SRQ_ERR: + case IBV_EVENT_SRQ_LIMIT_REACHED: + case IBV_EVENT_SQ_DRAINED: + { + struct dapl_ep *ep_ptr = + event.element.qp->qp_context; + + dapl_log(DAPL_DBG_TYPE_ERR, + "dapl async_event QP (%p) ERR %d\n", + ep_ptr, event.event_type); + + /* report up if async callback still setup */ + if (tp->async_qp_error) + tp->async_qp_error(hca->ib_hca_handle, + ep_ptr->qp_handle, + &event, (void *)ep_ptr); + break; + } + 
case IBV_EVENT_PATH_MIG: + case IBV_EVENT_PATH_MIG_ERR: + case IBV_EVENT_DEVICE_FATAL: + case IBV_EVENT_PORT_ACTIVE: + case IBV_EVENT_PORT_ERR: + case IBV_EVENT_LID_CHANGE: + case IBV_EVENT_PKEY_CHANGE: + case IBV_EVENT_SM_CHANGE: + { + dapl_log(DAPL_DBG_TYPE_WARN, + "dapl async_event: DEV ERR %d\n", + event.event_type); + + /* report up if async callback still setup */ + if (tp->async_unafiliated) + tp->async_unafiliated(hca->ib_hca_handle, + &event, + tp->async_un_ctx); + break; + } + case IBV_EVENT_CLIENT_REREGISTER: + /* no need to report this event this time */ + dapl_log(DAPL_DBG_TYPE_UTIL, + " async_event: IBV_CLIENT_REREGISTER\n"); + break; + + default: + dapl_log(DAPL_DBG_TYPE_WARN, + "dapl async_event: %d UNKNOWN\n", + event.event_type); + break; + + } + ibv_ack_async_event(&event); + } +} + -- 2.41.0