From: stansmith
Date: Mon, 3 Mar 2008 19:01:39 +0000 (+0000)
Subject: made a copy
X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=bf0314673d2e416ab8bf2e299ec44321dfe0be40;p=~shefty%2Frdma-win.git

made a copy

git-svn-id: svn://openib.tc.cornell.edu/gen1@964 ad392aa1-c5ef-ae45-8dd8-e69d62a5ef86
---

diff --git a/branches/IBFD/hw/dirs b/branches/IBFD/hw/dirs
new file mode 100644
index 00000000..5905f6c2
--- /dev/null
+++ b/branches/IBFD/hw/dirs
@@ -0,0 +1,2 @@
+DIRS=\
+ mthca
diff --git a/branches/IBFD/hw/mt23108/dirs b/branches/IBFD/hw/mt23108/dirs
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/Makefile b/branches/IBFD/hw/mt23108/kernel/Makefile
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/SOURCES b/branches/IBFD/hw/mt23108/kernel/SOURCES
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/hca.rc b/branches/IBFD/hw/mt23108/kernel/hca.rc
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_data.c b/branches/IBFD/hw/mt23108/kernel/hca_data.c
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_data.h b/branches/IBFD/hw/mt23108/kernel/hca_data.h
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_debug.h b/branches/IBFD/hw/mt23108/kernel/hca_debug.h
new file mode 100644
index 00000000..d3f5a12f
--- /dev/null
+++ b/branches/IBFD/hw/mt23108/kernel/hca_debug.h
@@ -0,0 +1 @@
+
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_direct.c b/branches/IBFD/hw/mt23108/kernel/hca_direct.c
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_driver.c b/branches/IBFD/hw/mt23108/kernel/hca_driver.c
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_driver.h b/branches/IBFD/hw/mt23108/kernel/hca_driver.h
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_mcast.c b/branches/IBFD/hw/mt23108/kernel/hca_mcast.c
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_memory.c b/branches/IBFD/hw/mt23108/kernel/hca_memory.c
new file mode 100644
index 00000000..d3f5a12f
--- /dev/null
+++ b/branches/IBFD/hw/mt23108/kernel/hca_memory.c
@@ -0,0 +1 @@
+
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_smp.c b/branches/IBFD/hw/mt23108/kernel/hca_smp.c
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/hca_verbs.c b/branches/IBFD/hw/mt23108/kernel/hca_verbs.c
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/kernel/infinihost.inf b/branches/IBFD/hw/mt23108/kernel/infinihost.inf
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/user/Makefile b/branches/IBFD/hw/mt23108/user/Makefile
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/user/SOURCES b/branches/IBFD/hw/mt23108/user/SOURCES
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/user/hca_data.h b/branches/IBFD/hw/mt23108/user/hca_data.h
new file mode 100644
index 00000000..e69de29b
diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_av.c b/branches/IBFD/hw/mt23108/user/mlnx_ual_av.c
new file mode 100644
index 00000000..d3f5a12f
--- /dev/null
+++ b/branches/IBFD/hw/mt23108/user/mlnx_ual_av.c
@@ -0,0 +1 @@
+
diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_ca.c
b/branches/IBFD/hw/mt23108/user/mlnx_ual_ca.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/user/mlnx_ual_ca.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_cq.c b/branches/IBFD/hw/mt23108/user/mlnx_ual_cq.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/user/mlnx_ual_cq.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_main.c b/branches/IBFD/hw/mt23108/user/mlnx_ual_main.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/user/mlnx_ual_main.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_main.h b/branches/IBFD/hw/mt23108/user/mlnx_ual_main.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_mcast.c b/branches/IBFD/hw/mt23108/user/mlnx_ual_mcast.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_mrw.c b/branches/IBFD/hw/mt23108/user/mlnx_ual_mrw.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_osbypass.c b/branches/IBFD/hw/mt23108/user/mlnx_ual_osbypass.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_pd.c b/branches/IBFD/hw/mt23108/user/mlnx_ual_pd.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/user/mlnx_ual_qp.c b/branches/IBFD/hw/mt23108/user/mlnx_ual_qp.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/user/mlnx_ual_qp.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/user/uvpd.rc b/branches/IBFD/hw/mt23108/user/uvpd.rc new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/user/uvpd_exports.src b/branches/IBFD/hw/mt23108/user/uvpd_exports.src new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_common.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_common.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_common.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_common.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_init.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_init.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_rx_stub.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_rx_stub.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_rx_stub.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_stub_defines.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_stub_defines.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_tx_stub.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_tx_stub.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_tx_stub.c @@ -0,0 +1 @@ + diff --git 
a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_tx_stub_defines.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hh_tx_stub_defines.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhenosys.ic b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhenosys.ic new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhenosys.ic @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul_obj.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul_obj.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul_stub.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul_stub.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhul_stub.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhulenosys.ic b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhulenosys.ic new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/hhulenosys.ic @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/invalid.ic b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/invalid.ic new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/invalid.ic @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/rx_stub.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/rx_stub.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/rx_stub.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmd_types.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmd_types.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmd_types.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmdif.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmdif.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmdif.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmdif.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmdif_priv.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmdif_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmds_wrap.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/cmdif/cmds_wrap.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/ddrmm/tddrmm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/ddrmm/tddrmm.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/ddrmm/tddrmm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/ddrmm/tddrmm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/eventp/event_irqh.c 
b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/eventp/event_irqh.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/eventp/eventp.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/eventp/eventp.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/eventp/eventp.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/eventp/eventp.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/eventp/eventp.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/eventp/eventp_priv.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/eventp/eventp_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/mcgm/mcgm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/mcgm/mcgm.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/mcgm/mcgm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/mcgm/mcgm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/mrwm/tmrwm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/mrwm/tmrwm.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/mrwm/tmrwm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/mrwm/tmrwm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thh_kl.def b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thh_kl.def new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thh_kl.def @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thh_mod_obj.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thh_mod_obj.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thh_mod_obj.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thh_mod_obj.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thh_mod_obj.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thhul_kl.def b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thhul_kl.def new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thhul_kl.def @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thhul_mod_obj.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thhul_mod_obj.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/os_dep/win/thhul_mod_obj.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_common.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_common.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_cqm/tcqm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_cqm/tcqm.c new file mode 100644 index 00000000..e69de29b diff --git 
a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_cqm/tcqm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_cqm/tcqm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_default_profile.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_default_profile.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_hob/thh_hob.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_hob/thh_hob.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_hob/thh_hob.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_hob/thh_hob.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_hob/thh_hob_priv.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_hob/thh_hob_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_init.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_init.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_init.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_init.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_init.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_qpm/tqpm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_qpm/tqpm.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_qpm/tqpm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_qpm/tqpm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_requested_profile.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_requested_profile.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_srqm/thh_srqm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_srqm/thh_srqm.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_srqm/thh_srqm.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_srqm/thh_srqm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thh_srqm/thh_srqm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_cqm/thhul_cqm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_cqm/thhul_cqm.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_cqm/thhul_cqm.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_cqm/thhul_cqm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_cqm/thhul_cqm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_hob/thhul_hob.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_hob/thhul_hob.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_hob/thhul_hob.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_hob/thhul_hob.h 
b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_hob/thhul_hob.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_mwm/thhul_mwm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_mwm/thhul_mwm.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_mwm/thhul_mwm.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_mwm/thhul_mwm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_mwm/thhul_mwm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_pdm/thhul_pdm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_pdm/thhul_pdm.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_pdm/thhul_pdm.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_pdm/thhul_pdm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_pdm/thhul_pdm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_pdm/thhul_pdm_priv.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_pdm/thhul_pdm_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_qpm/thhul_qpm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_qpm/thhul_qpm.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_qpm/thhul_qpm.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_qpm/thhul_qpm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_qpm/thhul_qpm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_qpm/thhul_qpm_ibal.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_qpm/thhul_qpm_ibal.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_qpm/thhul_qpm_ibal.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_srqm/thhul_srqm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_srqm/thhul_srqm.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_srqm/thhul_srqm.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_srqm/thhul_srqm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/thhul_srqm/thhul_srqm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uar/uar.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uar/uar.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uar/uar.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uar/uar.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/udavm/udavm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/udavm/udavm.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/udavm/udavm.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/udavm/udavm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/udavm/udavm.h new file mode 100644 index 00000000..e69de29b diff --git 
a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uldm/thh_uldm.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uldm/thh_uldm.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uldm/thh_uldm.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uldm/thh_uldm.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uldm/thh_uldm.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uldm/thh_uldm_priv.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/uldm/thh_uldm_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/epool.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/epool.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/epool.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/epool.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/extbuddy.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/extbuddy.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/extbuddy.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/extbuddy.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/sm_mad.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/sm_mad.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/sm_mad.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/sm_mad.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/tlog2.c b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/tlog2.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/tlog2.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/tlog2.h b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/tavor/util/tlog2.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/zombie.ic b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/zombie.ic new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/hcahal/zombie.ic @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/allocator.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/allocator.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vapi_common.def b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vapi_common.def new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vapi_common_kl.def b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vapi_common_kl.def new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vapi_mod_obj.c b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vapi_mod_obj.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vapi_mod_obj.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vip_imp.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vip_imp.h new file mode 
100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/os_dep/win/vip_imp.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vapi_common.c b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vapi_common.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vapi_common.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vapi_common.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_array.c b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_array.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_array.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_array.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_array.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_cirq.c b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_cirq.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_cirq.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_cirq.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_common.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_common.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_delay_unlock.c b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_delay_unlock.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_delay_unlock.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_delay_unlock.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_delay_unlock.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_delay_unlock.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_delay_unlock_priv.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_delay_unlock_priv.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_delay_unlock_priv.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.c b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.ic b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.ic new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.ic @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.ih b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.ih new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash.ih @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash64p.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hash64p.h new file mode 100644 index 
00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hashp.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hashp.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hashp2p.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hashp2p.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hashv4p.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hashv4p.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/common/vip_hashv4p.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/evapi.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/evapi.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/vapi.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/vapi.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/vapi.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/vapi_features.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/vapi_features.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/Hca/verbs/vapi_types.h b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/vapi_types.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/Hca/verbs/vapi_types.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/dirs b/branches/IBFD/hw/mt23108/vapi/dirs new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/Makefile b/branches/IBFD/hw/mt23108/vapi/kernel/Makefile new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/SOURCES b/branches/IBFD/hw/mt23108/vapi/kernel/SOURCES new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/hh_kl_sources.c b/branches/IBFD/hw/mt23108/vapi/kernel/hh_kl_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/mdmsg.h b/branches/IBFD/hw/mt23108/vapi/kernel/mdmsg.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/kernel/mdmsg.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/mosal_kl_sources.c b/branches/IBFD/hw/mt23108/vapi/kernel/mosal_kl_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/mpga_kl_sources.c b/branches/IBFD/hw/mt23108/vapi/kernel/mpga_kl_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/mt23108.def b/branches/IBFD/hw/mt23108/vapi/kernel/mt23108.def new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/mt23108.rc b/branches/IBFD/hw/mt23108/vapi/kernel/mt23108.rc new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/mtl_common_kl_sources.c b/branches/IBFD/hw/mt23108/vapi/kernel/mtl_common_kl_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/tdriver_sources.c b/branches/IBFD/hw/mt23108/vapi/kernel/tdriver_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/thh_kl_sources.c b/branches/IBFD/hw/mt23108/vapi/kernel/thh_kl_sources.c new file mode 100644 index 00000000..e69de29b diff --git 
a/branches/IBFD/hw/mt23108/vapi/kernel/thhul_kl_sources.c b/branches/IBFD/hw/mt23108/vapi/kernel/thhul_kl_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/kernel/vapi_common_kl_sources.c b/branches/IBFD/hw/mt23108/vapi/kernel/vapi_common_kl_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_gen.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_gen.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_gen_nos.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_gen_nos.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_i2c.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_i2c.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_iobuf.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_iobuf.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_k2u_cbk.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_k2u_cbk.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_k2u_cbk_priv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_k2u_cbk_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_mem.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_mem.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_mlock.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_mlock.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_prot_ctx.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_prot_ctx.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_que.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_que.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_sync.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_sync.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_thread.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_thread.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_thread.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_timer.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_timer.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosal_timer.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosalu_socket.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosalu_socket.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/mosalu_socket.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal.def b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal.def new file mode 100644 index 00000000..e69de29b diff --git 
a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_arch.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_arch.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_bus.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_bus.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_bus.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_bus.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_bus.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_driver.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_driver.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_gen.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_gen.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_gen.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_gen_priv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_gen_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_iobuf.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_iobuf.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_iobuf.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_iobuf_imp.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_iobuf_imp.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_k2u_cbk.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_k2u_cbk.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_k2u_cbk.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_kl.def b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_kl.def new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mem.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mem.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mem.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mem_imp.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mem_imp.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mem_imp.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mem_priv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mem_priv.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mem_priv.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mlock.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mlock.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ 
b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mlock.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mlock_priv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_mlock_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_ntddk.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_ntddk.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_ntddk.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_ntddk.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_ntddk.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_priv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_priv.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_priv.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_prot_ctx_imp.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_prot_ctx_imp.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_que.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_que.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_que.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_que_priv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_que_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_sync.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_sync.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_sync.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_sync_imp.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_sync_imp.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_sync_priv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_sync_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_thread.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_thread.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_thread.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_thread_imp.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_thread_imp.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_thread_imp.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_timer.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_timer.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_timer.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_timer_imp.h 
b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_timer_imp.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_timer_imp.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_timer_priv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_timer_priv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_types.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_types.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_types.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_util.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_util.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_util.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_util.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_util.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosal_util.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_driver.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_driver.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_k2u_cbk.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_k2u_cbk.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_k2u_cbk.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_k2u_cbk.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_k2u_cbk.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_k2u_cbk.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_mem.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_mem.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_mem.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_socket.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_socket.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_socket.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_socket_imp.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_socket_imp.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_socket_imp.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_sync.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_sync.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_thread.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_thread.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ 
b/branches/IBFD/hw/mt23108/vapi/mlxsys/mosal/os_dep/win/mosalu_thread.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/MPGA_headers.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/MPGA_headers.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/ib_opcodes.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/ib_opcodes.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/internal_functions.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/internal_functions.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/internal_functions.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/internal_functions.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/mpga.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/mpga.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/mpga.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/mpga.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/mpga_sv.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/mpga_sv.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/mpga_sv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/mpga_sv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/nMPGA.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/nMPGA.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/nMPGA.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/nMPGA.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/nMPGA_packet_append.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/nMPGA_packet_append.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/nMPGA_packet_append.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/nMPGA_packet_append.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/os_dep/win/mpga.def b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/os_dep/win/mpga.def new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/os_dep/win/mpga_driver.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/os_dep/win/mpga_driver.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/os_dep/win/mpga_driver.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/os_dep/win/mpga_kl.def b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/os_dep/win/mpga_kl.def new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_append.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_append.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_append.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_append.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_utilities.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_utilities.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_utilities.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_utilities.h 
b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_utilities.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/packet_utilities.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/ud_pack_fmt.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mpga/ud_pack_fmt.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/mtl_common.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/mtl_common.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/mtl_common.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/mtl_common.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/mtl_common.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/mtl_log.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/mtl_log.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/os_dep/win/mtl_common.def b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/os_dep/win/mtl_common.def new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/os_dep/win/mtl_common_kl.def b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/os_dep/win/mtl_common_kl.def new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/os_dep/win/mtl_common_kl.def @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/os_dep/win/mtl_log_win.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_common/os_dep/win/mtl_log_win.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/bit_ops.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/bit_ops.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/ib_defs.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/ib_defs.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/mtl_errno.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/mtl_errno.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/mtl_errno.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/mtl_pci_types.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/mtl_pci_types.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/mtl_pci_types.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/mtl_types.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/mtl_types.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/MdIoctl.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/MdIoctl.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/MdIoctlSpec.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/MdIoctlSpec.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/MdIoctlSpec.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/endian.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/endian.h new file mode 100644 index 00000000..e69de29b diff --git 
a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/mtl_sys_defs.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/mtl_sys_defs.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/mtl_sys_defs.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/mtl_sys_types.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/mtl_sys_types.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/unistd.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/unistd.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/win/mtl_arch_types.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/mtl_types/win/win/mtl_arch_types.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/Md.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/Md.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/Md.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/Md.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/Md.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/Md.rc b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/Md.rc new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/Md.rc @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdCard.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdCard.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdConf.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdConf.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdConf.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdConf.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdConfPriv.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdConfPriv.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdCtl.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdCtl.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdCtl.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdCtl.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdCtl.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdCtl.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdDbg.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdDbg.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdDbg.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdDbg.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdDbg.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdDbg.h @@ -0,0 +1 @@ + diff --git 
a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdGen.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdGen.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdGuid.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdGuid.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdGuid.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdIoctl.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdIoctl.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdIoctl.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPci.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPci.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPci.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPci.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPci.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPnp.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPnp.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPnp.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPwr.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPwr.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdPwr.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdRdWr.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdRdWr.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdRdWr.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdUtil.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdUtil.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdUtil.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdUtil.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MdUtil.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MddLib.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/MddLib.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/infinihost.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/infinihost.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/mdmsg/MdMsg.c b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/mdmsg/MdMsg.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/mdmsg/MdMsg.mc b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/mdmsg/MdMsg.mc new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/mdmsg/MdMsg.mc @@ -0,0 +1 @@ + diff --git 
a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/resource.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/resource.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/tavor_csp.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/tavor_csp.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/version.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/version.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/mlxsys/os_dep/win/tdriver/version.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/mlxsys/tools/mtperf.h b/branches/IBFD/hw/mt23108/vapi/mlxsys/tools/mtperf.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/MT23108.h b/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/MT23108.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/MT23108_PRM.h b/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/MT23108_PRM.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/MT23108_PRM_append.h b/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/MT23108_PRM_append.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/cr_types.h b/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/cr_types.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/cr_types.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/tavor_dev_defs.h b/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/tavor_dev_defs.h new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/tavor_if_defs.h b/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/tavor_if_defs.h new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/tavor_arch_db/tavor_if_defs.h @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/user/Makefile b/branches/IBFD/hw/mt23108/vapi/user/Makefile new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/user/SOURCES b/branches/IBFD/hw/mt23108/vapi/user/SOURCES new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/user/hh_ul_sources.c b/branches/IBFD/hw/mt23108/vapi/user/hh_ul_sources.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/user/hh_ul_sources.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/user/mosal_ul_sources.c b/branches/IBFD/hw/mt23108/vapi/user/mosal_ul_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/user/mpga_ul_sources.c b/branches/IBFD/hw/mt23108/vapi/user/mpga_ul_sources.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/user/mpga_ul_sources.c @@ -0,0 +1 @@ + diff --git a/branches/IBFD/hw/mt23108/vapi/user/mtl_common_ul_sources.c b/branches/IBFD/hw/mt23108/vapi/user/mtl_common_ul_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mt23108/vapi/user/thhul_ul_sources.c b/branches/IBFD/hw/mt23108/vapi/user/thhul_ul_sources.c new file mode 100644 index 00000000..d3f5a12f --- /dev/null +++ b/branches/IBFD/hw/mt23108/vapi/user/thhul_ul_sources.c @@ -0,0 +1 @@ + diff --git 
a/branches/IBFD/hw/mt23108/vapi/user/vapi_common_ul_sources.c b/branches/IBFD/hw/mt23108/vapi/user/vapi_common_ul_sources.c new file mode 100644 index 00000000..e69de29b diff --git a/branches/IBFD/hw/mthca/dirs b/branches/IBFD/hw/mthca/dirs new file mode 100644 index 00000000..aa698135 --- /dev/null +++ b/branches/IBFD/hw/mthca/dirs @@ -0,0 +1,3 @@ +DIRS=\ + kernel \ + user diff --git a/branches/IBFD/hw/mthca/hca_utils.c b/branches/IBFD/hw/mthca/hca_utils.c new file mode 100644 index 00000000..a86b5ab5 --- /dev/null +++ b/branches/IBFD/hw/mthca/hca_utils.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +#include "mthca_dev.h" + + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_data.tmh" +#endif + + +mthca_qp_access_t +map_qp_ibal_acl( + IN ib_access_t ibal_acl) +{ +#define IBAL_ACL(ifl,mfl) if (ibal_acl & ifl) mthca_acl |= mfl + mthca_qp_access_t mthca_acl = 0; + + IBAL_ACL(IB_AC_RDMA_READ,MTHCA_ACCESS_REMOTE_READ); + IBAL_ACL(IB_AC_RDMA_WRITE,MTHCA_ACCESS_REMOTE_WRITE); + IBAL_ACL(IB_AC_ATOMIC,MTHCA_ACCESS_REMOTE_ATOMIC); + IBAL_ACL(IB_AC_LOCAL_WRITE,MTHCA_ACCESS_LOCAL_WRITE); + IBAL_ACL(IB_AC_MW_BIND,MTHCA_ACCESS_MW_BIND); + + return mthca_acl; +} + +///////////////////////////////////////////////////////// +///////////////////////////////////////////////////////// +ib_access_t +map_qp_mthca_acl( + IN mthca_qp_access_t mthca_acl) +{ +#define ACL_IBAL(mfl,ifl) if (mthca_acl & mfl) ibal_acl |= ifl + ib_access_t ibal_acl = 0; + + ACL_IBAL(MTHCA_ACCESS_REMOTE_READ,IB_AC_RDMA_READ); + ACL_IBAL(MTHCA_ACCESS_REMOTE_WRITE,IB_AC_RDMA_WRITE); + ACL_IBAL(MTHCA_ACCESS_REMOTE_ATOMIC,IB_AC_ATOMIC); + ACL_IBAL(MTHCA_ACCESS_LOCAL_WRITE,IB_AC_LOCAL_WRITE); + ACL_IBAL(MTHCA_ACCESS_MW_BIND,IB_AC_MW_BIND); + + return ibal_acl; +} + + diff --git a/branches/IBFD/hw/mthca/hca_utils.h b/branches/IBFD/hw/mthca/hca_utils.h new file mode 100644 index 00000000..9b8a5683 --- /dev/null +++ b/branches/IBFD/hw/mthca/hca_utils.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. 
+ * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef __HCA_UTILS_H__ +#define __HCA_UTILS_H__ + +#include +#include + +mthca_qp_access_t +map_qp_ibal_acl( + IN ib_access_t ibal_acl) +; + +ib_access_t +map_qp_mthca_acl( + IN mthca_qp_access_t mthca_acl); + +#endif + diff --git a/branches/IBFD/hw/mthca/kernel/Makefile b/branches/IBFD/hw/mthca/kernel/Makefile new file mode 100644 index 00000000..1c8f2940 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/Makefile @@ -0,0 +1,6 @@ +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the driver components of the Windows NT DDK +# + +!INCLUDE ..\..\..\inc\openib.def# diff --git a/branches/IBFD/hw/mthca/kernel/SOURCES b/branches/IBFD/hw/mthca/kernel/SOURCES new file mode 100644 index 00000000..bff6b391 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/SOURCES @@ -0,0 +1,86 @@ +TRUNK=..\..\.. 
+ +TARGETNAME=mthca +TARGETPATH=$(TRUNK)\bin\kernel\obj$(BUILD_ALT_DIR) +TARGETTYPE=DRIVER + +!if $(FREEBUILD) +ENABLE_EVENT_TRACING=1 +!else +#ENABLE_EVENT_TRACING=1 +!endif + +SOURCES= \ + mthca_log.mc \ + mthca_log.rc \ + hca.rc \ + mthca_log.c \ + \ + ..\hca_utils.c \ + ..\mt_utils.c \ + \ + hca_data.c \ + hca_direct.c \ + hca_driver.c \ + hca_mcast.c \ + hca_memory.c \ + hca_pci.c \ + hca_pnp.c \ + hca_verbs.c \ + \ + mt_cache.c \ + mt_device.c \ + mt_l2w.c \ + mt_memory.c \ + mt_packer.c \ + mt_reset_tavor.c \ + mt_ud_header.c \ + mt_uverbs.c \ + mt_verbs.c \ + mt_pa_cash.c \ + \ + mthca_allocator.c \ + mthca_av.c \ + mthca_catas.c \ + mthca_cmd.c \ + mthca_cq.c \ + mthca_eq.c \ + mthca_mad.c \ + mthca_main.c \ + mthca_mcg.c \ + mthca_memfree.c \ + mthca_mr.c \ + mthca_pd.c \ + mthca_profile.c \ + mthca_provider.c \ + mthca_qp.c \ + mthca_srq.c \ + mthca_uar.c \ + + +INCLUDES=\ + ..; \ + $(TRUNK)\inc; \ + $(TRUNK)\inc\kernel; + +C_DEFINES=$(C_DEFINES) -DDRIVER -DDEPRECATE_DDK_FUNCTIONS -D__LITTLE_ENDIAN + +TARGETLIBS= \ + $(TARGETPATH)\*\complib.lib \ + $(DDK_LIB_PATH)\wdmguid.lib + + +#LINKER_FLAGS=/MAP + +!IFDEF ENABLE_EVENT_TRACING + +C_DEFINES = $(C_DEFINES) -DEVENT_TRACING +RUN_WPP= $(SOURCES) -km -ext: .c .h .C .H \ + -scan:hca_debug.h \ + -func:HCA_PRINT(LEVEL,FLAGS,(MSG,...)) +!ENDIF + +# -func:HCA_PRINT_EV(LEVEL,FLAGS,(MSG,...)) \ + +MSC_OPTIMIZATION=/Oi +MSC_WARNING_LEVEL= /W4 diff --git a/branches/IBFD/hw/mthca/kernel/hca.rc b/branches/IBFD/hw/mthca/kernel/hca.rc new file mode 100644 index 00000000..345f4397 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca.rc @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +#include + +#define VER_FILETYPE VFT_DRV +#define VER_FILESUBTYPE VFT2_UNKNOWN +#ifdef DBG +#define VER_FILEDESCRIPTION_STR "HCA Driver (checked)" +#else +#define VER_FILEDESCRIPTION_STR "HCA Driver" +#endif +#define VER_INTERNALNAME_STR "mthca.sys" +#define VER_ORIGINALFILENAME_STR "mthca.sys" +#include diff --git a/branches/IBFD/hw/mthca/kernel/hca_data.c b/branches/IBFD/hw/mthca/kernel/hca_data.c new file mode 100644 index 00000000..5805306e --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_data.c @@ -0,0 +1,907 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. 
+ * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +#include "hca_driver.h" +#include "hca_utils.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_data.tmh" +#endif + +#include "mthca_dev.h" +#include + +static cl_spinlock_t hob_lock; + + + +uint32_t g_mlnx_dpc2thread = 0; + + +cl_qlist_t mlnx_hca_list; + +mlnx_hob_t mlnx_hob_array[MLNX_NUM_HOBKL]; // kernel HOB - one per HCA (cmdif access) +mlnx_hobul_t *mlnx_hobul_array[MLNX_NUM_HOBUL]; // kernel HOBUL - one per HCA (kar access) + +///////////////////////////////////////////////////////// +// ### HCA +///////////////////////////////////////////////////////// +void +mlnx_hca_insert( + IN mlnx_hca_t *p_hca ) +{ + cl_spinlock_acquire( &hob_lock ); + cl_qlist_insert_tail( &mlnx_hca_list, &p_hca->list_item ); + cl_spinlock_release( &hob_lock ); +} + +void +mlnx_hca_remove( + IN mlnx_hca_t *p_hca ) +{ + cl_spinlock_acquire( &hob_lock ); + cl_qlist_remove_item( &mlnx_hca_list, &p_hca->list_item ); + cl_spinlock_release( &hob_lock ); +} + +mlnx_hca_t* +mlnx_hca_from_guid( + IN ib_net64_t guid ) +{ + cl_list_item_t *p_item; + mlnx_hca_t *p_hca = NULL; + + cl_spinlock_acquire( &hob_lock ); + p_item = cl_qlist_head( &mlnx_hca_list ); + while( p_item != cl_qlist_end( &mlnx_hca_list ) ) + { + p_hca = PARENT_STRUCT( p_item, mlnx_hca_t, list_item ); + if( p_hca->guid == guid ) + break; + p_item = cl_qlist_next( p_item ); + p_hca = NULL; + } + cl_spinlock_release( &hob_lock ); + return p_hca; +} + +/* +void +mlnx_names_from_guid( + IN ib_net64_t guid, + OUT char **hca_name_p, + OUT char **dev_name_p) +{ + unsigned int idx; + + if (!hca_name_p) return; + if (!dev_name_p) return; + + for (idx = 0; idx < mlnx_num_hca; idx++) + { + if (mlnx_hca_array[idx].ifx.guid == guid) + { + *hca_name_p = mlnx_hca_array[idx].hca_name_p; + *dev_name_p = mlnx_hca_array[idx].dev_name_p; + } + } +} +*/ + +///////////////////////////////////////////////////////// +// ### HCA +///////////////////////////////////////////////////////// +cl_status_t +mlnx_hcas_init( void ) +{ + cl_qlist_init( &mlnx_hca_list ); + return cl_spinlock_init( &hob_lock ); +} + + +///////////////////////////////////////////////////////// +///////////////////////////////////////////////////////// +ib_api_status_t +mlnx_hobs_set_cb( + IN mlnx_hob_t *hob_p, + IN ci_completion_cb_t comp_cb_p, + IN ci_async_event_cb_t async_cb_p, + IN const void* const ib_context) +{ + cl_status_t cl_status; + + // Setup the 
callbacks + if (!hob_p->async_proc_mgr_p) + { + hob_p->async_proc_mgr_p = cl_malloc( sizeof( cl_async_proc_t ) ); + if( !hob_p->async_proc_mgr_p ) + { + return IB_INSUFFICIENT_MEMORY; + } + cl_async_proc_construct( hob_p->async_proc_mgr_p ); + cl_status = cl_async_proc_init( hob_p->async_proc_mgr_p, MLNX_NUM_CB_THR, "CBthread" ); + if( cl_status != CL_SUCCESS ) + { + cl_async_proc_destroy( hob_p->async_proc_mgr_p ); + cl_free(hob_p->async_proc_mgr_p); + hob_p->async_proc_mgr_p = NULL; + return IB_INSUFFICIENT_RESOURCES; + } + } + + hob_p->comp_cb_p = comp_cb_p; + hob_p->async_cb_p = async_cb_p; + hob_p->ca_context = ib_context; // This is the context our CB forwards to IBAL + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("CL: hca_idx %d context 0x%p\n", (int)(hob_p - mlnx_hob_array), ib_context)); + return IB_SUCCESS; +} + +///////////////////////////////////////////////////////// +///////////////////////////////////////////////////////// +void +mlnx_hobs_remove( + IN mlnx_hob_t *hob_p) +{ + cl_async_proc_t *p_async_proc; + + + cl_spinlock_acquire( &hob_lock ); + + hob_p->mark = E_MARK_INVALID; + + p_async_proc = hob_p->async_proc_mgr_p; + hob_p->async_proc_mgr_p = NULL; + + hob_p->comp_cb_p = NULL; + hob_p->async_cb_p = NULL; + hob_p->ca_context = NULL; + hob_p->cl_device_h = NULL; + + cl_spinlock_release( &hob_lock ); + + if( p_async_proc ) + { + cl_async_proc_destroy( p_async_proc ); + cl_free( p_async_proc ); + } + + + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM,("CL: hobs_remove idx %d \n", (int)(hob_p - mlnx_hob_array))); +} + +///////////////////////////////////////////////////////// +///////////////////////////////////////////////////////// +void +mthca_port_cap_to_ibal( + IN u32 mthca_port_cap, + OUT ib_port_cap_t *ibal_port_cap_p) +{ +#define SET_CAP(flag,cap) if (mthca_port_cap & flag) ibal_port_cap_p->cap = TRUE + + SET_CAP(IB_PORT_CM_SUP,cm); + SET_CAP(IB_PORT_SNMP_TUNNEL_SUP,snmp); + SET_CAP(IB_PORT_DEVICE_MGMT_SUP,dev_mgmt); + SET_CAP(IB_PORT_VENDOR_CLASS_SUP,vend); + SET_CAP(IB_PORT_SM_DISABLED,sm_disable); + SET_CAP(IB_PORT_SM,sm); + SET_CAP(IB_PORT_NOTICE_SUP,notice); + SET_CAP(IB_PORT_TRAP_SUP,trap); + SET_CAP(IB_PORT_AUTO_MIGR_SUP,apm); + SET_CAP(IB_PORT_SL_MAP_SUP,slmap); + SET_CAP(IB_PORT_LED_INFO_SUP,ledinfo); + SET_CAP(IB_PORT_CAP_MASK_NOTICE_SUP,capm_notice); + SET_CAP(IB_PORT_CLIENT_REG_SUP,client_reregister); + SET_CAP(IB_PORT_SYS_IMAGE_GUID_SUP,sysguid); + SET_CAP(IB_PORT_BOOT_MGMT_SUP,boot_mgmt); + SET_CAP(IB_PORT_DR_NOTICE_SUP,dr_notice); + SET_CAP(IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP,pkey_switch_ext_port); + SET_CAP(IB_PORT_LINK_LATENCY_SUP,link_rtl); + SET_CAP(IB_PORT_REINIT_SUP,reinit); + SET_CAP(IB_PORT_OPT_IPD_SUP,ipd); + SET_CAP(IB_PORT_MKEY_NVRAM,mkey_nvram); + SET_CAP(IB_PORT_PKEY_NVRAM,pkey_nvram); + // there no MTHCA flags for qkey_ctr, pkey_ctr, port_active, bm IBAL capabilities; +} + + +///////////////////////////////////////////////////////// +void +mlnx_conv_hca_cap( + IN struct ib_device *ib_dev, + IN struct ib_device_attr *hca_info_p, + IN struct ib_port_attr *hca_ports, + OUT ib_ca_attr_t *ca_attr_p) +{ + uint8_t port_num; + ib_port_attr_t *ibal_port_p; + struct ib_port_attr *mthca_port_p; + + ca_attr_p->vend_id = hca_info_p->vendor_id; + ca_attr_p->dev_id = (uint16_t)hca_info_p->vendor_part_id; + ca_attr_p->revision = (uint16_t)hca_info_p->hw_ver; + ca_attr_p->fw_ver = hca_info_p->fw_ver; + ca_attr_p->ca_guid = *(UNALIGNED64 uint64_t *)&ib_dev->node_guid; + ca_attr_p->num_ports = ib_dev->phys_port_cnt; + ca_attr_p->max_qps = 
hca_info_p->max_qp; + ca_attr_p->max_wrs = hca_info_p->max_qp_wr; + ca_attr_p->max_sges = hca_info_p->max_sge; + ca_attr_p->max_rd_sges = hca_info_p->max_sge_rd; + ca_attr_p->max_cqs = hca_info_p->max_cq; + ca_attr_p->max_cqes = hca_info_p->max_cqe; + ca_attr_p->max_pds = hca_info_p->max_pd; + ca_attr_p->init_regions = hca_info_p->max_mr; + ca_attr_p->init_windows = hca_info_p->max_mw; + ca_attr_p->init_region_size = hca_info_p->max_mr_size; + ca_attr_p->max_addr_handles = hca_info_p->max_ah; + ca_attr_p->atomicity = hca_info_p->atomic_cap; + ca_attr_p->max_partitions = hca_info_p->max_pkeys; + ca_attr_p->max_qp_resp_res =(uint8_t) hca_info_p->max_qp_rd_atom; + ca_attr_p->max_resp_res = (uint8_t)hca_info_p->max_res_rd_atom; + ca_attr_p->max_qp_init_depth = (uint8_t)hca_info_p->max_qp_init_rd_atom; + ca_attr_p->max_ipv6_qps = hca_info_p->max_raw_ipv6_qp; + ca_attr_p->max_ether_qps = hca_info_p->max_raw_ethy_qp; + ca_attr_p->max_mcast_grps = hca_info_p->max_mcast_grp; + ca_attr_p->max_mcast_qps = hca_info_p->max_total_mcast_qp_attach; + ca_attr_p->max_qps_per_mcast_grp = hca_info_p->max_mcast_qp_attach; + ca_attr_p->max_fmr = hca_info_p->max_fmr; + ca_attr_p->max_map_per_fmr = hca_info_p->max_map_per_fmr; + ca_attr_p->max_srq = hca_info_p->max_srq; + ca_attr_p->max_srq_wrs = hca_info_p->max_srq_wr; + ca_attr_p->max_srq_sges = hca_info_p->max_srq_sge; + + ca_attr_p->local_ack_delay = hca_info_p->local_ca_ack_delay; + ca_attr_p->bad_pkey_ctr_support = hca_info_p->device_cap_flags & IB_DEVICE_BAD_PKEY_CNTR; + ca_attr_p->bad_qkey_ctr_support = hca_info_p->device_cap_flags & IB_DEVICE_BAD_QKEY_CNTR; + ca_attr_p->raw_mcast_support = hca_info_p->device_cap_flags & IB_DEVICE_RAW_MULTI; + ca_attr_p->apm_support = hca_info_p->device_cap_flags & IB_DEVICE_AUTO_PATH_MIG; + ca_attr_p->av_port_check = hca_info_p->device_cap_flags & IB_DEVICE_UD_AV_PORT_ENFORCE; + ca_attr_p->change_primary_port = hca_info_p->device_cap_flags & IB_DEVICE_CHANGE_PHY_PORT; + ca_attr_p->modify_wr_depth = hca_info_p->device_cap_flags & IB_DEVICE_RESIZE_MAX_WR; + ca_attr_p->modify_srq_depth = hca_info_p->device_cap_flags & IB_DEVICE_SRQ_RESIZE; + ca_attr_p->hw_agents = FALSE; // in the context of IBAL the agent is implemented on the host + + ca_attr_p->num_page_sizes = 1; + ca_attr_p->p_page_size[0] = PAGE_SIZE; // TBD: extract an array of page sizes from HCA cap + + for (port_num = 0; port_num <= end_port(ib_dev) - start_port(ib_dev); ++port_num) + { + // Setup port pointers + ibal_port_p = &ca_attr_p->p_port_attr[port_num]; + mthca_port_p = &hca_ports[port_num]; + + // Port Capabilities + cl_memclr(&ibal_port_p->cap, sizeof(ib_port_cap_t)); + mthca_port_cap_to_ibal(mthca_port_p->port_cap_flags, &ibal_port_p->cap); + + // Port Attributes + ibal_port_p->port_num = port_num + start_port(ib_dev); + ibal_port_p->port_guid = ibal_port_p->p_gid_table[0].unicast.interface_id; + ibal_port_p->lid = cl_ntoh16(mthca_port_p->lid); + ibal_port_p->lmc = mthca_port_p->lmc; + ibal_port_p->max_vls = mthca_port_p->max_vl_num; + ibal_port_p->sm_lid = cl_ntoh16(mthca_port_p->sm_lid); + ibal_port_p->sm_sl = mthca_port_p->sm_sl; + ibal_port_p->link_state = (mthca_port_p->state != 0) ?
(uint8_t)mthca_port_p->state : IB_LINK_DOWN; + ibal_port_p->num_gids = (uint16_t)mthca_port_p->gid_tbl_len; + ibal_port_p->num_pkeys = mthca_port_p->pkey_tbl_len; + ibal_port_p->pkey_ctr = (uint16_t)mthca_port_p->bad_pkey_cntr; + ibal_port_p->qkey_ctr = (uint16_t)mthca_port_p->qkey_viol_cntr; + ibal_port_p->max_msg_size = mthca_port_p->max_msg_sz; + ibal_port_p->mtu = (uint8_t)mthca_port_p->max_mtu; + + ibal_port_p->subnet_timeout = mthca_port_p->subnet_timeout; + // ibal_port_p->local_ack_timeout = 3; // TBD: currently ~32 usec + HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM ,("Port %d port_guid 0x%I64x\n", + ibal_port_p->port_num, cl_ntoh64(ibal_port_p->port_guid))); + } +} + +void cq_comp_handler(struct ib_cq *cq, void *context) +{ + mlnx_hob_t *hob_p = (mlnx_hob_t *)context; + struct mthca_cq *mcq =(struct mthca_cq *)cq; + HCA_ENTER(HCA_DBG_CQ); + if (hob_p && hob_p->comp_cb_p) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_CQ ,("Invoking completion callback\n")); + (hob_p->comp_cb_p)(mcq->cq_context); + } + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("Incorrect context. Completion callback was not invoked\n")); + } + HCA_EXIT(HCA_DBG_CQ); +} + +void ca_event_handler(struct ib_event *ev, void *context) +{ + mlnx_hob_t *hob_p = (mlnx_hob_t *)context; + ib_event_rec_t event_rec; + + // prepare parameters + event_rec.context = (void *)hob_p->ca_context; + event_rec.trap.info.port_num = ev->element.port_num; + event_rec.type = ev->event; + if (event_rec.type > IB_AE_UNKNOWN) { + // CL_ASSERT(0); // This shouldn't happen + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("Unmapped E_EV_CA event of type 0x%x. Replaced by 0x%x (IB_AE_LOCAL_FATAL)\n", + event_rec.type, IB_AE_LOCAL_FATAL)); + event_rec.type = IB_AE_LOCAL_FATAL; + } + + // call the user callback + if (hob_p && hob_p->async_cb_p) + (hob_p->async_cb_p)(&event_rec); + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n")); + } +} + +void srq_event_handler(struct ib_event *ev, void *context) +{ + mlnx_hob_t *hob_p = (mlnx_hob_t *)context; + ib_event_rec_t event_rec; + struct mthca_srq *srq_p; + + // prepare parameters + event_rec.type = ev->event; + event_rec.vendor_specific = ev->vendor_specific; + srq_p = (struct mthca_srq *)ev->element.srq; + event_rec.context = srq_p->srq_context; + + // call the user callback + if (hob_p) + (hob_p->async_cb_p)(&event_rec); + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n")); + } +} + + +void qp_event_handler(struct ib_event *ev, void *context) +{ + mlnx_hob_t *hob_p = (mlnx_hob_t *)context; + ib_event_rec_t event_rec; + struct mthca_qp *qp_p; + + // prepare parameters + event_rec.type = ev->event; + event_rec.vendor_specific = ev->vendor_specific; + qp_p = (struct mthca_qp *)ev->element.qp; + event_rec.context = qp_p->qp_context; + + // call the user callback + if (hob_p) + (hob_p->async_cb_p)(&event_rec); + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. Async callback was not invoked\n")); + } +} + +void cq_event_handler(struct ib_event *ev, void *context) +{ + mlnx_hob_t *hob_p = (mlnx_hob_t *)context; + ib_event_rec_t event_rec; + struct mthca_cq *cq_p; + + // prepare parameters + event_rec.type = ev->event; + cq_p = (struct mthca_cq *)ev->element.cq; + event_rec.context = cq_p->cq_context; + + // call the user callback + if (hob_p) + (hob_p->async_cb_p)(&event_rec); + else { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Incorrect context. 
Async callback was not invoked\n")); + } +} + +ib_qp_state_t mlnx_qps_to_ibal(enum ib_qp_state qps) +{ +#define MAP_QPS(val1,val2) case val1: ib_qps = val2; break + ib_qp_state_t ib_qps; + switch (qps) { + MAP_QPS( IBQPS_RESET, IB_QPS_RESET ); + MAP_QPS( IBQPS_INIT, IB_QPS_INIT ); + MAP_QPS( IBQPS_RTR, IB_QPS_RTR ); + MAP_QPS( IBQPS_RTS, IB_QPS_RTS ); + MAP_QPS( IBQPS_SQD, IB_QPS_SQD ); + MAP_QPS( IBQPS_SQE, IB_QPS_SQERR ); + MAP_QPS( IBQPS_ERR, IB_QPS_ERROR ); + default: + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Unmapped MTHCA qp_state %d\n", qps)); + ib_qps = 0xffffffff; + } + return ib_qps; +} + +enum ib_qp_state mlnx_qps_from_ibal(ib_qp_state_t ib_qps) +{ +#define MAP_IBQPS(val1,val2) case val1: qps = val2; break + enum ib_qp_state qps; + switch (ib_qps) { + MAP_IBQPS( IB_QPS_RESET, IBQPS_RESET ); + MAP_IBQPS( IB_QPS_INIT, IBQPS_INIT ); + MAP_IBQPS( IB_QPS_RTR, IBQPS_RTR ); + MAP_IBQPS( IB_QPS_RTS, IBQPS_RTS ); + MAP_IBQPS( IB_QPS_SQD, IBQPS_SQD ); + MAP_IBQPS( IB_QPS_SQD_DRAINING, IBQPS_SQD ); + MAP_IBQPS( IB_QPS_SQD_DRAINED, IBQPS_SQD ); + MAP_IBQPS( IB_QPS_SQERR, IBQPS_SQE ); + MAP_IBQPS( IB_QPS_ERROR, IBQPS_ERR ); + default: + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Unmapped IBAL qp_state %d\n", ib_qps)); + qps = 0xffffffff; + } + return qps; +} + +ib_api_status_t +mlnx_conv_qp_modify_attr( + IN const struct ib_qp *ib_qp_p, + IN ib_qp_type_t qp_type, + IN const ib_qp_mod_t *modify_attr_p, + OUT struct ib_qp_attr *qp_attr_p, + OUT int *qp_attr_mask_p + ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct mthca_qp *qp_p = (struct mthca_qp *)ib_qp_p; + + RtlZeroMemory( qp_attr_p, sizeof *qp_attr_p ); + *qp_attr_mask_p = IB_QP_STATE; + qp_attr_p->qp_state = mlnx_qps_from_ibal( modify_attr_p->req_state ); + + // skipped cases + if (qp_p->state == IBQPS_RESET && modify_attr_p->req_state != IB_QPS_INIT) + return IB_NOT_DONE; + + switch (modify_attr_p->req_state) { + case IB_QPS_RESET: + case IB_QPS_ERROR: + case IB_QPS_SQERR: + case IB_QPS_TIME_WAIT: + break; + + case IB_QPS_INIT: + + switch (qp_type) { + case IB_QPT_RELIABLE_CONN: + case IB_QPT_UNRELIABLE_CONN: + *qp_attr_mask_p |= IB_QP_PORT | IB_QP_PKEY_INDEX |IB_QP_ACCESS_FLAGS; + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.init.access_ctrl); + break; + case IB_QPT_UNRELIABLE_DGRM: + case IB_QPT_QP0: + case IB_QPT_QP1: + default: + *qp_attr_mask_p |= IB_QP_PORT | IB_QP_QKEY | IB_QP_PKEY_INDEX ; + qp_attr_p->qkey = cl_ntoh32 (modify_attr_p->state.init.qkey); + break; + } + + // IB_QP_PORT + qp_attr_p->port_num = modify_attr_p->state.init.primary_port; + + // IB_QP_PKEY_INDEX + qp_attr_p->pkey_index = modify_attr_p->state.init.pkey_index; + + break; + + case IB_QPS_RTR: + /* modifying the WQE depth is not supported */ + if( modify_attr_p->state.rtr.opts & IB_MOD_QP_SQ_DEPTH || + modify_attr_p->state.rtr.opts & IB_MOD_QP_RQ_DEPTH ) { + status = IB_UNSUPPORTED; + break; + } + + switch (qp_type) { + case IB_QPT_RELIABLE_CONN: + *qp_attr_mask_p |= /* required flags */ + IB_QP_DEST_QPN |IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_AV |IB_QP_PATH_MTU | IB_QP_MIN_RNR_TIMER; + + // IB_QP_DEST_QPN + qp_attr_p->dest_qp_num = cl_ntoh32 (modify_attr_p->state.rtr.dest_qp); + + // IB_QP_RQ_PSN + qp_attr_p->rq_psn = cl_ntoh32 (modify_attr_p->state.rtr.rq_psn); + + // IB_QP_MAX_DEST_RD_ATOMIC + qp_attr_p->max_dest_rd_atomic = modify_attr_p->state.rtr.resp_res; + + // IB_QP_AV, IB_QP_PATH_MTU: Convert primary RC AV (mandatory) + err = mlnx_conv_ibal_av(ib_qp_p->device, + 
&modify_attr_p->state.rtr.primary_av, &qp_attr_p->ah_attr); + if (err) { + status = IB_ERROR; + break; + } + qp_attr_p->path_mtu = modify_attr_p->state.rtr.primary_av.conn.path_mtu; // MTU + qp_attr_p->timeout = modify_attr_p->state.rtr.primary_av.conn.local_ack_timeout; // local ACK timeout + qp_attr_p->retry_cnt = modify_attr_p->state.rtr.primary_av.conn.seq_err_retry_cnt; // retry count + qp_attr_p->rnr_retry = modify_attr_p->state.rtr.primary_av.conn.rnr_retry_cnt; // RNR retry count + + // IB_QP_MIN_RNR_TIMER, required in RTR, optional in RTS. + qp_attr_p->min_rnr_timer = modify_attr_p->state.rtr.rnr_nak_timeout; + + // IB_QP_ACCESS_FLAGS: Convert Remote Atomic Flags + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_ACCESS_CTRL) { + *qp_attr_mask_p |= IB_QP_ACCESS_FLAGS; /* optional flag */ + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.rtr.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_ALTERNATE_AV) { + *qp_attr_mask_p |= IB_QP_ALT_PATH; /* required flag */ + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rtr.alternate_av, &qp_attr_p->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + qp_attr_p->alt_timeout = modify_attr_p->state.rtr.alternate_av.conn.local_ack_timeout; // XXX: conv + } + + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rtr.pkey_index; + } + break; + + case IB_QPT_UNRELIABLE_CONN: + *qp_attr_mask_p |= /* required flags */ + IB_QP_DEST_QPN |IB_QP_RQ_PSN | IB_QP_AV | IB_QP_PATH_MTU; + + // IB_QP_DEST_QPN + qp_attr_p->dest_qp_num = cl_ntoh32 (modify_attr_p->state.rtr.dest_qp); + + // IB_QP_RQ_PSN + qp_attr_p->rq_psn = cl_ntoh32 (modify_attr_p->state.rtr.rq_psn); + + // IB_QP_PATH_MTU + qp_attr_p->path_mtu = modify_attr_p->state.rtr.primary_av.conn.path_mtu; + + // IB_QP_AV: Convert primary AV (mandatory) + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rtr.primary_av, &qp_attr_p->ah_attr); + if (err) { + status = IB_ERROR; + break; + } + + // IB_QP_ACCESS_FLAGS: Convert Remote Atomic Flags + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_ACCESS_CTRL) { + *qp_attr_mask_p |= IB_QP_ACCESS_FLAGS; /* optional flag */ + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.rtr.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_ALTERNATE_AV) { + *qp_attr_mask_p |= IB_QP_ALT_PATH; /* required flag */ + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rtr.alternate_av, &qp_attr_p->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + } + + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rtr.pkey_index; + } + break; + + case IB_QPT_UNRELIABLE_DGRM: + case IB_QPT_QP0: + case IB_QPT_QP1: + default: + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rtr.pkey_index; + } + + // IB_QP_QKEY + if (modify_attr_p->state.rtr.opts & IB_MOD_QP_QKEY) { + *qp_attr_mask_p |= IB_QP_QKEY; + qp_attr_p->qkey = cl_ntoh32 (modify_attr_p->state.rtr.qkey); + } + break; + + } + break; + + case IB_QPS_RTS: + /* modifying the WQE depth is not supported */ + if( modify_attr_p->state.rts.opts & IB_MOD_QP_SQ_DEPTH || + modify_attr_p->state.rts.opts & IB_MOD_QP_RQ_DEPTH ) + { +
status = IB_UNSUPPORTED; + break; + } + + switch (qp_type) { + case IB_QPT_RELIABLE_CONN: + if (qp_p->state != IBQPS_RTS) + *qp_attr_mask_p |= /* required flags */ + IB_QP_SQ_PSN |IB_QP_MAX_QP_RD_ATOMIC | IB_QP_TIMEOUT | + IB_QP_RETRY_CNT |IB_QP_RNR_RETRY; + + // IB_QP_MAX_QP_RD_ATOMIC + qp_attr_p->max_rd_atomic = modify_attr_p->state.rts.init_depth; + + // IB_QP_TIMEOUT + qp_attr_p->timeout = modify_attr_p->state.rts.local_ack_timeout; // XXX: conv + + // IB_QP_RETRY_CNT + qp_attr_p->retry_cnt = modify_attr_p->state.rts.retry_cnt; + + // IB_QP_RNR_RETRY + qp_attr_p->rnr_retry = modify_attr_p->state.rts.rnr_retry_cnt; + + // IB_QP_MAX_DEST_RD_ATOMIC: Update the responder resources for RDMA/ATOMIC (optional for SQD->RTS) + if (modify_attr_p->state.rts.opts & IB_MOD_QP_RESP_RES) { + *qp_attr_mask_p |= IB_QP_MAX_DEST_RD_ATOMIC; + qp_attr_p->max_dest_rd_atomic = modify_attr_p->state.rts.resp_res; + } + +#ifdef WIN_TO_BE_REMOVED + //TODO: do we need that ? + // Linux patch 4793: PKEY_INDEX is not a legal parameter in the RTR->RTS transition. + + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rts.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rts.pkey_index; + } +#endif + + // IB_QP_MIN_RNR_TIMER + if (modify_attr_p->state.rts.opts & IB_MOD_QP_RNR_NAK_TIMEOUT) { + *qp_attr_mask_p |= IB_QP_MIN_RNR_TIMER; + qp_attr_p->min_rnr_timer = modify_attr_p->state.rts.rnr_nak_timeout; + } + + // IB_QP_PATH_MIG_STATE + if (modify_attr_p->state.rts.opts & IB_MOD_QP_APM_STATE) { + *qp_attr_mask_p |= IB_QP_PATH_MIG_STATE; + qp_attr_p->path_mig_state = modify_attr_p->state.rts.apm_state; + } + + // IB_QP_ACCESS_FLAGS + if (modify_attr_p->state.rts.opts & IB_MOD_QP_ACCESS_CTRL) { + *qp_attr_mask_p |= IB_QP_ACCESS_FLAGS; /* optional flags */ + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.rts.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (modify_attr_p->state.rts.opts & IB_MOD_QP_ALTERNATE_AV) { + *qp_attr_mask_p |= IB_QP_ALT_PATH; /* optional flag */ + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rts.alternate_av, &qp_attr_p->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + qp_attr_p->alt_timeout = modify_attr_p->state.rts.alternate_av.conn.local_ack_timeout; // XXX: conv + } + break; + + case IB_QPT_UNRELIABLE_CONN: + if (qp_p->state != IBQPS_RTS) + *qp_attr_mask_p |= /* required flags */ + IB_QP_SQ_PSN; + + // IB_QP_MAX_DEST_RD_ATOMIC: Update the responder resources for RDMA/ATOMIC (optional for SQD->RTS) + if (modify_attr_p->state.rts.opts & IB_MOD_QP_RESP_RES) { + *qp_attr_mask_p |= IB_QP_MAX_DEST_RD_ATOMIC; + qp_attr_p->max_dest_rd_atomic = modify_attr_p->state.rts.resp_res; + } + +#ifdef WIN_TO_BE_REMOVED + //TODO: do we need that ? + // Linux patch 4793: PKEY_INDEX is not a legal parameter in the RTR->RTS transition. 
+ + // IB_QP_PKEY_INDEX + if (modify_attr_p->state.rts.opts & IB_MOD_QP_PKEY) { + *qp_attr_mask_p |= IB_QP_PKEY_INDEX; + qp_attr_p->pkey_index = modify_attr_p->state.rts.pkey_index; + } +#endif + + // IB_QP_PATH_MIG_STATE + if (modify_attr_p->state.rts.opts & IB_MOD_QP_APM_STATE) { + *qp_attr_mask_p |= IB_QP_PATH_MIG_STATE; + qp_attr_p->path_mig_state = modify_attr_p->state.rts.apm_state; + } + + // IB_QP_ACCESS_FLAGS + if (modify_attr_p->state.rts.opts & IB_MOD_QP_ACCESS_CTRL) { + *qp_attr_mask_p |= IB_QP_ACCESS_FLAGS; /* optional flags */ + qp_attr_p->qp_access_flags = map_qp_ibal_acl(modify_attr_p->state.rts.access_ctrl); + } + + // IB_QP_ALT_PATH: Convert alternate RC AV + if (modify_attr_p->state.rts.opts & IB_MOD_QP_ALTERNATE_AV) { + *qp_attr_mask_p |= IB_QP_ALT_PATH; /* optional flag */ + err = mlnx_conv_ibal_av(ib_qp_p->device, + &modify_attr_p->state.rts.alternate_av, &qp_attr_p->alt_ah_attr); + if (err) { + status = IB_ERROR; + break; + } + } + break; + + case IB_QPT_UNRELIABLE_DGRM: + case IB_QPT_QP0: + case IB_QPT_QP1: + default: + if (qp_p->state != IBQPS_RTS) + *qp_attr_mask_p |= /* required flags */ + IB_QP_SQ_PSN; + + // IB_QP_QKEY + if (modify_attr_p->state.rts.opts & IB_MOD_QP_QKEY) { + *qp_attr_mask_p |= IB_QP_QKEY; + qp_attr_p->qkey = cl_ntoh32 (modify_attr_p->state.rts.qkey); + } + break; + + break; + + } + + // IB_QP_SQ_PSN: common for all + qp_attr_p->sq_psn = cl_ntoh32 (modify_attr_p->state.rts.sq_psn); + //NB: IB_QP_CUR_STATE flag is not provisioned by IBAL + break; + + case IB_QPS_SQD: + case IB_QPS_SQD_DRAINING: + case IB_QPS_SQD_DRAINED: + *qp_attr_mask_p |= IB_QP_EN_SQD_ASYNC_NOTIFY; + qp_attr_p->en_sqd_async_notify = (u8)modify_attr_p->state.sqd.sqd_event; + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SHIM ,("IB_QP_EN_SQD_ASYNC_NOTIFY seems like unsupported\n")); + break; + + default: + //NB: is this an error case and we need this message ? What about returning an error ? + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Unmapped qp_state %d\n", modify_attr_p->req_state)); + break; + + } + + return status; +} + +int +mlnx_conv_ibal_av( + IN const struct ib_device *ib_dev_p, + IN const ib_av_attr_t *ibal_av_p, + OUT struct ib_ah_attr *ah_attr_p) +{ + int err = 0; + u8 port_num; + u16 gid_index; + + ah_attr_p->port_num = ibal_av_p->port_num; + ah_attr_p->sl = ibal_av_p->sl; + ah_attr_p->dlid = cl_ntoh16(ibal_av_p->dlid); + //TODO: how static_rate is coded ? + ah_attr_p->static_rate = + (ibal_av_p->static_rate == IB_PATH_RECORD_RATE_10_GBS ? 0 : 3); + ah_attr_p->src_path_bits = ibal_av_p->path_bits; // PATH: + + /* For global destination or Multicast address:*/ + if (ibal_av_p->grh_valid) + { + ah_attr_p->ah_flags |= IB_AH_GRH; + ah_attr_p->grh.hop_limit = ibal_av_p->grh.hop_limit; + ib_grh_get_ver_class_flow( ibal_av_p->grh.ver_class_flow, NULL, + &ah_attr_p->grh.traffic_class, &ah_attr_p->grh.flow_label ); + err = ib_find_cached_gid((struct ib_device *)ib_dev_p, + (union ib_gid *)ibal_av_p->grh.src_gid.raw, &port_num, &gid_index); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ib_find_cached_gid failed %d (%#x). Using default: sgid_index = 0\n", err, err)); + gid_index = 0; + } + else if (port_num != ah_attr_p->port_num) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("ib_find_cached_gid returned wrong port_num %u (Expected - %u). 
Using the expected.\n", + (u32)port_num, (u32)ah_attr_p->port_num)); + } + ah_attr_p->grh.sgid_index = (u8)gid_index; + RtlCopyMemory(ah_attr_p->grh.dgid.raw, ibal_av_p->grh.dest_gid.raw, sizeof(ah_attr_p->grh.dgid)); + } + + return err; +} + +int +mlnx_conv_mthca_av( + IN const struct ib_ah *ib_ah_p, + OUT ib_av_attr_t *ibal_av_p) +{ + int err = 0; + struct ib_ud_header header; + struct mthca_ah *ah_p = (struct mthca_ah *)ib_ah_p; + struct ib_device *ib_dev_p = ib_ah_p->pd->device; + struct mthca_dev *dev_p = (struct mthca_dev *)ib_dev_p; + + err = mthca_read_ah( dev_p, ah_p, &header); + if (err) + goto err_read_ah; + + // common part + ibal_av_p->sl = header.lrh.service_level; + mthca_get_av_params(ah_p, &ibal_av_p->port_num, + &ibal_av_p->dlid, &ibal_av_p->static_rate, &ibal_av_p->path_bits ); + + // GRH + ibal_av_p->grh_valid = header.grh_present; + if (ibal_av_p->grh_valid) { + ibal_av_p->grh.ver_class_flow = ib_grh_set_ver_class_flow( + header.grh.ip_version, header.grh.traffic_class, header.grh.flow_label ); + ibal_av_p->grh.hop_limit = header.grh.hop_limit; + RtlCopyMemory(ibal_av_p->grh.src_gid.raw, + header.grh.source_gid.raw, sizeof(ibal_av_p->grh.src_gid)); + RtlCopyMemory(ibal_av_p->grh.src_gid.raw, + header.grh.destination_gid.raw, sizeof(ibal_av_p->grh.dest_gid)); + } + + //TODO: don't know, how to fill conn. Note, that previous version didn't fill it also. + +err_read_ah: + return err; +} + +void +mlnx_modify_ah( + IN const struct ib_ah *ib_ah_p, + IN const struct ib_ah_attr *ah_attr_p) +{ + struct ib_device *ib_dev_p = ib_ah_p->pd->device; + struct mthca_dev *dev_p = (struct mthca_dev *)ib_dev_p; + + mthca_set_av_params(dev_p, (struct mthca_ah *)ib_ah_p, (struct ib_ah_attr *)ah_attr_p ); +} + diff --git a/branches/IBFD/hw/mthca/kernel/hca_data.h b/branches/IBFD/hw/mthca/kernel/hca_data.h new file mode 100644 index 00000000..a102c612 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_data.h @@ -0,0 +1,395 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#ifndef __HCA_DATA_H__ +#define __HCA_DATA_H__ + + +#include +#include +#include +#include + + +extern char mlnx_uvp_lib_name[]; +extern uint32_t g_skip_tavor_reset; +extern uint32_t g_disable_tavor_reset; +extern uint32_t g_tune_pci; +extern uint32_t g_processor_affinity; +extern uint32_t g_max_DPC_time_us; +extern uint32_t g_profile_qp_num; +extern uint32_t g_profile_rd_out; + + +#define MLNX_MAX_HCA 4 +#define MLNX_NUM_HOBKL MLNX_MAX_HCA +#define MLNX_NUM_HOBUL MLNX_MAX_HCA +#define MLNX_NUM_CB_THR 1 +#define MLNX_SIZE_CB_POOL 256 +#define MLNX_UAL_ALLOC_HCA_UL_RES 1 +#define MLNX_UAL_FREE_HCA_UL_RES 2 + + +// Defines for QP ops +#define MLNX_MAX_NUM_SGE 8 +#define MLNX_MAX_WRS_PER_CHAIN 4 + +#define MLNX_NUM_RESERVED_QPS 16 + +/* + * Completion model. + * 0: No DPC processor assignment + * 1: DPCs per-CQ, processor affinity set at CQ initialization time. + * 2: DPCs per-CQ, processor affinity set at runtime. + * 3: DPCs per-CQ, no processor affinity set. + */ +#define MLNX_COMP_MODEL 3 + +#ifdef DBG +#define VALIDATE_INDEX(index, limit, error, label) \ + { \ + if (index >= limit) \ + { \ + status = error; \ + HCA_PRINT(TRACE_LEVEL_ERROR , g_mlnx_dbg_lvl ,("file %s line %d\n", __FILE__, __LINE__)));\ + goto label; \ + } \ + } +#else +#define VALIDATE_INDEX(index, limit, error, label) +#endif + + + +// Typedefs + +typedef enum { + E_EV_CA=1, + E_EV_QP, + E_EV_CQ, + E_EV_LAST +} ENUM_EVENT_CLASS; + +typedef enum { + E_MARK_CA=1, // Channel Adaptor + E_MARK_PD, // Protection Domain + E_MARK_CQ, // Completion Queue + E_MARK_QP, // Queue Pair + E_MARK_AV, // Address Vector (UD) + E_MARK_MG, // Multicast Group + E_MARK_MR, // Memory Region + E_MARK_MW, // Memory Windows + E_MARK_INVALID, +} ENUM_MARK; + +typedef enum { + E_MR_PHYS=1, + E_MR_SHARED, + E_MR_ANY, + E_MR_INVALID +} ENUM_MR_TYPE; + +/* + * Attribute cache for port info saved to expedite local MAD processing. + * Note that the cache accounts for the worst case GID and PKEY table size + * but is allocated from paged pool, so it's nothing to worry about. 
+ */ + +typedef struct _guid_block +{ + boolean_t valid; + ib_guid_info_t tbl; + +} mlnx_guid_block_t; + +typedef struct _port_info_cache +{ + boolean_t valid; + ib_port_info_t info; + +} mlnx_port_info_cache_t; + +typedef struct _pkey_block +{ + boolean_t valid; + ib_pkey_table_t tbl; + +} mlnx_pkey_block_t; + +typedef struct _sl_vl_cache +{ + boolean_t valid; + ib_slvl_table_t tbl; + +} mlnx_sl_vl_cache_t; + +typedef struct _vl_arb_block +{ + boolean_t valid; + ib_vl_arb_table_t tbl; + +} mlnx_vl_arb_block_t; + +typedef struct _attr_cache +{ + mlnx_guid_block_t guid_block[32]; + mlnx_port_info_cache_t port_info; + mlnx_pkey_block_t pkey_tbl[2048]; + mlnx_sl_vl_cache_t sl_vl; + mlnx_vl_arb_block_t vl_arb[4]; + +} mlnx_cache_t; + +typedef struct _ib_ca { + ENUM_MARK mark; + ci_completion_cb_t comp_cb_p; + ci_async_event_cb_t async_cb_p; + const void *ca_context; + void *cl_device_h; + uint32_t index; + cl_async_proc_t *async_proc_mgr_p; + +} mlnx_hob_t; + +typedef struct HOBUL_t { + int dummy; +#ifdef WIN_TO_BE_REMOVED + pd_info_t *pd_info_tbl; + HH_hca_hndl_t hh_hndl; /* For HH direct access */ + HHUL_hca_hndl_t hhul_hndl; /* user level HCA resources handle for HH */ + uint32_t cq_idx_mask; /* */ + uint32_t qp_idx_mask; /* */ + uint32_t vendor_id; /* \ */ + uint32_t device_id; /* > 3 items needed for initializing user level */ + void *hca_ul_resources_p; /* / */ + MT_size_t cq_ul_resources_sz; /* Needed for allocating user resources for CQs */ + MT_size_t qp_ul_resources_sz; /* Needed for allocating user resources for QPs */ + MT_size_t pd_ul_resources_sz; /* Needed for allocating user resources for PDs */ + uint32_t max_cq; /* Max num. of CQs - size of following table */ + cq_info_t *cq_info_tbl; + uint32_t max_qp; /* Max num. of QPs - size of following table */ + qp_info_t *qp_info_tbl; + uint32_t max_pd; /* Max num. 
of PDs - size of following table */ + uint32_t log2_mpt_size; + atomic32_t count; +#endif +} mlnx_hobul_t, *mlnx_hobul_hndl_t; + +typedef struct _ib_mcast { + ib_gid_t mcast_gid; + struct ib_qp *ib_qp_p; + uint16_t mcast_lid; +} mlnx_mcast_t; + +typedef struct _mlnx_hca_t { + cl_list_item_t list_item; // to include in the HCA chain + net64_t guid; // HCA node Guid + struct mthca_dev *mdev; // VP Driver device + uint32_t hw_ver; // HCA HW version + mlnx_hob_t hob; // HOB - IBAL-related HCA resources + +#ifdef WIN_TO_BE_REMOVED + // removed as it is found in p_ext->cl_ext.p_pdo + const void* __ptr64 p_dev_obj; // Driver PDO +#endif +} mlnx_hca_t; + + +typedef mlnx_hob_t *mlnx_hca_h; + +// Global Variables +//extern mlnx_hca_t mlnx_hca_array[]; +//extern uint32_t mlnx_num_hca; + +extern mlnx_hob_t mlnx_hob_array[]; +extern mlnx_hobul_t *mlnx_hobul_array[]; + +// Functions +void +setup_ci_interface( + IN const ib_net64_t ca_guid, + IN const int is_livefish, + OUT ci_interface_t *p_interface ); + +void +mlnx_hca_insert( + IN mlnx_hca_t *p_hca ); + +void +mlnx_hca_remove( + IN mlnx_hca_t *p_hca ); + +mlnx_hca_t* +mlnx_hca_from_guid( + IN ib_net64_t guid ); + +/* +void +mlnx_names_from_guid( + IN ib_net64_t guid, + OUT char **hca_name_p, + OUT char **dev_name_p); +*/ + +cl_status_t +mlnx_hcas_init( void ); + +cl_status_t +mlnx_hobs_init( void ); + +ib_api_status_t +mlnx_hobs_insert( + IN mlnx_hca_t *p_hca, + OUT mlnx_hob_t **hob_p); + + +ib_api_status_t +mlnx_hobs_set_cb( + IN mlnx_hob_t *hob_p, + IN ci_completion_cb_t comp_cb_p, + IN ci_async_event_cb_t async_cb_p, + IN const void* const ib_context); + +ib_api_status_t +mlnx_hobs_get_context( + IN mlnx_hob_t *hob_p, + OUT void **context_p); + +ib_api_status_t +mlnx_hobs_create_device( + IN mlnx_hob_t *hob_p, + OUT char* dev_name); + +void +mlnx_hobs_remove( + IN mlnx_hob_t *hob_p); + +mlnx_hobul_t * +mlnx_hobs_get_hobul( + IN mlnx_hob_t *hob_p); + +void +mlnx_hobul_get( + IN mlnx_hob_t *hob_p, + OUT void **resources_p ); + +void +mlnx_hobul_delete( + IN mlnx_hob_t *hob_p); + +void +mlnx_conv_hca_cap( + IN struct ib_device *ib_dev, + IN struct ib_device_attr *hca_info_p, + IN struct ib_port_attr *hca_ports, + OUT ib_ca_attr_t *ca_attr_p); + +ib_api_status_t +mlnx_local_mad ( + IN const ib_ca_handle_t h_ca, + IN const uint8_t port_num, + IN const ib_av_attr_t *p_src_av_attr, + IN const ib_mad_t *p_mad_in, + OUT ib_mad_t *p_mad_out ); + +void +mlnx_memory_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_memory_if_livefish( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_ecc_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_direct_if( + IN OUT ci_interface_t *p_interface ); + +void +mlnx_mcast_if( + IN OUT ci_interface_t *p_interface ); + +ib_api_status_t +fw_access_ctrl( + IN const void* __ptr64 context, + IN const void* __ptr64* const handle_array OPTIONAL, + IN uint32_t num_handles, + IN ib_ci_op_t* const p_ci_op, + IN OUT ci_umv_buf_t *p_umv_buf OPTIONAL); + +void unmap_crspace_for_all( struct ib_ucontext *p_context ); + +void cq_comp_handler(struct ib_cq *cq, void *context); + +void ca_event_handler(struct ib_event *ev, void *context); + +void srq_event_handler(struct ib_event *ev, void *context); + +void qp_event_handler(struct ib_event *ev, void *context); + +void cq_event_handler(struct ib_event *ev, void *context); + +ib_qp_state_t mlnx_qps_to_ibal(enum ib_qp_state qps); + +enum ib_qp_state mlnx_qps_from_ibal(ib_qp_state_t ib_qps); + +ib_api_status_t +mlnx_conv_qp_modify_attr( + IN const struct ib_qp *ib_qp_p, + IN 
ib_qp_type_t qp_type, + IN const ib_qp_mod_t *modify_attr_p, + OUT struct ib_qp_attr *qp_attr_p, + OUT int *qp_attr_mask_p + ); + +int +mlnx_conv_ibal_av( + IN const struct ib_device *ib_dev_p, + IN const ib_av_attr_t *ibal_av_p, + OUT struct ib_ah_attr *ah_attr_p); + +int +mlnx_conv_mthca_av( + IN const struct ib_ah *ib_ah_p, + OUT ib_av_attr_t *ibal_av_p); + +void +mlnx_modify_ah( + IN const struct ib_ah *ib_ah_p, + IN const struct ib_ah_attr *ah_attr_p); + +void set_skip_tavor_reset(); + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/hca_debug.h b/branches/IBFD/hw/mthca/kernel/hca_debug.h new file mode 100644 index 00000000..18dba8f0 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_debug.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +#ifndef _HCA_DEBUG_H_ +#define _HCA_DEBUG_H_ + + +extern uint32_t g_mthca_dbg_level; +extern uint32_t g_mthca_dbg_flags; +#define MAX_LOG_BUF_LEN 512 +extern WCHAR g_wlog_buf[ MAX_LOG_BUF_LEN ]; +extern UCHAR g_slog_buf[ MAX_LOG_BUF_LEN ]; + +static void _build_str( const char * format, ... 
) +{ + va_list p_arg; + va_start(p_arg, format); + vsprintf((char *)g_slog_buf , format , p_arg); + swprintf(g_wlog_buf, L"%S", g_slog_buf); + va_end(p_arg); +} + +#define HCA_PRINT_TO_EVENT_LOG(_obj_,_level_,_flag_,_msg_) \ + { \ + NTSTATUS event_id; \ + switch (_level_) { \ + case TRACE_LEVEL_FATAL: case TRACE_LEVEL_ERROR: event_id = EVENT_MTHCA_ANY_ERROR; break; \ + case TRACE_LEVEL_WARNING: event_id = EVENT_MTHCA_ANY_WARN; break; \ + default: event_id = EVENT_MTHCA_ANY_INFO; break; \ + } \ + _build_str _msg_; \ + WriteEventLogEntryStr( _obj_, (ULONG)event_id, 0, 0, g_wlog_buf, 0, 0 ); \ + } + +#define HCA_PRINT_EV_MDEV(_level_,_flag_,_msg_) \ + HCA_PRINT_TO_EVENT_LOG(mdev->ext->cl_ext.p_self_do,_level_,_flag_,_msg_) + + +#if defined(EVENT_TRACING) +// +// Software Tracing Definitions +// + +#define WPP_CONTROL_GUIDS \ + WPP_DEFINE_CONTROL_GUID(HCACtlGuid,(8BF1F640,63FE,4743,B9EF,FA38C695BFDE), \ + WPP_DEFINE_BIT( HCA_DBG_DEV) \ + WPP_DEFINE_BIT( HCA_DBG_PNP) \ + WPP_DEFINE_BIT( HCA_DBG_INIT) \ + WPP_DEFINE_BIT( HCA_DBG_MAD) \ + WPP_DEFINE_BIT( HCA_DBG_PO) \ + WPP_DEFINE_BIT( HCA_DBG_PD)\ + WPP_DEFINE_BIT( HCA_DBG_CQ) \ + WPP_DEFINE_BIT( HCA_DBG_QP) \ + WPP_DEFINE_BIT( HCA_DBG_MEMORY) \ + WPP_DEFINE_BIT( HCA_DBG_AV) \ + WPP_DEFINE_BIT( HCA_DBG_SRQ) \ + WPP_DEFINE_BIT( HCA_DBG_MCAST) \ + WPP_DEFINE_BIT( HCA_DBG_LOW) \ + WPP_DEFINE_BIT( HCA_DBG_SHIM)) + + +#define WPP_GLOBALLOGGER + + +#define WPP_LEVEL_FLAGS_ENABLED(lvl, flags) (WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= lvl) +#define WPP_LEVEL_FLAGS_LOGGER(lvl,flags) WPP_LEVEL_LOGGER(flags) +#define WPP_FLAG_ENABLED(flags)(WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= TRACE_LEVEL_VERBOSE) +#define WPP_FLAG_LOGGER(flags) WPP_LEVEL_LOGGER(flags) + + +// begin_wpp config +// HCA_ENTER(FLAG); +// HCA_EXIT(FLAG); +// USEPREFIX(HCA_PRINT, "%!STDPREFIX! 
[MTHCA] :%!FUNC!() :"); +// USESUFFIX(HCA_ENTER, " [MTHCA] :%!FUNC!()["); +// USESUFFIX(HCA_EXIT, " [MTHCA] :%!FUNC!()]"); +// end_wpp + + + +#define HCA_PRINT_EV(_level_,_flag_,_msg_) \ + { \ + HCA_PRINT_EV_MDEV(_level_,_flag_,_msg_) \ + } + + +#else + + +#include + +/* + * Debug macros + */ + + +#define HCA_DBG_DEV (1 << 0) +#define HCA_DBG_PNP (1<<1) +#define HCA_DBG_INIT (1 << 2) +#define HCA_DBG_MAD (1 << 3) +#define HCA_DBG_PO (1 << 4) +#define HCA_DBG_PD (1<<5) +#define HCA_DBG_QP (1 << 6) +#define HCA_DBG_CQ (1 << 7) +#define HCA_DBG_MEMORY (1 << 8) +#define HCA_DBG_AV (1<<9) +#define HCA_DBG_SRQ (1 << 10) +#define HCA_DBG_MCAST (1<<11) +#define HCA_DBG_LOW (1 << 12) +#define HCA_DBG_SHIM (1 << 13) + + +#if DBG + +// assignment of _level_ is need to to overcome warning C4127 +#define HCA_PRINT(_level_,_flag_,_msg_) \ + { \ + int __lvl = _level_; \ + if (g_mthca_dbg_level >= (_level_) && \ + (g_mthca_dbg_flags & (_flag_))) { \ + DbgPrint ("~%d:[MTHCA] %s() :", KeGetCurrentProcessorNumber(), __FUNCTION__); \ + if(__lvl == TRACE_LEVEL_ERROR) DbgPrint ("***ERROR*** "); \ + DbgPrint _msg_; \ + } \ + } + +#else + +#define HCA_PRINT(lvl ,flags, msg) + +#endif + +#define HCA_PRINT_EV(_level_,_flag_,_msg_) \ + { \ + HCA_PRINT(_level_,_flag_,_msg_) \ + HCA_PRINT_EV_MDEV(_level_,_flag_,_msg_) \ + } + +#define HCA_ENTER(flags)\ + HCA_PRINT(TRACE_LEVEL_VERBOSE, flags,("[\n")); + +#define HCA_EXIT(flags)\ + HCA_PRINT(TRACE_LEVEL_VERBOSE, flags, ("]\n" )); + + +#endif //EVENT_TRACING + + +#endif /*_HCA_DEBUG_H_ */ + + diff --git a/branches/IBFD/hw/mthca/kernel/hca_direct.c b/branches/IBFD/hw/mthca/kernel/hca_direct.c new file mode 100644 index 00000000..69e91355 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_direct.c @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +#include "hca_driver.h" +#include "hca_debug.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_direct.tmh" +#endif +#include "mthca_dev.h" + + +/* Controls whether to use the VAPI entrypoints in THH, or the IBAL native ones. */ +#define MLNX_SEND_NATIVE 1 +#define MLNX_RECV_NATIVE 1 +#define MLNX_POLL_NATIVE 1 + + +/* +* Work Request Processing Verbs. 
+*/ + + +ib_api_status_t +mlnx_post_send ( + IN const ib_qp_handle_t h_qp, + IN ib_send_wr_t *p_send_wr, + OUT ib_send_wr_t **pp_failed ) +{ + int err; + ib_api_status_t status; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + struct ib_device *ib_dev = ib_qp_p->device; + + HCA_ENTER(HCA_DBG_QP); + + err = ib_dev->post_send(ib_qp_p, p_send_wr, pp_failed ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP, + ("post_send failed (%d)\n", err)); + if (err == -ENOMEM) + status = IB_INSUFFICIENT_RESOURCES; + else + status = errno_to_iberr(err); + goto err_post_send; + } + + status = IB_SUCCESS; + +err_post_send: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_QP); + return status; + +} + + +ib_api_status_t +mlnx_post_recv ( + IN const ib_qp_handle_t h_qp, + IN ib_recv_wr_t *p_recv_wr, + OUT ib_recv_wr_t **pp_failed OPTIONAL ) +{ + int err; + ib_api_status_t status; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + struct ib_device *ib_dev = ib_qp_p->device; + + HCA_ENTER(HCA_DBG_QP); + + err = ib_dev->post_recv(ib_qp_p, p_recv_wr, pp_failed ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP, + ("post_recv failed (%d)\n", err)); + if (err == -ENOMEM) + status = IB_INSUFFICIENT_RESOURCES; + else + status = errno_to_iberr(err); + goto err_post_recv; + } + + status = IB_SUCCESS; + +err_post_recv: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_QP); + return status; + +} + +ib_api_status_t +mlnx_post_srq_recv ( + IN const ib_srq_handle_t h_srq, + IN ib_recv_wr_t *p_recv_wr, + OUT ib_recv_wr_t **pp_failed OPTIONAL ) +{ + int err; + ib_api_status_t status; + struct ib_srq *ib_srq_p = (struct ib_srq *)h_srq; + struct ib_device *ib_dev = ib_srq_p->device; + + HCA_ENTER(HCA_DBG_SRQ); + + err = ib_dev->post_srq_recv(ib_srq_p, p_recv_wr, pp_failed ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("post_srq_recv failed (%d)\n", err)); + if (err == -ENOMEM) + status = IB_INSUFFICIENT_RESOURCES; + else + status = errno_to_iberr(err); + goto err_post_recv; + } + + status = IB_SUCCESS; + +err_post_recv: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_SRQ); + return status; + +} + +/* +* Completion Processing and Completion Notification Request Verbs. 
+*/ + +ib_api_status_t +mlnx_peek_cq( + IN const ib_cq_handle_t h_cq, + OUT uint32_t* const p_n_cqes ) +{ + UNREFERENCED_PARAMETER(h_cq); + UNREFERENCED_PARAMETER(p_n_cqes); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("mlnx_peek_cq not implemented\n")); + return IB_INVALID_CA_HANDLE; +} + +ib_api_status_t +mlnx_poll_cq ( + IN const ib_cq_handle_t h_cq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_cq *ib_cq_p = (struct ib_cq *)h_cq; + PREP_IBDEV_FOR_PRINT(ib_cq_p->device); + + HCA_ENTER(HCA_DBG_CQ); + + // sanity checks + if (!pp_free_wclist || !pp_done_wclist || !*pp_free_wclist) { + status = IB_INVALID_PARAMETER; + goto err_invalid_params; + } + + // poll CQ + err = mthca_poll_cq_list(ib_cq_p, pp_free_wclist, pp_done_wclist ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ, + ("mthca_poll_cq_list failed (%d)\n", err)); + status = errno_to_iberr(err); + }else if (!*pp_done_wclist) + status = IB_NOT_FOUND; + +err_invalid_params: + if (status != IB_SUCCESS && status != IB_NOT_FOUND) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_CQ); + return status; + +} + +ib_api_status_t +mlnx_enable_cq_notify ( + IN const ib_cq_handle_t h_cq, + IN const boolean_t solicited ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_cq *ib_cq_p = (struct ib_cq *)h_cq; + PREP_IBDEV_FOR_PRINT(ib_cq_p->device); + + HCA_ENTER(HCA_DBG_CQ); + + // REARM CQ + err = ib_req_notify_cq(ib_cq_p, (solicited) ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("ib_req_notify_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + } + + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_CQ); + return status; +} + +ib_api_status_t +mlnx_enable_ncomp_cq_notify ( + IN const ib_cq_handle_t h_cq, + IN const uint32_t n_cqes ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_cq *ib_cq_p = (struct ib_cq *)h_cq; + PREP_IBDEV_FOR_PRINT(ib_cq_p->device); + + HCA_ENTER(HCA_DBG_CQ); + + err = ib_req_ncomp_notif(ib_cq_p, n_cqes ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("ib_req_ncomp_notif failed (%d)\n", err)); + status = errno_to_iberr(err); + } + + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_CQ); + return status; +} + +ib_api_status_t +mlnx_bind_mw ( + IN const ib_mw_handle_t h_mw, + IN const ib_qp_handle_t h_qp, + IN ib_bind_wr_t* const p_mw_bind, + OUT net32_t* const p_rkey ) +{ + UNREFERENCED_PARAMETER(h_mw); + UNREFERENCED_PARAMETER(h_qp); + UNREFERENCED_PARAMETER(p_mw_bind); + UNREFERENCED_PARAMETER(p_rkey); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM,("mlnx_bind_mw not implemented\n")); + return IB_INVALID_CA_HANDLE; +} + + +void +mlnx_direct_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->post_send = mlnx_post_send; + p_interface->post_recv = mlnx_post_recv; + p_interface->post_srq_recv = mlnx_post_srq_recv; + + p_interface->enable_ncomp_cq_notify = mlnx_enable_ncomp_cq_notify; + p_interface->peek_cq = NULL; /* mlnx_peek_cq: Not implemented */ + p_interface->poll_cq = mlnx_poll_cq; + p_interface->enable_cq_notify = mlnx_enable_cq_notify; + + p_interface->bind_mw = mlnx_bind_mw; +} diff --git 
a/branches/IBFD/hw/mthca/kernel/hca_driver.c b/branches/IBFD/hw/mthca/kernel/hca_driver.c new file mode 100644 index 00000000..417da7e3 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_driver.c @@ -0,0 +1,1038 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +/* + * Provides the driver entry points for the Tavor VPD. + */ + +#include +#include "hca_driver.h" +#include "hca_debug.h" + +#include "mthca_log.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_driver.tmh" +#endif +#include "mthca_dev.h" +#include +#include +#pragma warning( push, 3 ) +//#include "MdCard.h" +#pragma warning( pop ) +#include +#include "mthca/mthca_vc.h" +#include "mt_pa_cash.h" +/* from \inc\platform\evntrace.h +#define TRACE_LEVEL_NONE 0 // Tracing is not on +#define TRACE_LEVEL_FATAL 1 // Abnormal exit or termination +#define TRACE_LEVEL_ERROR 2 // Severe errors that need logging +#define TRACE_LEVEL_WARNING 3 // Warnings such as allocation failure +#define TRACE_LEVEL_INFORMATION 4 // Includes non-error cases(e.g.,Entry-Exit) +#define TRACE_LEVEL_VERBOSE 5 // Detailed traces from intermediate steps +*/ +uint32_t g_mthca_dbg_level = TRACE_LEVEL_INFORMATION; +uint32_t g_mthca_dbg_flags= 0xffff; +WCHAR g_wlog_buf[ MAX_LOG_BUF_LEN ]; +UCHAR g_slog_buf[ MAX_LOG_BUF_LEN ]; +uint32_t g_skip_tavor_reset=0; /* skip reset for Tavor cards */ +uint32_t g_disable_tavor_reset=1; /* disable Tavor reset for the next driver load */ +uint32_t g_tune_pci=0; /* 0 - skip tuning PCI configuration space of HCAs */ +uint32_t g_processor_affinity = 0; +uint32_t g_max_DPC_time_us = 10000; +uint32_t g_profile_qp_num = 0; +uint32_t g_profile_rd_out = 0xffffffff; + +UNICODE_STRING g_param_path; + + +/* + * UVP name does not include file extension. For debug builds, UAL + * will append "d.dll". 
For release builds, UAL will append ".dll" + */ +char mlnx_uvp_lib_name[MAX_LIB_NAME] = {"mthcau"}; + +void reregister_hca( hca_dev_ext_t *p_ext ); + + +NTSTATUS +DriverEntry( + IN PDRIVER_OBJECT p_driver_obj, + IN PUNICODE_STRING p_registry_path ); + +static NTSTATUS +__read_registry( + IN UNICODE_STRING* const p_Param_Path ); + +static void +hca_drv_unload( + IN PDRIVER_OBJECT p_driver_obj ); + +static NTSTATUS +hca_sysctl( + IN PDEVICE_OBJECT p_dev_obj, + IN PIRP p_irp ); + +static NTSTATUS +__pnp_notify_target( + IN TARGET_DEVICE_REMOVAL_NOTIFICATION *p_notify, + IN void *context ); + +static NTSTATUS +__pnp_notify_ifc( + IN DEVICE_INTERFACE_CHANGE_NOTIFICATION *p_notify, + IN void *context ); + +static NTSTATUS +fw_access_pciconf ( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN ULONG op_flag, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ); + +static NTSTATUS +fw_flash_write_data ( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ); + +static NTSTATUS +fw_flash_read_data ( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ); + +static NTSTATUS +fw_flash_read4( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN uint32_t addr, + IN OUT uint32_t *p_data); + +static NTSTATUS +fw_flash_readbuf( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN uint32_t offset, + IN OUT void *p_data, + IN uint32_t len); +static NTSTATUS +fw_set_bank( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN uint32_t bank ); + +static NTSTATUS +fw_flash_init( + IN BUS_INTERFACE_STANDARD *p_BusInterface ); + +static NTSTATUS +fw_flash_deinit( + IN BUS_INTERFACE_STANDARD *p_BusInterface ); + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (INIT, DriverEntry) +#pragma alloc_text (INIT, __read_registry) +#pragma alloc_text (PAGE, hca_drv_unload) +#pragma alloc_text (PAGE, hca_sysctl) +#endif + +NTSTATUS +DriverEntry( + IN PDRIVER_OBJECT p_driver_obj, + IN PUNICODE_STRING p_registry_path ) +{ + NTSTATUS status; + cl_status_t cl_status; +#if defined(EVENT_TRACING) + WPP_INIT_TRACING(p_driver_obj ,p_registry_path); +#endif + HCA_ENTER( HCA_DBG_DEV ); + + /* init common mechanisms */ + fill_bit_tbls(); + + status = __read_registry( p_registry_path ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_INIT, + ("__read_registry_path returned 0x%X.\n", status)); + return status; + } + + /* Initialize Adapter DB */ + cl_status = mlnx_hcas_init(); + if( cl_status != CL_SUCCESS ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_INIT , + ("mlnx_hcas_init returned %#x.\n", cl_status)); + return cl_to_ntstatus( cl_status ); + } +// cl_memclr( mlnx_hca_array, MLNX_MAX_HCA * sizeof(ci_interface_t) ); + + /* init pa cash */ + status = pa_cash_init(); + if (status) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_INIT , + ("pa_cash_init failed.\n")); + return status; + } + + /*leo: init function table */ + hca_init_vfptr(); + + p_driver_obj->MajorFunction[IRP_MJ_PNP] = cl_pnp; + p_driver_obj->MajorFunction[IRP_MJ_POWER] = cl_power; + p_driver_obj->MajorFunction[IRP_MJ_SYSTEM_CONTROL] = hca_sysctl; + p_driver_obj->DriverUnload = hca_drv_unload; + p_driver_obj->DriverExtension->AddDevice = hca_add_device; + + /* init core */ + if (ib_core_init()) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_INIT ,("Failed to init core, aborting.\n")); + return STATUS_UNSUCCESSFUL; + } + + /* init uverbs module */ + if (ib_uverbs_init()) { + HCA_PRINT( TRACE_LEVEL_ERROR 
,HCA_DBG_INIT ,("Failed ib_uverbs_init, aborting.\n")); + return STATUS_UNSUCCESSFUL; + } + HCA_EXIT( HCA_DBG_DEV ); + return STATUS_SUCCESS; +} + + +static NTSTATUS +__read_registry( + IN UNICODE_STRING* const p_registry_path ) +{ + NTSTATUS status; + /* Remember the terminating entry in the table below. */ + RTL_QUERY_REGISTRY_TABLE table[10]; + + HCA_ENTER( HCA_DBG_DEV ); + + RtlInitUnicodeString( &g_param_path, NULL ); + g_param_path.MaximumLength = p_registry_path->Length + + sizeof(L"\\Parameters"); + g_param_path.Buffer = cl_zalloc( g_param_path.MaximumLength ); + if( !g_param_path.Buffer ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_INIT, + ("Failed to allocate parameters path buffer.\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlAppendUnicodeStringToString( &g_param_path, p_registry_path ); + RtlAppendUnicodeToString( &g_param_path, L"\\Parameters" ); + + /* + * Clear the table. This clears all the query callback pointers, + * and sets up the terminating table entry. + */ + cl_memclr( table, sizeof(table) ); + + /* Setup the table entries. */ + table[0].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[0].Name = L"DebugLevel"; + table[0].EntryContext = &g_mthca_dbg_level; + table[0].DefaultType = REG_DWORD; + table[0].DefaultData = &g_mthca_dbg_level; + table[0].DefaultLength = sizeof(ULONG); + + + table[1].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[1].Name = L"DebugFlags"; + table[1].EntryContext = &g_mthca_dbg_flags; + table[1].DefaultType = REG_DWORD; + table[1].DefaultData = &g_mthca_dbg_flags; + table[1].DefaultLength = sizeof(ULONG); + + table[2].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[2].Name = L"SkipTavorReset"; + table[2].EntryContext = &g_skip_tavor_reset; + table[2].DefaultType = REG_DWORD; + table[2].DefaultData = &g_skip_tavor_reset; + table[2].DefaultLength = sizeof(ULONG); + + table[3].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[3].Name = L"DisableTavorResetOnFailure"; + table[3].EntryContext = &g_disable_tavor_reset; + table[3].DefaultType = REG_DWORD; + table[3].DefaultData = &g_disable_tavor_reset; + table[3].DefaultLength = sizeof(ULONG); + + table[4].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[4].Name = L"TunePci"; + table[4].EntryContext = &g_tune_pci; + table[4].DefaultType = REG_DWORD; + table[4].DefaultData = &g_tune_pci; + table[4].DefaultLength = sizeof(ULONG); + + table[5].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[5].Name = L"ProcessorAffinity"; + table[5].EntryContext = &g_processor_affinity; + table[5].DefaultType = REG_DWORD; + table[5].DefaultData = &g_processor_affinity; + table[5].DefaultLength = sizeof(ULONG); + + table[6].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[6].Name = L"MaxDpcTimeUs"; + table[6].EntryContext = &g_max_DPC_time_us; + table[6].DefaultType = REG_DWORD; + table[6].DefaultData = &g_max_DPC_time_us; + table[6].DefaultLength = sizeof(ULONG); + + table[7].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[7].Name = L"ProfileQpNum"; + table[7].EntryContext = &g_profile_qp_num; + table[7].DefaultType = REG_DWORD; + table[7].DefaultData = &g_profile_qp_num; + table[7].DefaultLength = sizeof(ULONG); + + table[8].Flags = RTL_QUERY_REGISTRY_DIRECT; + table[8].Name = L"ProfileRdOut"; + table[8].EntryContext = &g_profile_rd_out; + table[8].DefaultType = REG_DWORD; + table[8].DefaultData = &g_profile_rd_out; + table[8].DefaultLength = sizeof(ULONG); + + /* Have at it! 
*/ + status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE, + g_param_path.Buffer, table, NULL, NULL ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_INIT, + ("debug level %d debug flags 0x%.8x SkipTavorReset %d DisableTavorReset %d TunePci %d" + "g_processor_affinity %d g_max_DPC_time_us %d g_profile_qp_num %d g_profile_rd_out %d\n", + g_mthca_dbg_level, g_mthca_dbg_flags, + g_skip_tavor_reset, g_disable_tavor_reset, + g_tune_pci, g_processor_affinity, g_max_DPC_time_us, + g_profile_qp_num, g_profile_rd_out )); + + HCA_EXIT( HCA_DBG_DEV ); + return status; +} + +void set_skip_tavor_reset() +{ + NTSTATUS status; + HANDLE key_handle; + UNICODE_STRING key_name; + ULONG val = 1; + OBJECT_ATTRIBUTES oa; + + HCA_ENTER( HCA_DBG_DEV ); + + InitializeObjectAttributes( &oa, &g_param_path, + OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE, NULL, NULL ); + + + status = ZwOpenKey( &key_handle, GENERIC_WRITE, &oa ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_LOW, + ("ZwOpenKey failed (%#x)\n", status)); + goto err_open_key; + } + + RtlInitUnicodeString( &key_name, L"SkipTavorReset" ); + status = ZwSetValueKey( key_handle, &key_name, 0, + REG_DWORD, &val, sizeof(ULONG) ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_LOW, + ("ZwSetValueKey failed (%#x)\n", status)); + } + + ZwClose( key_handle ); + +err_open_key: + HCA_EXIT( HCA_DBG_DEV ); +} + +static void +hca_drv_unload( + IN PDRIVER_OBJECT p_driver_obj ) +{ + HCA_ENTER( HCA_DBG_DEV ); + + UNUSED_PARAM( p_driver_obj ); + + pa_cash_release(); + ib_uverbs_cleanup(); + ib_core_cleanup(); + cl_free( g_param_path.Buffer ); + + HCA_EXIT( HCA_DBG_DEV ); +#if defined(EVENT_TRACING) + WPP_CLEANUP(p_driver_obj); +#endif + +} + + +static NTSTATUS +hca_sysctl( + IN PDEVICE_OBJECT p_dev_obj, + IN PIRP p_irp ) +{ + NTSTATUS status; + hca_dev_ext_t *p_ext; + + HCA_ENTER( HCA_DBG_DEV ); + + p_ext = p_dev_obj->DeviceExtension; + + IoSkipCurrentIrpStackLocation( p_irp ); + status = IoCallDriver( p_ext->cl_ext.p_next_do, p_irp ); + + HCA_EXIT( HCA_DBG_DEV ); + return status; +} + +typedef struct Primary_Sector{ + uint32_t fi_addr; + uint32_t fi_size; + uint32_t signature; + uint32_t fw_reserved[5]; + uint32_t vsd[56]; + uint32_t branch_to; + uint32_t crc016; +} primary_sector_t; + +static uint32_t old_dir; +static uint32_t old_pol; +static uint32_t old_mod; +static uint32_t old_dat; + +static NTSTATUS +fw_access_pciconf ( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN ULONG op_flag, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ) +{ + + ULONG bytes; + NTSTATUS status = STATUS_SUCCESS; + + PAGED_CODE(); + + if( !p_buffer ) + return STATUS_INVALID_PARAMETER; + + if (p_BusInterface) + { + + bytes = p_BusInterface->SetBusData( + p_BusInterface->Context, + PCI_WHICHSPACE_CONFIG, + (PVOID)&offset, + PCI_CONF_ADDR, + sizeof(ULONG) ); + + if( op_flag == 0 ) + { + if ( bytes ) + bytes = p_BusInterface->GetBusData( + p_BusInterface->Context, + PCI_WHICHSPACE_CONFIG, + p_buffer, + PCI_CONF_DATA, + length ); + if ( !bytes ) + status = STATUS_NOT_SUPPORTED; + } + + else + { + if ( bytes ) + bytes = p_BusInterface->SetBusData( + p_BusInterface->Context, + PCI_WHICHSPACE_CONFIG, + p_buffer, + PCI_CONF_DATA, + length); + + if ( !bytes ) + status = STATUS_NOT_SUPPORTED; + } + } + return status; +} + + +static NTSTATUS +__map_crspace( + IN struct ib_ucontext * p_context, + IN mlnx_hob_t * p_hob, + IN PVOID p_buf, + IN ULONG buf_size + ) +{ + NTSTATUS status; + PMDL p_mdl; + PVOID ua, ka; + ULONG sz; + 
hca_dev_ext_t *p_ext = EXT_FROM_HOB(p_hob); + map_crspace *p_res = (map_crspace *)p_buf; + + HCA_ENTER( HCA_DBG_PNP ); + + // sanity checks + if ( buf_size < sizeof *p_res || !p_buf ) { + status = STATUS_INVALID_PARAMETER; + goto err_invalid_params; + } + + // map memory + sz =(ULONG)p_ext->bar[HCA_BAR_TYPE_HCR].size; + if (!p_ext->bar[HCA_BAR_TYPE_HCR].virt) { + PHYSICAL_ADDRESS pa; + pa.QuadPart = p_ext->bar[HCA_BAR_TYPE_HCR].phys; + ka = MmMapIoSpace( pa, sz, MmNonCached ); + if ( ka == NULL) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("No kernel mapping of CR space.\n") ); + status = STATUS_INSUFFICIENT_RESOURCES; + goto err_map_to_kernel; + } + p_ext->bar[HCA_BAR_TYPE_HCR].virt = ka; + } + ka = p_ext->bar[HCA_BAR_TYPE_HCR].virt; + + // prepare for mapping to user space + p_mdl = IoAllocateMdl( ka, sz, FALSE,FALSE,NULL); + if (p_mdl == NULL) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("IoAllocateMdl failed.\n") ); + status = STATUS_INSUFFICIENT_RESOURCES; + goto err_alloc_mdl; + } + + // fill MDL + MmBuildMdlForNonPagedPool(p_mdl); + + // map the buffer into user space + __try + { + ua = MmMapLockedPagesSpecifyCache( p_mdl, UserMode, MmNonCached, + NULL, FALSE, NormalPagePriority ); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("MmMapLockedPagesSpecifyCache failed.\n") ); + status = STATUS_INSUFFICIENT_RESOURCES; + goto err_map_to_user; + } + + // fill the results + p_res->va = (uint64_t)(ULONG_PTR)ua; + p_res->size = sz; + + // resource tracking + p_context->p_mdl = p_mdl; + p_context->va = ua; + +#if 0 + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SHIM, + ("MTHCA: __map_crspace succeeded with .ka %I64x, size %I64x va %I64x, size %x, pa %I64x \n", + p_ext->bar[HCA_BAR_TYPE_HCR].virt, p_ext->bar[HCA_BAR_TYPE_HCR].size, + p_res->va, p_res->size, p_ext->bar[HCA_BAR_TYPE_HCR].phys )); +#endif + status = STATUS_SUCCESS; + goto out; + +err_map_to_user: + IoFreeMdl( p_mdl ); +err_alloc_mdl: +err_map_to_kernel: +err_invalid_params: +out: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static void +__unmap_crspace( + IN struct ib_ucontext * p_context + ) +{ + HCA_ENTER( HCA_DBG_PNP ); + + if (p_context->va && p_context->p_mdl) { + MmUnmapLockedPages(p_context->va, p_context->p_mdl); + IoFreeMdl( p_context->p_mdl ); + p_context->va = p_context->p_mdl = NULL; + //NB: the unmap of IO space is being done in __UnmapHcaMemoryResources + } + + HCA_EXIT( HCA_DBG_PNP ); +} + + +static void +__open_fw_access( + IN struct ib_ucontext* p_context, + IN PBUS_INTERFACE_STANDARD p_bus_interface ) +{ + if( !p_context->fw_if_open ) + { + p_bus_interface->InterfaceReference( p_bus_interface->Context ); + p_context->fw_if_open = TRUE; + } +} + + +static void +__close_fw_access( + IN struct ib_ucontext * p_context, + IN PBUS_INTERFACE_STANDARD p_bus_interface + ) +{ + if (p_context->fw_if_open ) { + p_bus_interface->InterfaceDereference((PVOID)p_bus_interface->Context); + p_context->fw_if_open = FALSE; + } +} + + +void +unmap_crspace_for_all( struct ib_ucontext *p_context ) +{ + mlnx_hob_t *p_hob = HOB_FROM_IBDEV( p_context->device ); + hca_dev_ext_t *p_ext = EXT_FROM_HOB(p_hob); + PBUS_INTERFACE_STANDARD p_bus_interface = &p_ext->hcaBusIfc; + + HCA_ENTER( HCA_DBG_PNP ); + + down( &p_context->mutex ); + __unmap_crspace( p_context); + __close_fw_access(p_context, p_bus_interface); + up( &p_context->mutex ); + + HCA_EXIT( HCA_DBG_PNP ); +} + +ib_api_status_t +fw_access_ctrl( + IN const ib_ca_handle_t h_ca, + IN const void* __ptr64* const handle_array 
OPTIONAL, + IN uint32_t num_handles, + IN ib_ci_op_t* const p_ci_op, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + DEVICE_OBJECT *p_dev_obj; + PBUS_INTERFACE_STANDARD p_bus_interface; + NTSTATUS status = STATUS_SUCCESS; + PVOID p_data; + ULONG offset; + ULONG POINTER_ALIGNMENT length; + struct ib_ucontext * p_context; + mlnx_hob_t *p_hob; + hca_dev_ext_t *p_ext; + + UNREFERENCED_PARAMETER(handle_array); + UNREFERENCED_PARAMETER(num_handles); + + if( !p_umv_buf ) + return IB_UNSUPPORTED; + + p_context = (struct ib_ucontext *)h_ca; + p_hob = HOB_FROM_IBDEV( p_context->device ); + p_ext = EXT_FROM_HOB(p_hob); + p_dev_obj = (DEVICE_OBJECT *)p_ext->cl_ext.p_self_do; + p_bus_interface = &p_ext->hcaBusIfc; + + if ( !p_ci_op ) + return IB_INVALID_PARAMETER; + + length = p_ci_op->buf_size; + offset = p_ci_op->buf_info; + p_data = p_ci_op->p_buf; + + down( &p_context->mutex ); + + switch ( p_ci_op->command ) + { + case FW_REREGISTER_HCA: + reregister_hca(p_ext); + break; + + case FW_MAP_CRSPACE: + status = __map_crspace(p_context, p_hob, p_data, length); + break; + + case FW_UNMAP_CRSPACE: + __unmap_crspace(p_context); + break; + + case FW_OPEN_IF: // open BusInterface + __open_fw_access( p_context, p_bus_interface ); + break; + + case FW_READ: // read data from flash + if ( p_context->fw_if_open ) + status = fw_flash_read_data(p_bus_interface, p_data, offset, length); + break; + + case FW_WRITE: // write data to flash + if ( p_context->fw_if_open ) + status = fw_flash_write_data(p_bus_interface, p_data, offset, length); + break; + + case FW_READ_CMD: + if ( p_context->fw_if_open ) + status = fw_access_pciconf(p_bus_interface, 0 , p_data, offset, 4); + break; + + case FW_WRITE_CMD: + if ( p_context->fw_if_open ) + status = fw_access_pciconf(p_bus_interface, 1 , p_data, offset, 4); + break; + + case FW_CLOSE_IF: // close BusInterface + __close_fw_access(p_context, p_bus_interface); + break; + + default: + status = STATUS_INVALID_DEVICE_REQUEST; + } + + if ( status != STATUS_SUCCESS ) { + __close_fw_access(p_context, p_bus_interface); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_INIT, + ("fw_access_ctrl failed, ntstatus: %08x.\n", status)); + } + + up( &p_context->mutex ); + + switch( status ) { + case STATUS_SUCCESS: return IB_SUCCESS; + case STATUS_INVALID_DEVICE_REQUEST: return IB_UNSUPPORTED; + case STATUS_INSUFFICIENT_RESOURCES: return IB_INSUFFICIENT_RESOURCES; + default: return IB_ERROR; + } +} + +static NTSTATUS +fw_flash_write_data ( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ) +{ + NTSTATUS status; + uint32_t cnt = 0; + uint32_t lcl_data; + + if (!length) + return IB_INVALID_PARAMETER; + + lcl_data = (*((uint32_t*)p_buffer) << 24); + + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &lcl_data, FLASH_OFFSET+4, length ); + if ( status != STATUS_SUCCESS ) + return status; + lcl_data = ( WRITE_BIT | (offset & ADDR_MSK)); + + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &lcl_data, FLASH_OFFSET, 4 ); + if ( status != STATUS_SUCCESS ) + return status; + + lcl_data = 0; + + do + { + if (++cnt > 5000) + { + return STATUS_DEVICE_NOT_READY; + } + + status = fw_access_pciconf(p_BusInterface, FW_READ , &lcl_data, FLASH_OFFSET, 4 ); + if ( status != STATUS_SUCCESS ) + return status; + + } while(lcl_data & CMD_MASK); + + return status; +} + +static NTSTATUS +fw_flash_read_data ( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ) +{ + NTSTATUS status = 
STATUS_SUCCESS; + uint32_t cnt = 0; + uint32_t lcl_data = ( READ_BIT | (offset & ADDR_MSK)); + + if (!length) + return IB_INVALID_PARAMETER; + + status = fw_access_pciconf(p_BusInterface, FW_WRITE, &lcl_data, FLASH_OFFSET, 4 ); + if ( status != STATUS_SUCCESS ) + return status; + + lcl_data = 0; + do + { + // Timeout checks + if (++cnt > 5000 ) + { + return STATUS_DEVICE_NOT_READY; + } + + status = fw_access_pciconf(p_BusInterface, FW_READ, &lcl_data, FLASH_OFFSET, 4 ); + + if ( status != STATUS_SUCCESS ) + return status; + + } while(lcl_data & CMD_MASK); + + status = fw_access_pciconf(p_BusInterface, FW_READ, p_buffer, FLASH_OFFSET+4, length ); + return status; +} + +static NTSTATUS +fw_flash_read4( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN uint32_t addr, + IN OUT uint32_t *p_data) +{ + NTSTATUS status = STATUS_SUCCESS; + uint32_t lcl_data = 0; + uint32_t bank; + static uint32_t curr_bank = 0xffffffff; + + if (addr & 0x3) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_INIT, + ("Invalid address %08x\n", addr) ); + return STATUS_INVALID_PARAMETER; + } + + bank = addr & BANK_MASK; + if (bank != curr_bank) + { + curr_bank = bank; + if ((status = fw_set_bank(p_BusInterface, bank)) != STATUS_SUCCESS ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_INIT, + ("fw_set_bank returned %08x\n", status) ); + return STATUS_INVALID_PARAMETER; + } + } + status = fw_flash_read_data(p_BusInterface, &lcl_data, addr, 4); + *p_data = cl_ntoh32(lcl_data); + return STATUS_SUCCESS; +} + +static NTSTATUS +fw_flash_readbuf( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN uint32_t offset, + IN OUT void *p_data, + IN uint32_t len) +{ + NTSTATUS status = STATUS_SUCCESS; + uint32_t *p_lcl_data; + uint32_t i; + + if (offset & 0x3) + { + //Address should be 4-bytes aligned + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_INIT, + ("Invalid address %08x\n", offset) ); + return STATUS_INVALID_PARAMETER; + } + if (len & 0x3) + { + //Length should be 4-bytes aligned + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_INIT, + ("Invalid length %d\n", len) ); + return STATUS_INVALID_PARAMETER; + } + p_lcl_data = (uint32_t *)p_data; + + for ( i=0; i < (len >> 2); i++) + { + if ( (status = fw_flash_read_data( p_BusInterface, p_lcl_data, offset, sizeof(uint32_t) )) != STATUS_SUCCESS ) + return status; + offset += 4; + p_lcl_data++; + } + return STATUS_SUCCESS; +} // Flash::flash_read + +static NTSTATUS +fw_flash_writebuf( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN PVOID p_buffer, + IN ULONG offset, + IN ULONG POINTER_ALIGNMENT length ) +{ + NTSTATUS status = STATUS_SUCCESS; + uint32_t i; + uint8_t *p_data = (uint8_t *)p_buffer; + + for ( i = 0; i < length; i++ ) + { + status = fw_flash_write_data (p_BusInterface, p_data, offset, 1 ); + if (status != STATUS_SUCCESS ) + return status; + p_data++; + offset++; + } + return status; +} +static NTSTATUS +fw_flash_init( + IN BUS_INTERFACE_STANDARD *p_BusInterface ) +{ + uint32_t dir; + uint32_t pol; + uint32_t mod; + + uint32_t cnt=0; + uint32_t data; + NTSTATUS status = STATUS_SUCCESS; + uint32_t semaphore = 0; + + while ( !semaphore ) + { + status = fw_access_pciconf(p_BusInterface, FW_READ , &data, SEMAP63, 4); + if ( status != STATUS_SUCCESS ) + break; + if( !data ) + { + semaphore = 1; + break; + } + if (++cnt > 5000 ) + { + break; + } + } + + if ( !semaphore ) + { + return STATUS_NOT_SUPPORTED; + } + + // Save old values + + status = fw_access_pciconf(p_BusInterface, FW_READ , &old_dir,GPIO_DIR_L , 4); + if ( status == STATUS_SUCCESS ) + status = fw_access_pciconf(p_BusInterface, FW_READ , 
&old_pol,GPIO_POL_L , 4); + if ( status == STATUS_SUCCESS ) + status = fw_access_pciconf(p_BusInterface, FW_READ , &old_mod,GPIO_MOD_L , 4); + if ( status == STATUS_SUCCESS ) + status = fw_access_pciconf(p_BusInterface, FW_READ , &old_dat,GPIO_DAT_L , 4); + + // Set Direction=1, Polarity=0, Mode=0 for 3 GPIO lower bits + dir = old_dir | 0x70; + pol = old_pol & ~0x70; + mod = old_mod & ~0x70; + + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &dir,GPIO_DIR_L , 4); + if ( status == STATUS_SUCCESS ) + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &pol,GPIO_POL_L , 4); + if ( status == STATUS_SUCCESS ) + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &mod,GPIO_MOD_L , 4); + if ( status == STATUS_SUCCESS ) + // Set CPUMODE + status = fw_access_pciconf(p_BusInterface, FW_READ , &data, CPUMODE, 4); + if ( status == STATUS_SUCCESS ) + { + data &= ~CPUMODE_MSK; + data |= 1 << CPUMODE_SHIFT; + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &data, CPUMODE, 4); + } + if ( status == STATUS_SUCCESS ) + { + // Reset flash + data = 0xf0; + status = fw_flash_write_data(p_BusInterface, &data, 0x0, 4); + } + return status; +} + +static NTSTATUS +fw_flash_deinit( + IN BUS_INTERFACE_STANDARD *p_BusInterface ) +{ + uint32_t data = 0; + NTSTATUS status = STATUS_SUCCESS; + + status = fw_set_bank(p_BusInterface, 0); + if ( status == STATUS_SUCCESS ) + // Restore origin values + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &old_dir,GPIO_DIR_L , 4); + if ( status == STATUS_SUCCESS ) + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &old_pol,GPIO_POL_L , 4); + if ( status == STATUS_SUCCESS ) + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &old_mod,GPIO_MOD_L , 4); + if ( status == STATUS_SUCCESS ) + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &old_dat,GPIO_DAT_L , 4); + if ( status == STATUS_SUCCESS ) + // Free GPIO Semaphore + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &data, SEMAP63, 4); + return status; +} + +static NTSTATUS +fw_set_bank( + IN BUS_INTERFACE_STANDARD *p_BusInterface, + IN uint32_t bank ) +{ + NTSTATUS status = STATUS_SUCCESS; + uint32_t data = ( (uint32_t)0x70 << 24 ); + uint32_t mask = ((bank >> (BANK_SHIFT-4)) << 24 ); + + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &data, GPIO_DATACLEAR_L, 4); + if (status == STATUS_SUCCESS) + { + // A1 + data &= mask; + //data |= mask; // for A0 + status = fw_access_pciconf(p_BusInterface, FW_WRITE , &data, GPIO_DATASET_L, 4); + } + return status; +} diff --git a/branches/IBFD/hw/mthca/kernel/hca_driver.h b/branches/IBFD/hw/mthca/kernel/hca_driver.h new file mode 100644 index 00000000..4e8fc3b6 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_driver.h @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +#if !defined( _HCA_DRIVER_H_ ) +#define _HCA_DRIVER_H_ + + +#include +#include +#include +#include +#include "mthca/mthca_vc.h" +#include "hca_data.h" +#include "mt_l2w.h" +#include "hca_debug.h" + + +#include "hca_pnp.h" +#include "hca_pci.h" + +#if !defined(FILE_DEVICE_INFINIBAND) // Not defined in WXP DDK +#define FILE_DEVICE_INFINIBAND 0x0000003B +#endif + +/****s* HCA/hca_reg_state_t +* NAME +* hca_reg_state_t +* +* DESCRIPTION +* State for tracking registration with AL. This state is independent of the +* device PnP state, and both are used to properly register with AL. +* +* SYNOPSIS +*/ +typedef enum _hca_reg_state +{ + HCA_SHUTDOWN, + HCA_ADDED, + HCA_STARTED, + HCA_IFC_DEREFERENCED, + HCA_REGISTERED + +} hca_reg_state_t; +/* +* VALUES +* HCA_SHUTDOWN +* Cleaning up. +* +* HCA_ADDED +* AddDevice was called and successfully registered for interface +* notifications. +* +* HCA_STARTED +* IRP_MN_START_DEVICE was called. The HCA is fully functional. +* +* HCA_IFC_DEREFERENCED +* DEVICE_QUERY_REMOVE for IBBUS was received. +* +* HCA_REGISTERED +* Fully functional and registered with the bus root. +*********/ + + +typedef enum _hca_bar_type +{ + HCA_BAR_TYPE_HCR, + HCA_BAR_TYPE_UAR, + HCA_BAR_TYPE_DDR, + HCA_BAR_TYPE_MAX + +} hca_bar_type_t; + + +typedef struct _hca_bar +{ + uint64_t phys; + void *virt; + SIZE_T size; + +} hca_bar_t; + + +typedef struct _hca_dev_ext +{ + /* ------------------------------------------------- + * PNP DATA + * ------------------------------------------------ */ + cl_pnp_po_ext_t cl_ext; /* COMPLIB PnP object */ + void * pnp_ifc_entry; /* Notification entry for PnP interface events. */ + void * pnp_target_entry; /* Notification entry for PnP target events. */ + PNP_DEVICE_STATE pnpState; /* state for PnP Manager */ + + /* ------------------------------------------------- + * POWER MANAGER DATA + * ------------------------------------------------ */ + /* Cache of the system to device power states. */ + DEVICE_POWER_STATE DevicePower[PowerSystemMaximum]; + DEVICE_POWER_STATE DevicePowerState; + SYSTEM_POWER_STATE SystemPowerState; + PIO_WORKITEM pPoWorkItem; + + /* ------------------------------------------------- + * IB_AL DATA + * ------------------------------------------------ */ + ib_ci_ifc_t ci_ifc; /* Interface for the lower edge of the IB_AL device. 
*/ + hca_reg_state_t state; /* State for tracking registration with AL */ + DEVICE_OBJECT * p_al_dev; /* IB_AL FDO */ + FILE_OBJECT * p_al_file_obj; /* IB_AL file object */ + UNICODE_STRING al_sym_name; /* IB_AL symbolic name */ + + /* ------------------------------------------------- + * LOW LEVEL DRIVER' DATA + * ------------------------------------------------ */ + mlnx_hca_t hca; + atomic32_t usecnt; /* the number of working applications*/ + cl_spinlock_t uctx_lock; // spinlock for the below chain + cl_qlist_t uctx_list; // chain of user contexts + + /* ------------------------------------------------- + * OS DATA + * ------------------------------------------------ */ + hca_bar_t bar[HCA_BAR_TYPE_MAX]; /* HCA memory bars */ + CM_PARTIAL_RESOURCE_DESCRIPTOR interruptInfo; /* HCA interrupt resources */ + PKINTERRUPT int_obj; /* HCA interrupt object */ + spinlock_t isr_lock; /* lock for the ISR */ + ULONG bus_number; /* HCA's bus number */ + BUS_INTERFACE_STANDARD hcaBusIfc; /* PCI bus interface */ + + /* ------------------------------------------------- + * VARIABLES + * ------------------------------------------------ */ + DMA_ADAPTER * p_dma_adapter; /* HCA adapter object */ + ULONG n_map_regs; /* num of allocated adapter map registers */ + PCI_COMMON_CONFIG hcaConfig; /* saved HCA PCI configuration header */ + int hca_hidden; /* flag: when set - no attached DDR memory */ + +} hca_dev_ext_t; + +#define EXT_FROM_HOB(hob_p) (container_of(hob_p, hca_dev_ext_t, hca.hob)) +#define HCA_FROM_HOB(hob_p) (container_of(hob_p, mlnx_hca_t, hob)) +#define MDEV_FROM_HOB(hob_p) (HCA_FROM_HOB(hob_p)->mdev) +#define IBDEV_FROM_HOB(hob_p) (&EXT_FROM_HOB(hob_p)->hca.mdev->ib_dev) +#define HOBUL_FROM_HOB(hob_p) (&EXT_FROM_HOB(hob_p)->hca.hobul) +#define HOB_FROM_IBDEV(dev_p) (mlnx_hob_t *)&dev_p->mdev->ext->hca.hob + + +#define IB_GET_ERR_STR ib_dev->mdev->ext->ci_ifc.get_err_str +#if DBG || defined( EVENT_TRACING ) +#define PREP_IBDEV_FOR_PRINT(val) struct ib_device *ib_dev = val +#else +#define PREP_IBDEV_FOR_PRINT(val) +#endif + +/*********************************** +Firmware Update definitions +***********************************/ +#define PCI_CONF_ADDR (0x00000058) +#define PCI_CONF_DATA (0x0000005c) +#define FLASH_OFFSET (0x000f01a4) +#define READ_BIT (1<<29) +#define WRITE_BIT (2<<29) +#define ADDR_MSK (0x0007ffff) +#define CMD_MASK (0xe0000000) +#define BANK_SHIFT (19) +#define BANK_MASK (0xfff80000) +#define MAX_FLASH_SIZE (0x80000) // 512K + +#define SEMAP63 (0xf03fc) +#define GPIO_DIR_L (0xf008c) +#define GPIO_POL_L (0xf0094) +#define GPIO_MOD_L (0xf009c) +#define GPIO_DAT_L (0xf0084) +#define GPIO_DATACLEAR_L (0xf00d4) +#define GPIO_DATASET_L (0xf00dc) + +#define CPUMODE (0xf0150) +#define CPUMODE_MSK (0xc0000000UL) +#define CPUMODE_SHIFT (30) + +/* Definitions intended to become shared with UM. Later... 
*/
+#define FW_READ 0x00
+#define FW_WRITE 0x01
+#define FW_READ_CMD 0x08
+#define FW_WRITE_CMD 0x09
+#define FW_OPEN_IF 0xe7
+#define FW_CLOSE_IF 0x7e
+
+#define FW_SIGNATURE (0x5a445a44)
+#define FW_SECT_SIZE (0x10000)
+
+static inline ib_api_status_t errno_to_iberr(int err)
+{
+#define MAP_ERR(err,ibstatus) case err: ib_status = ibstatus; break
+ ib_api_status_t ib_status = IB_UNKNOWN_ERROR;
+ if (err < 0)
+ err = -err;
+ switch (err) {
+ MAP_ERR( ENOENT, IB_NOT_FOUND );
+ MAP_ERR( EINTR, IB_INTERRUPTED );
+ MAP_ERR( EAGAIN, IB_RESOURCE_BUSY );
+ MAP_ERR( ENOMEM, IB_INSUFFICIENT_MEMORY );
+ MAP_ERR( EACCES, IB_INVALID_PERMISSION );
+ MAP_ERR( EFAULT, IB_ERROR );
+ MAP_ERR( EBUSY, IB_RESOURCE_BUSY );
+ MAP_ERR( ENODEV, IB_UNSUPPORTED );
+ MAP_ERR( EINVAL, IB_INVALID_PARAMETER );
+ MAP_ERR( ENOSYS, IB_UNSUPPORTED );
+ MAP_ERR( ERANGE, IB_INVALID_SETTING );
+ default:
+ //HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM,
+ // "Unmapped errno (%d)\n", err);
+ break;
+ }
+ return ib_status;
+}
+
+#endif /* !defined( _HCA_DRIVER_H_ ) */
diff --git a/branches/IBFD/hw/mthca/kernel/hca_mcast.c b/branches/IBFD/hw/mthca/kernel/hca_mcast.c
new file mode 100644
index 00000000..1df61bfa
--- /dev/null
+++ b/branches/IBFD/hw/mthca/kernel/hca_mcast.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2005 SilverStorm Technologies. All rights reserved.
+ * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved.
+ *
+ * This software is available to you under the OpenIB.org BSD license
+ * below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+
+#include
+#include
+
+#include "hca_driver.h"
+
+#if defined(EVENT_TRACING)
+#ifdef offsetof
+#undef offsetof
+#endif
+#include "hca_mcast.tmh"
+#endif
+#include "mthca_dev.h"
+
+/*
+* Multicast Support Verbs.
+*/ +ib_api_status_t +mlnx_attach_mcast ( + IN const ib_qp_handle_t h_qp, + IN const ib_gid_t *p_mcast_gid, + IN const uint16_t mcast_lid, + OUT ib_mcast_handle_t *ph_mcast, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + PREP_IBDEV_FOR_PRINT(ib_qp_p->device); + mlnx_mcast_t *mcast_p; + + HCA_ENTER(HCA_DBG_MCAST); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MCAST, + ("User mode is not supported yet\n")); + status = IB_UNSUPPORTED; + goto err_user_unsupported; + } + + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + if (!p_mcast_gid || !ph_mcast) { + status = IB_INVALID_PARAMETER; + goto err_invalid_param; + } + + // allocate structure + mcast_p = (mlnx_mcast_t*)kmalloc(sizeof *mcast_p, GFP_ATOMIC ); + if (mcast_p == NULL) { + status = IB_INSUFFICIENT_MEMORY; + goto err_no_mem; + } + + // attach to mcast group + if( p_umv_buf && p_umv_buf->command ) { + //TODO: call uverbs + } + else { + err = ibv_attach_mcast(ib_qp_p, (union ib_gid *)p_mcast_gid, (u16)mcast_lid); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MCAST, + ("ibv_attach_mcast failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_attach; + } + } + + // fill the structure + mcast_p->ib_qp_p = ib_qp_p; + mcast_p->mcast_lid = mcast_lid; + RtlCopyMemory(mcast_p->mcast_gid.raw, p_mcast_gid->raw, sizeof *p_mcast_gid); + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_MCAST, + ("mcasth %p, qp_p %p, mlid %hx, mgid %I64x`%I64x\n", + mcast_p, mcast_p->ib_qp_p, mcast_p->mcast_lid, + cl_ntoh64(*(uint64_t*)&mcast_p->mcast_gid.raw[0]), + cl_ntoh64(*(uint64_t*)&mcast_p->mcast_gid.raw[8] ))); + + // return the result + if (ph_mcast) *ph_mcast = (ib_mcast_handle_t)mcast_p; + + status = IB_SUCCESS; + goto end; + +err_attach: + kfree(mcast_p); +err_no_mem: +err_invalid_param: +err_unsupported: +err_user_unsupported: +end: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MCAST, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_MCAST); + return status; +} + +ib_api_status_t +mlnx_detach_mcast ( + IN const ib_mcast_handle_t h_mcast) +{ + ib_api_status_t status = IB_INVALID_PARAMETER; + int err; + mlnx_mcast_t *mcast_p = (mlnx_mcast_t*)h_mcast; + struct ib_device *ib_dev; + + + HCA_ENTER(HCA_DBG_MCAST); + // sanity check + if (!mcast_p || !mcast_p->ib_qp_p) + { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MCAST, + ("completes with ERROR status IB_INVALID_PARAMETER\n")); + status = IB_INVALID_PARAMETER; + goto err_invalid_param; + } + ib_dev = mcast_p->ib_qp_p->device; + + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_MCAST, + ("mcasth %p, qp_p %p, mlid %hx, mgid %I64x`%I64x\n", + mcast_p, mcast_p->ib_qp_p, mcast_p->mcast_lid, + *(uint64_t*)&mcast_p->mcast_gid.raw[0], + *(uint64_t*)&mcast_p->mcast_gid.raw[8] )); + + // detach + err = ibv_detach_mcast( mcast_p->ib_qp_p, + (union ib_gid *)&mcast_p->mcast_gid, mcast_p->mcast_lid ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MCAST, + ("ibv_detach_mcast failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_detach_mcast; + } + + status = IB_SUCCESS; + +err_detach_mcast: + kfree(mcast_p); +err_unsupported: +err_invalid_param: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MCAST, + ("completes with ERROR status %d\n", status)); + } + 
HCA_EXIT(HCA_DBG_MCAST); + return status; +} + + +void +mlnx_mcast_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->attach_mcast = mlnx_attach_mcast; + p_interface->detach_mcast = mlnx_detach_mcast; +} diff --git a/branches/IBFD/hw/mthca/kernel/hca_memory.c b/branches/IBFD/hw/mthca/kernel/hca_memory.c new file mode 100644 index 00000000..6e9e90a0 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_memory.c @@ -0,0 +1,609 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +#include "hca_utils.h" +#include "mthca_dev.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_memory.tmh" +#endif + +/* + * Memory Management Verbs. + */ + +ib_api_status_t +mlnx_register_mr ( + IN const ib_pd_handle_t h_pd, + IN const ib_mr_create_t *p_mr_create, + OUT net32_t* const p_lkey, + OUT net32_t* const p_rkey, + OUT ib_mr_handle_t *ph_mr, + IN boolean_t um_call ) +{ + ib_api_status_t status; + int err; + struct ib_mr *mr_p; + struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + PREP_IBDEV_FOR_PRINT(ib_pd_p->device); + + HCA_ENTER(HCA_DBG_MEMORY); + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + if (!p_mr_create || 0 == p_mr_create->length) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY, + ("invalid attributes\n")); + status = IB_INVALID_PARAMETER; + goto err_invalid_parm; + } + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. 
+ */ + if (p_mr_create->access_ctrl & (IB_AC_RDMA_WRITE | IB_AC_ATOMIC) && + !(p_mr_create->access_ctrl & IB_AC_LOCAL_WRITE)) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY, + ("invalid access rights\n")); + status = IB_INVALID_PERMISSION; + goto err_invalid_access; + } + + // register mr + mr_p = ibv_reg_mr(ib_pd_p, map_qp_ibal_acl(p_mr_create->access_ctrl), + p_mr_create->vaddr, p_mr_create->length, + (uint64_t)p_mr_create->vaddr, um_call ); + if (IS_ERR(mr_p)) { + err = PTR_ERR(mr_p); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, + ("ibv_reg_mr failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_reg_mr; + } + + // results + *p_lkey = mr_p->lkey; + *p_rkey = cl_hton32( mr_p->rkey ); + if (ph_mr) *ph_mr = (ib_mr_handle_t)mr_p; + status = IB_SUCCESS; + +err_reg_mr: +err_invalid_access: +err_invalid_parm: +err_unsupported: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_MEMORY); + return status; +} + +ib_api_status_t +mlnx_register_pmr ( + IN const ib_pd_handle_t h_pd, + IN const ib_phys_create_t* const p_pmr_create, + IN OUT uint64_t* const p_vaddr, + OUT net32_t* const p_lkey, + OUT net32_t* const p_rkey, + OUT ib_mr_handle_t* const ph_mr, + IN boolean_t um_call ) +{ + ib_api_status_t status; + int err; + struct ib_mr *mr_p; + struct ib_phys_buf *buffer_list; + struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + PREP_IBDEV_FOR_PRINT(ib_pd_p->device); + + UNUSED_PARAM( um_call ); + + HCA_ENTER(HCA_DBG_MEMORY); + + if (mthca_is_livefish(to_mdev(ib_pd_p->device))) { + mr_p = kzalloc(sizeof *mr_p, GFP_KERNEL); + if (!mr_p) { + status = IB_INSUFFICIENT_MEMORY; + goto err_mem; + } + mr_p->device = ib_pd_p->device; + mr_p->pd = ib_pd_p; + goto done; + } + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + if (!p_vaddr || !p_pmr_create || + 0 == p_pmr_create->length ) { + status = IB_INVALID_PARAMETER; + goto err_invalid_parm; + } + + // prepare parameters + buffer_list = (void*)p_pmr_create->range_array; + //NB: p_pmr_create->buf_offset is not used, i.e. supposed that region is page-aligned + //NB: p_pmr_create->hca_page_size is not used, i.e. 
supposed it is always the same + + // register pmr + if (p_pmr_create->length == (uint64_t)-1i64) + { + mr_p = ibv_get_dma_mr( ib_pd_p, + map_qp_ibal_acl(p_pmr_create->access_ctrl) ); + } + else + mr_p = ibv_reg_phys_mr(ib_pd_p, buffer_list, p_pmr_create->num_ranges, + map_qp_ibal_acl(p_pmr_create->access_ctrl), p_vaddr ); + if (IS_ERR(mr_p)) { + err = PTR_ERR(mr_p); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, + ("mthca_reg_phys_mr failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_reg_phys_mr; + } + + // results +done: + if (ph_mr) *ph_mr = (ib_mr_handle_t)mr_p; + *p_lkey = mr_p->lkey; + *p_rkey = cl_hton32( mr_p->rkey ); + //NB: p_vaddr was not changed + status = IB_SUCCESS; + +err_reg_phys_mr: +err_invalid_parm: +err_unsupported: +err_mem: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_MEMORY); + return status; + +} + +ib_api_status_t +mlnx_query_mr ( + IN const ib_mr_handle_t h_mr, + OUT ib_mr_attr_t *p_mr_query ) +{ + UNREFERENCED_PARAMETER(h_mr); + UNREFERENCED_PARAMETER(p_mr_query); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_query_mr not implemented\n")); + return IB_UNSUPPORTED; +} + + +ib_api_status_t +mlnx_modify_mr ( + IN const ib_mr_handle_t h_mr, + IN const ib_mr_mod_t mem_modify_req, + IN const ib_mr_create_t *p_mr_create, + OUT uint32_t *p_lkey, + OUT uint32_t *p_rkey, + IN const ib_pd_handle_t h_pd OPTIONAL, + IN boolean_t um_call ) +{ + UNREFERENCED_PARAMETER(h_mr); + UNREFERENCED_PARAMETER(mem_modify_req); + UNREFERENCED_PARAMETER(p_mr_create); + UNREFERENCED_PARAMETER(p_lkey); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(h_pd); + UNREFERENCED_PARAMETER(um_call); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_modify_mr not implemented\n")); + return IB_UNSUPPORTED; +} + + +ib_api_status_t +mlnx_modify_pmr ( + IN const ib_mr_handle_t h_mr, + IN const ib_mr_mod_t mem_modify_req, + IN const ib_phys_create_t* const p_pmr_create, + IN OUT uint64_t* const p_vaddr, + OUT uint32_t* const p_lkey, + OUT uint32_t* const p_rkey, + IN const ib_pd_handle_t h_pd OPTIONAL, + IN boolean_t um_call ) +{ + UNREFERENCED_PARAMETER(h_mr); + UNREFERENCED_PARAMETER(mem_modify_req); + UNREFERENCED_PARAMETER(p_pmr_create); + UNREFERENCED_PARAMETER(p_vaddr); + UNREFERENCED_PARAMETER(p_lkey); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(h_pd); + UNREFERENCED_PARAMETER(um_call); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_modify_pmr not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_register_smr ( + IN const ib_mr_handle_t h_mr, + IN const ib_pd_handle_t h_pd, + IN const ib_access_t access_ctrl, + IN OUT uint64_t* const p_vaddr, + OUT net32_t* const p_lkey, + OUT net32_t* const p_rkey, + OUT ib_mr_handle_t* const ph_mr, + IN boolean_t um_call ) +{ + UNREFERENCED_PARAMETER(h_mr); + UNREFERENCED_PARAMETER(h_pd); + UNREFERENCED_PARAMETER(access_ctrl); + UNREFERENCED_PARAMETER(p_vaddr); + UNREFERENCED_PARAMETER(p_lkey); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(ph_mr); + UNREFERENCED_PARAMETER(um_call); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_register_smr not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_deregister_mr ( + IN const ib_mr_handle_t h_mr) +{ + ib_api_status_t status; + int err; + struct ib_mr *ib_mr = (struct ib_mr *)h_mr; + PREP_IBDEV_FOR_PRINT(ib_mr->device); + + HCA_ENTER(HCA_DBG_SHIM); + + if 
(mthca_is_livefish(to_mdev(ib_mr->device))) { + kfree(ib_mr); + goto done; + } + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + // deregister + err = ibv_dereg_mr((struct ib_mr *)h_mr); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY, + ("mthca_dereg_mr failed (%d)", status)); + goto err_dereg_mr; + } + +done: + status = IB_SUCCESS; + +err_dereg_mr: +err_unsupported: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_MEMORY); + return status; + +} + +ib_api_status_t +mlnx_alloc_fmr( + IN const ib_pd_handle_t h_pd, + IN const mlnx_fmr_create_t* const p_fmr_create, + OUT mlnx_fmr_handle_t* const ph_fmr + ) +{ + ib_api_status_t status; + int err; + struct ib_fmr * fmr_p; + struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + struct ib_fmr_attr fmr_attr; + PREP_IBDEV_FOR_PRINT(ib_pd_p->device); + + HCA_ENTER(HCA_DBG_MEMORY); + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + if (!p_fmr_create ) { + status = IB_INVALID_PARAMETER; + goto err_invalid_parm; + } + // TODO: check Max remap in AL + + // prepare parameters + RtlZeroMemory(&fmr_attr, sizeof(struct ib_fmr_attr)); + fmr_attr.max_maps = p_fmr_create->max_maps; + fmr_attr.max_pages = p_fmr_create->max_pages; + fmr_attr.page_shift = p_fmr_create->page_size; + + // register mr + fmr_p = ibv_alloc_fmr(ib_pd_p, + map_qp_ibal_acl(p_fmr_create->access_ctrl), &fmr_attr); + if (IS_ERR(fmr_p)) { + err = PTR_ERR(fmr_p); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY , + ("mthca_alloc_fmr failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_fmr; + } + + // results + if (ph_fmr) *ph_fmr = (mlnx_fmr_handle_t)fmr_p; + status = IB_SUCCESS; + +err_alloc_fmr: +err_invalid_parm: +err_unsupported: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_MEMORY); + return status; + +} + +ib_api_status_t +mlnx_map_phys_fmr ( + IN const mlnx_fmr_handle_t h_fmr, + IN const uint64_t* const page_list, + IN const int list_len, + IN OUT uint64_t* const p_vaddr, + OUT net32_t* const p_lkey, + OUT net32_t* const p_rkey + ) +{ + int err; + ib_api_status_t status; + struct ib_fmr *ib_fmr = (struct ib_fmr *)h_fmr; + uint64_t vaddr = (*p_vaddr) & ~(PAGE_SIZE - 1); + PREP_IBDEV_FOR_PRINT(ib_fmr->device); + + HCA_ENTER(HCA_DBG_MEMORY); + + // mapping + err = ibv_map_phys_fmr(ib_fmr, (u64*)page_list, list_len, (uint64_t)(ULONG_PTR)vaddr); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY , + ("ibv_map_phys_fmr failed (%d) for mr %p\n", err, h_fmr)); + goto err_dealloc_fmr; + } + + // return the results + *p_vaddr = vaddr; + *p_lkey = ib_fmr->lkey; + *p_rkey = cl_hton32( ib_fmr->rkey ); + + status = IB_SUCCESS; + +err_dealloc_fmr: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_MEMORY); + return status; +} + + + +ib_api_status_t +mlnx_unmap_fmr ( + IN const mlnx_fmr_handle_t *ph_fmr) +{ + ib_api_status_t status; + int err; + struct ib_fmr *ib_fmr = (struct ib_fmr *)*ph_fmr; + struct list_head fmr_list; + PREP_IBDEV_FOR_PRINT(ib_fmr->device); + + HCA_ENTER(HCA_DBG_MEMORY); + + // sanity checks + if( !cl_is_blockable() ) { + 
status = IB_UNSUPPORTED; + goto err_unsupported; + } + + INIT_LIST_HEAD(&fmr_list); + while(*ph_fmr) + { + ib_fmr = (struct ib_fmr*)*ph_fmr; + list_add_tail(&ib_fmr->list, &fmr_list); + ph_fmr ++; + } + + err = ibv_unmap_fmr(&fmr_list); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY , + ("ibv_unmap_fmr failed (%d) \n", err)); + goto err_unmap_fmr; + } + + status = IB_SUCCESS; + +err_unmap_fmr: +err_unsupported: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_MEMORY); + return status; + + +} + + + +ib_api_status_t +mlnx_dealloc_fmr ( + IN const mlnx_fmr_handle_t h_fmr + ) +{ + ib_api_status_t status; + int err; + struct ib_fmr *fmr = (struct ib_fmr *)h_fmr; + PREP_IBDEV_FOR_PRINT(fmr->device); + UNUSED_PARAM_WOWPP(fmr); + + HCA_ENTER(HCA_DBG_MEMORY); + + // sanity checks + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + + // deregister + err = ibv_dealloc_fmr((struct ib_fmr *)h_fmr); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY , + ("ibv_dealloc_fmr failed (%d) for mr %p\n",err, h_fmr)); + goto err_dealloc_fmr; + } + + status = IB_SUCCESS; + +err_dealloc_fmr: +err_unsupported: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_MEMORY); + return status; + +} + + + +/* +* Memory Window Verbs. +*/ + +ib_api_status_t +mlnx_create_mw ( + IN const ib_pd_handle_t h_pd, + OUT net32_t* const p_rkey, + OUT ib_mw_handle_t *ph_mw, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + UNREFERENCED_PARAMETER(h_pd); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(ph_mw); + UNREFERENCED_PARAMETER(p_umv_buf); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_create_mw not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_query_mw ( + IN const ib_mw_handle_t h_mw, + OUT ib_pd_handle_t *ph_pd, + OUT net32_t* const p_rkey, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + UNREFERENCED_PARAMETER(h_mw); + UNREFERENCED_PARAMETER(ph_pd); + UNREFERENCED_PARAMETER(p_rkey); + UNREFERENCED_PARAMETER(p_umv_buf); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_query_mw not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_destroy_mw ( + IN const ib_mw_handle_t h_mw) +{ + UNREFERENCED_PARAMETER(h_mw); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_MEMORY ,("mlnx_destroy_mw not implemented\n")); + return IB_UNSUPPORTED; +} + + +void +mlnx_memory_if( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->register_mr = mlnx_register_mr; + p_interface->register_pmr = mlnx_register_pmr; + p_interface->query_mr = mlnx_query_mr; + p_interface->modify_mr = mlnx_modify_mr; + p_interface->modify_pmr = mlnx_modify_pmr; + p_interface->register_smr = mlnx_register_smr; + p_interface->deregister_mr = mlnx_deregister_mr; + + p_interface->alloc_mlnx_fmr = mlnx_alloc_fmr; + p_interface->map_phys_mlnx_fmr = mlnx_map_phys_fmr; + p_interface->unmap_mlnx_fmr = mlnx_unmap_fmr; + p_interface->dealloc_mlnx_fmr = mlnx_dealloc_fmr; + + p_interface->create_mw = mlnx_create_mw; + p_interface->query_mw = mlnx_query_mw; + p_interface->destroy_mw = mlnx_destroy_mw; +} + +void +mlnx_memory_if_livefish( + IN OUT ci_interface_t *p_interface ) +{ + p_interface->register_pmr = mlnx_register_pmr; + p_interface->deregister_mr = mlnx_deregister_mr; +} + + diff --git 
a/branches/IBFD/hw/mthca/kernel/hca_pci.c b/branches/IBFD/hw/mthca/kernel/hca_pci.c new file mode 100644 index 00000000..7f6afeb2 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_pci.c @@ -0,0 +1,769 @@ + +#include "hca_driver.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_pci.tmh" +#endif +#include +#include +#include + +#define HCA_RESET_HCR_OFFSET 0x000F0010 +#define HCA_RESET_TOKEN CL_HTON32(0x00000001) + +#define PCI_CAPABILITY_ID_VPD 0x03 +#define PCI_CAPABILITY_ID_PCIX 0x07 +#define PCI_CAPABILITY_ID_PCIEXP 0x10 + +boolean_t +FindBridgeIf( + IN hca_dev_ext_t *pi_ext, + IN PBUS_INTERFACE_STANDARD pi_pInterface + ); + + +/* + * Vital Product Data Capability + */ +typedef struct _PCI_VPD_CAPABILITY { + + PCI_CAPABILITIES_HEADER Header; + + USHORT Flags; + ULONG Data; + +} PCI_VPD_CAPABILITY, *PPCI_VPD_CAPABILITY; + + +/* + * PCI-X Capability + */ +typedef struct _PCI_PCIX_CAPABILITY { + + PCI_CAPABILITIES_HEADER Header; + + USHORT Command; + ULONG Status; + +/* for Command: */ +} PCI_PCIX_CAPABILITY, *PPCI_PCIX_CAPABILITY; + +#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ + +/* + * PCI-Express Capability + */ +typedef struct _PCI_PCIEXP_CAPABILITY { + + PCI_CAPABILITIES_HEADER Header; + + USHORT Flags; + ULONG DevCapabilities; + USHORT DevControl; + USHORT DevStatus; + ULONG LinkCapabilities; + USHORT LinkControl; + USHORT LinkStatus; + ULONG SlotCapabilities; + USHORT SlotControl; + USHORT SlotStatus; + USHORT RootControl; + USHORT RootCapabilities; + USHORT RootStatus; +} PCI_PCIEXP_CAPABILITY, *PPCI_PCIEXP_CAPABILITY; + +/* for DevControl: */ +#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ + +static NTSTATUS +__get_bus_ifc( + IN DEVICE_OBJECT* const pDevObj, + IN const GUID* const pGuid, + OUT BUS_INTERFACE_STANDARD *pBusIfc ); + +static void +__fixup_pci_capabilities( + IN PCI_COMMON_CONFIG* const pConfig ); + +static NTSTATUS +__save_pci_config( + IN BUS_INTERFACE_STANDARD *pBusIfc, + OUT PCI_COMMON_CONFIG* const pConfig ); + +static NTSTATUS +__restore_pci_config( + IN BUS_INTERFACE_STANDARD *pBusIfc, + IN PCI_COMMON_CONFIG* const pConfig, + IN const int is_bridge ); + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, __get_bus_ifc) +#pragma alloc_text (PAGE, __fixup_pci_capabilities) +#pragma alloc_text (PAGE, __save_pci_config) +#pragma alloc_text (PAGE, __restore_pci_config) +#endif + +/* + * Returns the offset in configuration space of the PCI-X capabilites. + */ +static ULONG +__FindCapability( + IN PCI_COMMON_CONFIG* const pConfig, + IN char cap_id + ) +{ + ULONG offset = 0; + PCI_CAPABILITIES_HEADER *pHdr = NULL; + UCHAR *pBuf = (UCHAR*)pConfig; + + HCA_ENTER( HCA_DBG_PNP ); + + if ( pConfig->HeaderType == PCI_DEVICE_TYPE ) { + if( pConfig->u.type0.CapabilitiesPtr ) + { + pHdr = (PCI_CAPABILITIES_HEADER*) + (pBuf + pConfig->u.type0.CapabilitiesPtr); + } + } + + if ( pConfig->HeaderType == PCI_BRIDGE_TYPE ) { + if( pConfig->u.type1.CapabilitiesPtr ) + { + pHdr = (PCI_CAPABILITIES_HEADER*) + (pBuf + pConfig->u.type1.CapabilitiesPtr); + } + } + + /* + * Fix up any fields that might cause changes to the + * device - like writing VPD data. + */ + while( pHdr ) + { + if( pHdr->CapabilityID == cap_id ) + { + offset = (UCHAR)(((ULONG_PTR)pHdr) - ((ULONG_PTR)pConfig)); + break; + } + + if( pHdr->Next ) + pHdr = (PCI_CAPABILITIES_HEADER*)(pBuf + pHdr->Next); + else + pHdr = NULL; + } + + HCA_EXIT( HCA_DBG_PNP ); + return offset; +} + +/* Forwards the request to the HCA's PDO. 
*/ +static NTSTATUS +__get_bus_ifc( + IN DEVICE_OBJECT* const pDevObj, + IN const GUID* const pGuid, + OUT BUS_INTERFACE_STANDARD *pBusIfc ) +{ + NTSTATUS status; + IRP *pIrp; + IO_STATUS_BLOCK ioStatus; + IO_STACK_LOCATION *pIoStack; + DEVICE_OBJECT *pDev; + KEVENT event; + + HCA_ENTER( HCA_DBG_PNP ); + + CL_ASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + pDev = IoGetAttachedDeviceReference( pDevObj ); + + KeInitializeEvent( &event, NotificationEvent, FALSE ); + + /* Build the IRP for the HCA. */ + pIrp = IoBuildSynchronousFsdRequest( IRP_MJ_PNP, pDev, + NULL, 0, NULL, &event, &ioStatus ); + if( !pIrp ) + { + ObDereferenceObject( pDev ); + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("IoBuildSynchronousFsdRequest failed.\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + /* Copy the request query parameters. */ + pIoStack = IoGetNextIrpStackLocation( pIrp ); + pIoStack->MinorFunction = IRP_MN_QUERY_INTERFACE; + pIoStack->Parameters.QueryInterface.Size = sizeof(BUS_INTERFACE_STANDARD); + pIoStack->Parameters.QueryInterface.Version = 1; + pIoStack->Parameters.QueryInterface.InterfaceType = pGuid; + pIoStack->Parameters.QueryInterface.Interface = (INTERFACE*)pBusIfc; + pIoStack->Parameters.QueryInterface.InterfaceSpecificData = NULL; + + pIrp->IoStatus.Status = STATUS_NOT_SUPPORTED; + + /* Send the IRP. */ + status = IoCallDriver( pDev, pIrp ); + if( status == STATUS_PENDING ) + { + KeWaitForSingleObject( &event, Executive, KernelMode, + FALSE, NULL ); + + status = ioStatus.Status; + } + ObDereferenceObject( pDev ); + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +/* + * Reads and saves the PCI configuration of the device accessible + * through the provided bus interface. Does not read registers 22 or 23 + * as directed in Tavor PRM 1.0.1, Appendix A. InfiniHost Software Reset. + */ +static NTSTATUS +__save_pci_config( + IN BUS_INTERFACE_STANDARD *pBusIfc, + OUT PCI_COMMON_CONFIG* const pConfig ) +{ + ULONG len; + UINT32 *pBuf; + + HCA_ENTER( HCA_DBG_PNP ); + + pBuf = (UINT32*)pConfig; + + /* + * Read the lower portion of the configuration, up to but excluding + * register 22. + */ + len = pBusIfc->GetBusData( + pBusIfc->Context, PCI_WHICHSPACE_CONFIG, &pBuf[0], 0, 88 ); + if( len != 88 ) + { + HCA_PRINT( TRACE_LEVEL_ERROR , HCA_DBG_PNP ,("Failed to read HCA config.\n")); + return STATUS_DEVICE_NOT_READY; + } + + /* Read the upper portion of the configuration, from register 24. */ + len = pBusIfc->GetBusData( + pBusIfc->Context, PCI_WHICHSPACE_CONFIG, &pBuf[24], 96, 160 ); + if( len != 160 ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to read HCA config.\n")); + return STATUS_DEVICE_NOT_READY; + } + + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS; +} + + +static void +__fixup_pci_capabilities( + IN PCI_COMMON_CONFIG* const pConfig ) +{ + UCHAR *pBuf; + PCI_CAPABILITIES_HEADER *pHdr, *pNextHdr; + + HCA_ENTER( HCA_DBG_PNP ); + + pBuf = (UCHAR*)pConfig; + + if( pConfig->HeaderType == PCI_DEVICE_TYPE ) + { + if( pConfig->u.type0.CapabilitiesPtr ) + { + pNextHdr = (PCI_CAPABILITIES_HEADER*) + (pBuf + pConfig->u.type0.CapabilitiesPtr); + } + else + { + pNextHdr = NULL; + } + } + else + { + ASSERT( pConfig->HeaderType == PCI_BRIDGE_TYPE ); + if( pConfig->u.type1.CapabilitiesPtr ) + { + pNextHdr = (PCI_CAPABILITIES_HEADER*) + (pBuf + pConfig->u.type1.CapabilitiesPtr); + } + else + { + pNextHdr = NULL; + } + } + + /* + * Fix up any fields that might cause changes to the + * device - like writing VPD data. 
+ */ + while( pNextHdr ) + { + pHdr = pNextHdr; + if( pNextHdr->Next ) + pNextHdr = (PCI_CAPABILITIES_HEADER*)(pBuf + pHdr->Next); + else + pNextHdr = NULL; + + switch( pHdr->CapabilityID ) + { + case PCI_CAPABILITY_ID_VPD: + /* Clear the flags field so we don't cause a write. */ + ((PCI_VPD_CAPABILITY*)pHdr)->Flags = 0; + break; + + default: + break; + } + } + + HCA_EXIT( HCA_DBG_PNP ); +} + + +/* + * Restore saved PCI configuration, skipping registers 22 and 23, as well + * as any registers where writing will have side effects such as the flags + * field of the VPD and vendor specific capabilities. The function also delays + * writing the command register, bridge control register (if applicable), and + * PCIX command register (if present). + */ +static NTSTATUS +__restore_pci_config( + IN BUS_INTERFACE_STANDARD *pBusIfc, + IN PCI_COMMON_CONFIG* const pConfig, + IN const int is_bridge ) +{ + NTSTATUS status = STATUS_SUCCESS; + int i, *pci_hdr = (int*)pConfig; + int hca_pcix_cap = 0; + + HCA_ENTER( HCA_DBG_PNP ); + + /* get capabilities */ + hca_pcix_cap = __FindCapability( pConfig, PCI_CAPABILITY_ID_PCIX ); + + /* restore capabilities*/ + if (is_bridge) { + if ( 4 != pBusIfc->SetBusData( pBusIfc->Context, PCI_WHICHSPACE_CONFIG, + &pci_hdr[(hca_pcix_cap + 0x8) / 4], hca_pcix_cap + 0x8, 4) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Couldn't restore HCA bridge Upstream split transaction control, aborting.\n")); + status = STATUS_UNSUCCESSFUL; + goto out; + } + if ( 4 != pBusIfc->SetBusData( pBusIfc->Context, PCI_WHICHSPACE_CONFIG, + &pci_hdr[(hca_pcix_cap + 0xc) / 4], hca_pcix_cap + 0xc, 4) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Couldn't restore HCA bridge Downstream split transaction control, aborting.\n")); + status = STATUS_UNSUCCESSFUL; + goto out; + } + } + else { + int hca_pcie_cap = __FindCapability( pConfig, PCI_CAPABILITY_ID_PCIEXP ); + PCI_PCIEXP_CAPABILITY *pPciExpCap = (PCI_PCIEXP_CAPABILITY*)(((UCHAR*)pConfig) + hca_pcie_cap); + + if (hca_pcix_cap) { + if ( 4 != pBusIfc->SetBusData( pBusIfc->Context, PCI_WHICHSPACE_CONFIG, + &pci_hdr[hca_pcix_cap/4], hca_pcix_cap, 4) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Couldn't restore HCA PCI-X command register, aborting.\n")); + status = STATUS_UNSUCCESSFUL; + goto out; + } + } + + if (hca_pcie_cap) { + /* restore HCA PCI Express Device Control register */ + if ( sizeof( pPciExpCap->DevControl ) != pBusIfc->SetBusData( + pBusIfc->Context, PCI_WHICHSPACE_CONFIG, + &pPciExpCap->DevControl, hca_pcie_cap + + offsetof( PCI_PCIEXP_CAPABILITY, DevControl), + sizeof( pPciExpCap->DevControl ) )) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Couldn't restore HCA PCI Express Device Control register, aborting.\n")); + status = STATUS_UNSUCCESSFUL; + goto out; + } + /* restore HCA PCI Express Link Control register */ + if ( sizeof( pPciExpCap->LinkControl ) != pBusIfc->SetBusData( + pBusIfc->Context, PCI_WHICHSPACE_CONFIG, + &pPciExpCap->LinkControl, hca_pcie_cap + + offsetof( PCI_PCIEXP_CAPABILITY, LinkControl), + sizeof( pPciExpCap->LinkControl ) )) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Couldn't restore HCA PCI Express Link Control register, aborting.\n")); + status = STATUS_UNSUCCESSFUL; + goto out; + } + } + } + + /* write basic part */ + for (i = 0; i < 16; ++i) { + if (i == 1) + continue; + + if (4 != pBusIfc->SetBusData( pBusIfc->Context, + PCI_WHICHSPACE_CONFIG, &pci_hdr[i], i * 4, 4 )) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP , + ("Couldn't restore PCI cfg reg %x, aborting.\n", i)); + 
status = STATUS_DEVICE_NOT_READY; + goto out; + } + } + + /* Write the command register. */ + if (4 != pBusIfc->SetBusData( pBusIfc->Context, + PCI_WHICHSPACE_CONFIG, &pci_hdr[1], 4, 4 )) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Couldn't restore COMMAND.\n")); + status = STATUS_DEVICE_NOT_READY; + } + +out: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +NTSTATUS +hca_reset( DEVICE_OBJECT* const pDevObj, int is_tavor ) +{ + NTSTATUS status = STATUS_SUCCESS; + PCI_COMMON_CONFIG hcaConfig, brConfig; + BUS_INTERFACE_STANDARD hcaBusIfc; + BUS_INTERFACE_STANDARD brBusIfc = {0}; // to bypass C4701 + hca_dev_ext_t *pExt = (hca_dev_ext_t*)pDevObj->DeviceExtension; + + HCA_ENTER( HCA_DBG_PNP ); + + /* sanity check */ + if (is_tavor && g_skip_tavor_reset) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_PNP ,("Card reset is skipped, trying to proceed.\n")); + goto resetExit; + } + + /* get the resources */ + { + /* Get the HCA's bus interface. */ + status = __get_bus_ifc( pDevObj, &GUID_BUS_INTERFACE_STANDARD, &hcaBusIfc ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to get HCA bus interface.\n")); + goto resetErr1; + } + + /* Get the HCA Bridge's bus interface, if any */ + if (is_tavor) { + if (!FindBridgeIf( pExt, &brBusIfc )) + goto resetErr2; + } + } + + /* Save the HCA's PCI configuration headers */ + { + status = __save_pci_config( &hcaBusIfc, &hcaConfig ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to save HCA config.\n")); + goto resetErr3; + } + + /* Save the HCA bridge's configuration, if any */ + if (is_tavor) { + int hca_pcix_cap; + status = __save_pci_config( &brBusIfc, &brConfig ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to save bridge config.\n")); + goto resetErr3; + } + hca_pcix_cap = __FindCapability( &brConfig, PCI_CAPABILITY_ID_PCIX ); + if (!hca_pcix_cap) { + status = STATUS_UNSUCCESSFUL; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Couldn't locate HCA bridge PCI-X capability, aborting.\n")); + goto resetErr3; + } + } + } + + /* reset the card */ + { + PULONG reset_p; + PHYSICAL_ADDRESS pa; + /* map reset register */ + pa.QuadPart = pExt->bar[HCA_BAR_TYPE_HCR].phys + (uint64_t)HCA_RESET_HCR_OFFSET; + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Mapping reset register with address 0x%I64x\n", pa.QuadPart)); + reset_p = MmMapIoSpace( pa, 4, MmNonCached ); + if( !reset_p ) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to map reset register with address 0x%I64x\n", pa.QuadPart)); + status = STATUS_UNSUCCESSFUL; + goto resetErr3; + } + + /* Issue the reset. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Resetting the chip ...\n")); + WRITE_REGISTER_ULONG( reset_p, HCA_RESET_TOKEN ); + + /* unmap the reset register */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Unmapping reset register \n")); + MmUnmapIoSpace( reset_p, 4 ); + + /* Wait a second. */ + cl_thread_suspend( 1000 ); + } + + /* Read the configuration register until it doesn't return 0xFFFFFFFF */ + { + ULONG data, i, reset_failed = 1; + BUS_INTERFACE_STANDARD *p_ifc = (is_tavor) ? &brBusIfc : &hcaBusIfc; + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Read the configuration register \n")); + for( i = 0; i < 100; i++ ) { + if (4 != p_ifc->GetBusData( p_ifc->Context, + PCI_WHICHSPACE_CONFIG, &data, 0, 4)) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to read device configuration data. 
Card reset failed !\n")); + status = STATUS_UNSUCCESSFUL; + break; + } + /* See if we got valid data. */ + if( data != 0xFFFFFFFF ) { + reset_failed = 0; + break; + } + + cl_thread_suspend( 100 ); + } + + if (reset_failed) { + /* on Tavor reset failure, if configured so, we disable the reset for next time */ + if (is_tavor && g_disable_tavor_reset) + set_skip_tavor_reset(); + + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Doh! PCI device did not come back after reset!\n")); + status = STATUS_UNSUCCESSFUL; + goto resetErr3; + } + } + + /* restore the HCA's PCI configuration headers */ + { + if (is_tavor) { + /* Restore the HCA's bridge configuration. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Restoring bridge PCI configuration \n")); + status = __restore_pci_config( &brBusIfc, &brConfig, TRUE ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to restore bridge config. Card reset failed !\n")); + goto resetErr3; + } + } + + /* Restore the HCA's configuration. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Restoring HCA PCI configuration \n")); + status = __restore_pci_config( &hcaBusIfc, &hcaConfig, FALSE ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to restore HCA config. Card reset failed !\n")); + } + } + +resetErr3: + if (is_tavor) + brBusIfc.InterfaceDereference( brBusIfc.Context ); + +resetErr2: + hcaBusIfc.InterfaceDereference( hcaBusIfc.Context ); + +resetErr1: +resetExit: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +/* + * Tunes PCI configuration as described in 13.3.2 in the Tavor PRM. + */ +NTSTATUS +hca_tune_pci( + IN DEVICE_OBJECT* const pDevObj, + OUT uplink_info_t *p_uplink_info ) +{ + NTSTATUS status; + PCI_COMMON_CONFIG hcaConfig; + BUS_INTERFACE_STANDARD hcaBusIfc; + ULONG len; + ULONG capOffset; + PCI_PCIX_CAPABILITY *pPciXCap; + PCI_PCIEXP_CAPABILITY *pPciExpCap; + + HCA_ENTER( HCA_DBG_PNP ); + + /* Get the HCA's bus interface. */ + status = __get_bus_ifc( pDevObj, &GUID_BUS_INTERFACE_STANDARD, &hcaBusIfc ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to get HCA bus interface.\n")); + return status; + } + + /* Save the HCA's configuration. */ + status = __save_pci_config( &hcaBusIfc, &hcaConfig ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to save HCA config.\n")); + status = STATUS_UNSUCCESSFUL; + goto tweakErr; + } + status = 0; + + /* + * PCIX Capability + */ + capOffset = __FindCapability( &hcaConfig, PCI_CAPABILITY_ID_PCIX ); + if( capOffset ) + { + pPciXCap = (PCI_PCIX_CAPABILITY*)(((UCHAR*)&hcaConfig) + capOffset); + + /* fill uplink features */ + p_uplink_info->bus_type = UPLINK_BUS_PCIX; + if (pPciXCap->Status & (1 << 17)) + p_uplink_info->u.pci_x.capabilities = UPLINK_BUS_PCIX_133; + + /* Update the command field to max the read byte count if needed. 
*/ + if ( g_tune_pci && (pPciXCap->Command & 0x000C) != 0x000C ) + { + HCA_PRINT( TRACE_LEVEL_WARNING, HCA_DBG_PNP, + ("Updating max recv byte count of PCI-X capability.\n")); + pPciXCap->Command = (pPciXCap->Command & ~PCI_X_CMD_MAX_READ) | (3 << 2); + len = hcaBusIfc.SetBusData( hcaBusIfc.Context, PCI_WHICHSPACE_CONFIG, + &pPciXCap->Command, + capOffset + offsetof( PCI_PCIX_CAPABILITY, Command), + sizeof( pPciXCap->Command ) ); + if( len != sizeof( pPciXCap->Command ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to update PCI-X maximum read byte count.\n")); + status = STATUS_UNSUCCESSFUL; + goto tweakErr; + } + } + } + + + /* + * PCI Express Capability + */ + capOffset = __FindCapability( &hcaConfig, PCI_CAPABILITY_ID_PCIEXP ); + if( capOffset ) + { + pPciExpCap = (PCI_PCIEXP_CAPABILITY*)(((UCHAR*)&hcaConfig) + capOffset); + + /* fill uplink features */ + p_uplink_info->bus_type = UPLINK_BUS_PCIE; + if ((pPciExpCap->LinkStatus & 15) == 1) + p_uplink_info->u.pci_e.link_speed = UPLINK_BUS_PCIE_SDR; + if ((pPciExpCap->LinkStatus & 15) == 2) + p_uplink_info->u.pci_e.link_speed = UPLINK_BUS_PCIE_DDR; + p_uplink_info->u.pci_e.link_width = (uint8_t)((pPciExpCap->LinkStatus >> 4) & 0x03f); + p_uplink_info->u.pci_e.capabilities = (uint8_t)((pPciExpCap->LinkCapabilities >> 2) & 0xfc); + p_uplink_info->u.pci_e.capabilities |= pPciExpCap->LinkCapabilities & 3; + + if (g_tune_pci) { + /* Update Max_Read_Request_Size. */ + HCA_PRINT( TRACE_LEVEL_WARNING ,HCA_DBG_PNP, + ("Updating max recv byte count of PCI-Express capability.\n")); + pPciExpCap->DevControl = (pPciExpCap->DevControl & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12); + len = hcaBusIfc.SetBusData( hcaBusIfc.Context, PCI_WHICHSPACE_CONFIG, + &pPciExpCap->DevControl, + capOffset + offsetof( PCI_PCIEXP_CAPABILITY, DevControl), + sizeof( pPciExpCap->DevControl ) ); + if( len != sizeof( pPciExpCap->DevControl ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to update PCI-Exp maximum read byte count.\n")); + goto tweakErr; + } + } + } + + +tweakErr: + hcaBusIfc.InterfaceDereference( hcaBusIfc.Context ); + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +/* leo */ + +NTSTATUS +hca_enable_pci( + IN DEVICE_OBJECT* const pDevObj, + OUT PBUS_INTERFACE_STANDARD phcaBusIfc, + OUT PCI_COMMON_CONFIG* pHcaConfig + ) +{ + NTSTATUS status; + ULONG len; + + HCA_ENTER( HCA_DBG_PNP ); + + /* Get the HCA's bus interface. */ + status = __get_bus_ifc( pDevObj, &GUID_BUS_INTERFACE_STANDARD, phcaBusIfc ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR , HCA_DBG_PNP ,("Failed to get HCA bus interface.\n")); + return STATUS_DEVICE_NOT_READY; + } + + /* Save the HCA's configuration. 
*/ + status = __save_pci_config( phcaBusIfc, pHcaConfig ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to save HCA config.\n")); + goto pciErr; + } + + /* fix command register (set PCI Master bit) */ + // NOTE: we change here the saved value of the command register + pHcaConfig->Command |= 7; + len = phcaBusIfc->SetBusData( phcaBusIfc->Context, PCI_WHICHSPACE_CONFIG, + (PVOID)&pHcaConfig->Command , 4, sizeof(ULONG) ); + if( len != sizeof(ULONG) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Failed to write command register.\n")); + status = STATUS_DEVICE_NOT_READY; + goto pciErr; + } + status = STATUS_SUCCESS; + goto out; + + pciErr: + phcaBusIfc->InterfaceDereference( phcaBusIfc->Context ); + phcaBusIfc->InterfaceDereference = NULL; + out: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +void hca_disable_pci(PBUS_INTERFACE_STANDARD phcaBusIfc) +{ + // no need to disable the card, so just release the PCI bus i/f + if (phcaBusIfc->InterfaceDereference) { + phcaBusIfc->InterfaceDereference( phcaBusIfc->Context ); + phcaBusIfc->InterfaceDereference = NULL; + } +} + diff --git a/branches/IBFD/hw/mthca/kernel/hca_pci.h b/branches/IBFD/hw/mthca/kernel/hca_pci.h new file mode 100644 index 00000000..dd8e9c0e --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_pci.h @@ -0,0 +1,24 @@ +#ifndef HCI_PCI_H +#define HCI_PCI_H + + +NTSTATUS +hca_reset( + IN DEVICE_OBJECT* const pDevObj, int is_tavor ); + +NTSTATUS +hca_enable_pci( + IN DEVICE_OBJECT* const pDevObj, + OUT PBUS_INTERFACE_STANDARD phcaBusIfc, + OUT PCI_COMMON_CONFIG* pHcaConfig + ); + +void hca_disable_pci( + IN PBUS_INTERFACE_STANDARD phcaBusIfc); + +NTSTATUS + hca_tune_pci( + IN DEVICE_OBJECT* const pDevObj, + OUT uplink_info_t *p_uplink_info ); + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/hca_pnp.c b/branches/IBFD/hw/mthca/kernel/hca_pnp.c new file mode 100644 index 00000000..9e4a4c0a --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_pnp.c @@ -0,0 +1,1769 @@ +/* BEGIN_ICS_COPYRIGHT **************************************** +** END_ICS_COPYRIGHT ****************************************/ + +/* + $Revision: 1.1 $ +*/ + + +/* + * Provides the driver entry points for the Tavor VPD. 
+ */ + +#include "hca_driver.h" +#include "mthca_dev.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_pnp.tmh" +#endif +#include "mthca.h" +#include +#include + +extern const char *mthca_version; + +static NTSTATUS +hca_start( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_cancel_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static void +hca_release_resources( + IN DEVICE_OBJECT* const p_dev_obj ); + +static NTSTATUS +hca_cancel_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_surprise_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_capabilities( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_pnp_state( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_bus_relations( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_removal_relations( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_query_power( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static NTSTATUS +hca_set_power( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ); + +static ci_interface_t* +__alloc_hca_ifc( + IN hca_dev_ext_t* const p_ext ); + +static NTSTATUS +__get_ci_interface( + IN DEVICE_OBJECT* const p_dev_obj ); + +static void +__hca_deregister( + IN hca_dev_ext_t *p_ext ); + +static NTSTATUS +__hca_register( + IN DEVICE_OBJECT *p_dev_obj ); + +static NTSTATUS +__pnp_notify_target( + IN void *pNotifyStruct, + IN void *context ); + +static NTSTATUS +__pnp_notify_ifc( + IN void *pNotifyStruct, + IN void *context ); + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, hca_add_device) +#pragma alloc_text (PAGE, hca_start) +#pragma alloc_text (PAGE, hca_query_stop) +#pragma alloc_text (PAGE, hca_stop) +#pragma alloc_text (PAGE, hca_cancel_stop) +#pragma alloc_text (PAGE, hca_query_remove) +#pragma alloc_text (PAGE, hca_release_resources) +#pragma alloc_text (PAGE, hca_cancel_remove) +#pragma alloc_text (PAGE, hca_surprise_remove) +#pragma alloc_text (PAGE, hca_query_capabilities) +#pragma alloc_text (PAGE, hca_query_pnp_state) +#pragma alloc_text (PAGE, hca_query_bus_relations) +#pragma alloc_text (PAGE, hca_query_removal_relations) +#pragma alloc_text (PAGE, hca_set_power) +#pragma alloc_text (PAGE, __alloc_hca_ifc) +#pragma alloc_text (PAGE, __get_ci_interface) +#pragma alloc_text (PAGE, __hca_register) +#pragma alloc_text (PAGE, __pnp_notify_target) +#pragma alloc_text (PAGE, __pnp_notify_ifc) +#endif + + +static cl_vfptr_pnp_po_t vfptrHcaPnp; + + +void 
+hca_init_vfptr( void ) +{ + vfptrHcaPnp.identity = "HCA driver"; + vfptrHcaPnp.pfn_start = hca_start; + vfptrHcaPnp.pfn_query_stop = hca_query_stop; + vfptrHcaPnp.pfn_stop = hca_stop; + vfptrHcaPnp.pfn_cancel_stop = hca_cancel_stop; + vfptrHcaPnp.pfn_query_remove = hca_query_remove; + vfptrHcaPnp.pfn_release_resources = hca_release_resources; + vfptrHcaPnp.pfn_remove = cl_do_remove; + vfptrHcaPnp.pfn_cancel_remove = hca_cancel_remove; + vfptrHcaPnp.pfn_surprise_remove = hca_surprise_remove; + vfptrHcaPnp.pfn_query_capabilities = hca_query_capabilities; + vfptrHcaPnp.pfn_query_pnp_state = hca_query_pnp_state; + vfptrHcaPnp.pfn_filter_res_req = cl_irp_skip; + vfptrHcaPnp.pfn_dev_usage_notification = cl_do_sync_pnp; + vfptrHcaPnp.pfn_query_bus_relations = hca_query_bus_relations; + vfptrHcaPnp.pfn_query_ejection_relations = cl_irp_ignore; + vfptrHcaPnp.pfn_query_removal_relations = hca_query_removal_relations; + vfptrHcaPnp.pfn_query_target_relations = cl_irp_ignore; + vfptrHcaPnp.pfn_unknown = cl_irp_ignore; + vfptrHcaPnp.pfn_query_resources = cl_irp_ignore; + vfptrHcaPnp.pfn_query_res_req = cl_irp_ignore; + vfptrHcaPnp.pfn_query_bus_info = cl_irp_ignore; + vfptrHcaPnp.pfn_query_interface = cl_irp_ignore; + vfptrHcaPnp.pfn_read_config = cl_irp_ignore; + vfptrHcaPnp.pfn_write_config = cl_irp_ignore; + vfptrHcaPnp.pfn_eject = cl_irp_ignore; + vfptrHcaPnp.pfn_set_lock = cl_irp_ignore; + vfptrHcaPnp.pfn_query_power = hca_query_power; + vfptrHcaPnp.pfn_set_power = hca_set_power; + vfptrHcaPnp.pfn_power_sequence = cl_irp_ignore; + vfptrHcaPnp.pfn_wait_wake = cl_irp_ignore; +} + + +NTSTATUS +hca_add_device( + IN PDRIVER_OBJECT pDriverObj, + IN PDEVICE_OBJECT pPdo ) +{ + NTSTATUS status; + DEVICE_OBJECT *p_dev_obj, *pNextDevObj; + hca_dev_ext_t *p_ext; + + HCA_ENTER(HCA_DBG_PNP); + + /* + * Create the device so that we have a device extension to store stuff in. + */ + status = IoCreateDevice( pDriverObj, sizeof(hca_dev_ext_t), + NULL, FILE_DEVICE_INFINIBAND, FILE_DEVICE_SECURE_OPEN, + FALSE, &p_dev_obj ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoCreateDevice returned 0x%08X.\n", status)); + return status; + } + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + cl_memclr( p_ext, sizeof(hca_dev_ext_t) ); + cl_spinlock_init( &p_ext->uctx_lock ); + cl_qlist_init( &p_ext->uctx_list ); + atomic_set(&p_ext->usecnt, 0); + + /* Attach to the device stack. */ + pNextDevObj = IoAttachDeviceToDeviceStack( p_dev_obj, pPdo ); + if( !pNextDevObj ) + { + //cl_event_destroy( &p_ext->mutex ); + IoDeleteDevice( p_dev_obj ); + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoAttachDeviceToDeviceStack failed.\n")); + return STATUS_NO_SUCH_DEVICE; + } + + /* Inititalize the complib extension. */ + cl_init_pnp_po_ext( p_dev_obj, pNextDevObj, pPdo, 0, + &vfptrHcaPnp, NULL ); + + p_ext->state = HCA_ADDED; + + HCA_EXIT(HCA_DBG_PNP); + return status; +} + + +static NTSTATUS +__get_ci_interface( + IN DEVICE_OBJECT* const p_dev_obj ) +{ + NTSTATUS status; + IRP *p_irp; + hca_dev_ext_t *p_ext; + IO_STATUS_BLOCK ioStatus; + IO_STACK_LOCATION *pIoStack; + KEVENT event; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + KeInitializeEvent( &event, NotificationEvent, FALSE ); + + /* Query for the verbs interface. 
*/ + p_irp = IoBuildSynchronousFsdRequest( IRP_MJ_PNP, p_ext->p_al_dev, + NULL, 0, NULL, &event, &ioStatus ); + if( !p_irp ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoBuildSynchronousFsdRequest failed.\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + /* Format the IRP. */ + pIoStack = IoGetNextIrpStackLocation( p_irp ); + pIoStack->MinorFunction = IRP_MN_QUERY_INTERFACE; + pIoStack->Parameters.QueryInterface.Version = IB_CI_INTERFACE_VERSION; + pIoStack->Parameters.QueryInterface.Size = sizeof(ib_ci_ifc_t); + pIoStack->Parameters.QueryInterface.Interface = + (INTERFACE*)&p_ext->ci_ifc; + pIoStack->Parameters.QueryInterface.InterfaceSpecificData = NULL; + pIoStack->Parameters.QueryInterface.InterfaceType = + &GUID_IB_CI_INTERFACE; + p_irp->IoStatus.Status = STATUS_NOT_SUPPORTED; + + /* Send the IRP. */ + status = IoCallDriver( p_ext->p_al_dev, p_irp ); + if( status == STATUS_PENDING ) + { + KeWaitForSingleObject( &event, Executive, KernelMode, + FALSE, NULL ); + + status = ioStatus.Status; + } + + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("Query interface for verbs returned %08x.\n", status)); + return status; + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static NTSTATUS +__pnp_notify_target( + IN void *pNotifyStruct, + IN void *context ) +{ + NTSTATUS status = STATUS_SUCCESS; + DEVICE_OBJECT *p_dev_obj; + hca_dev_ext_t *p_ext; + TARGET_DEVICE_REMOVAL_NOTIFICATION *pNotify; + + HCA_ENTER( HCA_DBG_PNP ); + + pNotify = (TARGET_DEVICE_REMOVAL_NOTIFICATION*)pNotifyStruct; + p_dev_obj = (DEVICE_OBJECT*)context; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + if( IsEqualGUID( &pNotify->Event, &GUID_TARGET_DEVICE_QUERY_REMOVE ) ) + { + if ( p_ext->state == HCA_REGISTERED) { + /* Release AL's CI interface. */ + p_ext->ci_ifc.wdm.InterfaceDereference( p_ext->ci_ifc.wdm.Context ); + p_ext->state = HCA_IFC_DEREFERENCED; + } + + /* Release AL's file object so that it can unload. */ + CL_ASSERT( p_ext->p_al_dev ); + CL_ASSERT( p_ext->p_al_file_obj ); + CL_ASSERT( p_ext->p_al_file_obj == pNotify->FileObject ); + if( p_ext->p_al_file_obj ) { + ObDereferenceObject( p_ext->p_al_file_obj ); + p_ext->p_al_file_obj = NULL; + p_ext->p_al_dev = NULL; + } + } + else if( IsEqualGUID( &pNotify->Event, + &GUID_TARGET_DEVICE_REMOVE_COMPLETE ) ) + { + if (p_ext->ci_ifc.deregister_ca) { + /* Notify AL that the CA is being removed. */ + p_ext->ci_ifc.deregister_ca( p_ext->hca.guid ); + p_ext->ci_ifc.deregister_ca = NULL; + } + + if ( p_ext->state == HCA_REGISTERED) { + /* Release AL's CI interface. */ + p_ext->ci_ifc.wdm.InterfaceDereference( p_ext->ci_ifc.wdm.Context ); + } + p_ext->state = HCA_STARTED; + + /* Release AL's file object so that it can unload. */ + if( p_ext->p_al_file_obj ) + { + ObDereferenceObject( p_ext->p_al_file_obj ); + p_ext->p_al_file_obj = NULL; + p_ext->p_al_dev = NULL; + } + + /* Cancel our target device change registration. */ + if (p_ext->pnp_target_entry) { + IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry ); + p_ext->pnp_target_entry = NULL; + } + + } + else if( IsEqualGUID( &pNotify->Event, + &GUID_TARGET_DEVICE_REMOVE_CANCELLED ) ) + { + /* Cancel our target device change registration. */ + if (p_ext->pnp_target_entry) { + IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry ); + p_ext->pnp_target_entry = NULL; + } + + /* Get the device object pointer for the AL. */ + CL_ASSERT( !p_ext->p_al_file_obj ); + CL_ASSERT( !p_ext->p_al_dev ); + /* Get the AL device object. 
*/ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("Calling IoGetDeviceObjectPointer.\n")); + status = IoGetDeviceObjectPointer( &p_ext->al_sym_name, + FILE_ALL_ACCESS, &p_ext->p_al_file_obj, &p_ext->p_al_dev ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("IoGetDeviceObjectPointer returned %08x.\n", status )); + return STATUS_SUCCESS; + } + + /* Register for removal notification of the IB Fabric root device. */ + status = IoRegisterPlugPlayNotification( + EventCategoryTargetDeviceChange, 0, p_ext->p_al_file_obj, + p_dev_obj->DriverObject, __pnp_notify_target, p_dev_obj, + &p_ext->pnp_target_entry ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); + return status; + } + + CL_ASSERT( p_ext->state == HCA_IFC_DEREFERENCED ); + if ( p_ext->state == HCA_IFC_DEREFERENCED) { + /* Release AL's CI interface. */ + p_ext->ci_ifc.wdm.InterfaceReference( p_ext->ci_ifc.wdm.Context ); + p_ext->state = HCA_REGISTERED; + } + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static ci_interface_t* +__alloc_hca_ifc( + IN hca_dev_ext_t* const p_ext ) +{ + ci_interface_t *pIfc; + + HCA_ENTER( HCA_DBG_PNP ); + + pIfc = (ci_interface_t*)ExAllocatePoolWithTag( PagedPool, + sizeof(ci_interface_t), + 'pnpa' ); + if( !pIfc ) + { + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("Failed to allocate ci_interface_t (%d bytes).\n", + sizeof(ci_interface_t))); + return NULL; + } + + setup_ci_interface( p_ext->hca.guid, + !!mthca_is_livefish(p_ext->hca.mdev), + pIfc ); + + pIfc->p_hca_dev = p_ext->cl_ext.p_pdo; + pIfc->vend_id = (uint32_t)p_ext->hcaConfig.VendorID; + pIfc->dev_id = (uint16_t)p_ext->hcaConfig.DeviceID; + pIfc->dev_revision = (uint16_t)p_ext->hca.hw_ver; + + HCA_EXIT( HCA_DBG_PNP ); + return pIfc; +} + +static void +__hca_deregister( + IN hca_dev_ext_t *p_ext ) +{ + HCA_ENTER( HCA_DBG_PNP ); + + if ( p_ext->state == HCA_REGISTERED) { + if (p_ext->ci_ifc.deregister_ca) { + /* Notify AL that the CA is being removed. */ + p_ext->ci_ifc.deregister_ca( p_ext->hca.guid ); + p_ext->ci_ifc.deregister_ca = NULL; + /* Release AL's CI interface. */ + p_ext->ci_ifc.wdm.InterfaceDereference( p_ext->ci_ifc.wdm.Context ); + p_ext->state = HCA_STARTED; + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP, + ("***** HCA deregistered \n")); + } + } + + HCA_EXIT( HCA_DBG_PNP ); +} + +static NTSTATUS +__hca_register( + IN DEVICE_OBJECT *p_dev_obj ) +{ + hca_dev_ext_t *p_ext; + NTSTATUS status; + ib_api_status_t ib_status; + ci_interface_t *p_hca_ifc; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + ASSERT( p_ext->state == HCA_STARTED ); + ASSERT( p_ext->p_al_dev ); + + /* Get the AL's lower interface. */ + status = __get_ci_interface( p_dev_obj ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("__get_ci_interface returned %08x.\n", status)); + goto exit; + } + + /* Allocate and populate our HCA interface structure. */ + p_hca_ifc = __alloc_hca_ifc( p_ext ); + if( !p_hca_ifc ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("__alloc_hca_ifc failed.\n")); + status = STATUS_NO_MEMORY; + goto exit; + } + + /* Notify AL that we're available... 
*/ + ib_status = p_ext->ci_ifc.register_ca( p_hca_ifc ); + ExFreePool( p_hca_ifc ); + if( ib_status != IB_SUCCESS ) + { + p_ext->ci_ifc.wdm.InterfaceDereference( p_ext->ci_ifc.wdm.Context ); + status = STATUS_INSUFFICIENT_RESOURCES; + goto exit; + } + + p_ext->state = HCA_REGISTERED; + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP, + ("***** HCA registered \n")); +exit: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static NTSTATUS +__pnp_notify_ifc( + IN void *pNotifyStruct, + IN void *context ) +{ + NTSTATUS status = STATUS_SUCCESS; + DEVICE_OBJECT *p_dev_obj; + hca_dev_ext_t *p_ext; + DEVICE_INTERFACE_CHANGE_NOTIFICATION *pNotify; + + HCA_ENTER( HCA_DBG_PNP ); + + pNotify = (DEVICE_INTERFACE_CHANGE_NOTIFICATION*)pNotifyStruct; + p_dev_obj = (DEVICE_OBJECT*)context; + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + if( !IsEqualGUID( &pNotify->Event, &GUID_DEVICE_INTERFACE_ARRIVAL ) ) + goto done; + + /* + * Sanity check. We should only be getting notifications of the + * CI interface exported by AL. + */ + ASSERT( + IsEqualGUID( &pNotify->InterfaceClassGuid, &GUID_IB_CI_INTERFACE ) ); + + if( p_ext->state != HCA_STARTED ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("Invalid state: %d\n", p_ext->state)); + goto done; + } + + /* save symbolic name of IBAL for a case of cancelled IBAL removal */ + if (!p_ext->al_sym_name.Buffer) { + p_ext->al_sym_name.Length = pNotify->SymbolicLinkName->Length; + p_ext->al_sym_name.MaximumLength = pNotify->SymbolicLinkName->MaximumLength; + p_ext->al_sym_name.Buffer = ExAllocatePoolWithTag( NonPagedPool, + p_ext->al_sym_name.MaximumLength * sizeof(wchar_t), + 'cfin' ); + if (!p_ext->al_sym_name.Buffer) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_PNP ,("allocation of sym IBAL name failed.\n")); + goto done; + } + RtlCopyUnicodeString( &p_ext->al_sym_name, pNotify->SymbolicLinkName ); + } + + ASSERT( !p_ext->p_al_dev ); + ASSERT( !p_ext->p_al_file_obj ); + + /* Get the AL device object. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION ,HCA_DBG_PNP ,("Calling IoGetDeviceObjectPointer.\n")); + status = IoGetDeviceObjectPointer( pNotify->SymbolicLinkName, + FILE_ALL_ACCESS, &p_ext->p_al_file_obj, &p_ext->p_al_dev ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoGetDeviceObjectPointer returned %08x.\n", status )); + goto done; + } + + /* Register for removal notification of the IB Fabric root device. */ + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("Registering for target notifications.\n")); + status = IoRegisterPlugPlayNotification( + EventCategoryTargetDeviceChange, 0, p_ext->p_al_file_obj, + p_dev_obj->DriverObject, __pnp_notify_target, p_dev_obj, + &p_ext->pnp_target_entry ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); + goto err_reg_notify; + } + + status = __hca_register( p_dev_obj ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__get_ci_interface returned %08x.\n", status)); + goto err_reg_hca; + } + goto done; + +err_reg_hca: + IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry ); + p_ext->pnp_target_entry = NULL; +err_reg_notify: + ObDereferenceObject( p_ext->p_al_file_obj ); + p_ext->p_al_file_obj = NULL; + p_ext->p_al_dev = NULL; +done: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +/* + * Walk the resource lists and store the information. 
The write-only + * flag is not set for the UAR region, so it is indistinguishable from the + * DDR region since both are prefetchable. The code here assumes that the + * resources get handed in order - HCR, UAR, DDR. + * - Configuration Space: not prefetchable, read/write + * - UAR space: prefetchable, write only. + * - DDR: prefetchable, read/write. + */ +static NTSTATUS +__SetupHcaResources( + IN DEVICE_OBJECT* const p_dev_obj, + IN CM_RESOURCE_LIST* const pHcaResList, + IN CM_RESOURCE_LIST* const pHostResList ) +{ + NTSTATUS status = STATUS_SUCCESS; + hca_dev_ext_t *p_ext; + USHORT i; + hca_bar_type_t type = HCA_BAR_TYPE_HCR; + + CM_PARTIAL_RESOURCE_DESCRIPTOR *pHcaRes, *pHostRes; + + HCA_ENTER( HCA_DBG_PNP ); + + // there will be no resources for "livefish" (PCI memory controller mode) + if (!pHcaResList || !pHostResList) + goto done; + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + // store the bus number for reset of Tavor + p_ext->bus_number = pHostResList->List[0].BusNumber; + + for( i = 0; i < pHostResList->List[0].PartialResourceList.Count; i++ ) + { + pHcaRes = + &pHcaResList->List[0].PartialResourceList.PartialDescriptors[i]; + pHostRes = + &pHostResList->List[0].PartialResourceList.PartialDescriptors[i]; + + + /* + * Save the interrupt information so that we can power the device + * up and down. Since the device will lose state when powered down + * we have to fully disable it. Note that we can leave memory mapped + * resources in place when powered down as the resource assignments + * won't change. However, we must disconnect our interrupt, and + * reconnect it when powering up. + */ + if( pHcaRes->Type == CmResourceTypeInterrupt ) + { + p_ext->interruptInfo = *pHostRes; + continue; + } + + if( pHcaRes->Type != CmResourceTypeMemory ) + continue; + + /* + * Sanity check that our assumption on how resources + * are reported hold. 
+ */ + if( type == HCA_BAR_TYPE_HCR && + (pHcaRes->Flags & CM_RESOURCE_MEMORY_PREFETCHABLE) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("First memory resource is prefetchable - expected HCR.\n")); + status = STATUS_UNSUCCESSFUL; + break; + } + + p_ext->bar[type].phys = pHcaRes->u.Memory.Start.QuadPart; + p_ext->bar[type].size = pHcaRes->u.Memory.Length; +#ifdef MAP_ALL_HCA_MEMORY + /*leo: no need to map all the resources */ + p_ext->bar[type].virt = MmMapIoSpace( pHostRes->u.Memory.Start, + pHostRes->u.Memory.Length, MmNonCached ); + if( !p_ext->bar[type].virt ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to map memory resource type %d\n", type)); + status = STATUS_UNSUCCESSFUL; + break; + } +#else + p_ext->bar[type].virt = NULL; +#endif + + type++; + } + + if( type == HCA_BAR_TYPE_DDR) + { + p_ext->hca_hidden = 1; + } + else + if( type != HCA_BAR_TYPE_MAX ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("Failed to map all memory resources.\n")); + status = STATUS_UNSUCCESSFUL; + } + + if( p_ext->interruptInfo.Type != CmResourceTypeInterrupt ) + { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("No interrupt resource.\n")); + status = STATUS_UNSUCCESSFUL; + } + +done: + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static void +__UnmapHcaMemoryResources( + IN DEVICE_OBJECT* const p_dev_obj ) +{ + hca_dev_ext_t *p_ext; + USHORT i; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + for( i = 0; i < HCA_BAR_TYPE_MAX; i++ ) + { + if( p_ext->bar[i].virt ) + { + MmUnmapIoSpace( p_ext->bar[i].virt, p_ext->bar[i].size ); + cl_memclr( &p_ext->bar[i], sizeof(hca_bar_t) ); + } + } + + HCA_EXIT( HCA_DBG_PNP ); +} + + +static int mthca_get_livefish_info(struct mthca_dev *mdev, __be64 *node_guid, u32 *hw_id) +{ + *node_guid = cl_hton64((uint64_t)(ULONG_PTR)mdev); + mdev->ib_dev.node_guid = *node_guid; + *hw_id = 0; + return 0; +} + +static NTSTATUS +hca_start( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status; + hca_dev_ext_t *p_ext; + IO_STACK_LOCATION *pIoStack; + POWER_STATE powerState; + DEVICE_DESCRIPTION devDesc; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + /* Handled on the way up. */ + status = cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Lower drivers failed IRP_MN_START_DEVICE (%#x).\n", status)); + return status; + } + + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + /* + * Walk the resource lists and store the information. The write-only + * flag is not set for the UAR region, so it is indistinguishable from the + * DDR region since both are prefetchable. The code here assumes that the + * resources get handed in order - HCR, UAR, DDR. + * - Configuration Space: not prefetchable, read/write + * - UAR space: prefetchable, write only. + * - DDR: prefetchable, read/write. + */ + status = __SetupHcaResources( p_dev_obj, + pIoStack->Parameters.StartDevice.AllocatedResources, + pIoStack->Parameters.StartDevice.AllocatedResourcesTranslated ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__ProcessResources returned %08X.\n", status)); + return status; + } + + /* save PCI bus i/f, PCI configuration info and enable device */ + hca_enable_pci( p_dev_obj, &p_ext->hcaBusIfc, &p_ext->hcaConfig ); + + /* + * Get the DMA adapter representing the HCA so we can + * allocate common buffers. 
+ */ + RtlZeroMemory( &devDesc, sizeof(devDesc) ); + devDesc.Version = DEVICE_DESCRIPTION_VERSION2; + devDesc.Master = TRUE; + devDesc.ScatterGather = TRUE; + devDesc.Dma32BitAddresses = TRUE; + devDesc.Dma64BitAddresses = TRUE; + devDesc.InterfaceType = PCIBus; + + // get the adapter object + // 0x80000000 is a threshold, that's why - 1 + devDesc.MaximumLength = 0x80000000 - 1; + p_ext->p_dma_adapter = IoGetDmaAdapter( + p_ext->cl_ext.p_pdo, &devDesc, &p_ext->n_map_regs ); + if( !p_ext->p_dma_adapter ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("Failed to get DMA_ADAPTER for HCA.\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + /* Initialize the HCA now. */ + status = mthca_init_one( p_ext ); + if( !NT_SUCCESS( status ) ) + { + //TODO: no cleanup on error + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("mthca_init_one returned %08X\n", status)); + return status; + } + + /*leo: get node GUID */ + { + int err; + if (mthca_is_livefish(p_ext->hca.mdev)) + err = mthca_get_livefish_info( p_ext->hca.mdev, &p_ext->hca.guid, &p_ext->hca.hw_ver ); + else + err = mthca_get_dev_info( p_ext->hca.mdev, &p_ext->hca.guid, &p_ext->hca.hw_ver ); + + if (err) { + + //TODO: no cleanup on error + HCA_PRINT( TRACE_LEVEL_ERROR,HCA_DBG_PNP, + ("can't get guid - mthca_query_port()")); + return STATUS_INSUFFICIENT_RESOURCES; + } + } + + /* queue HCA */ + mlnx_hca_insert( &p_ext->hca ); + + /* + * Change the state since the PnP callback can happen + * before the callback returns. + */ + p_ext->state = HCA_STARTED; + + /* Register for interface arrival of the IB_AL device. */ + status = IoRegisterPlugPlayNotification( + EventCategoryDeviceInterfaceChange, + PNPNOTIFY_DEVICE_INTERFACE_INCLUDE_EXISTING_INTERFACES, + (void*)&GUID_IB_CI_INTERFACE, p_dev_obj->DriverObject, + __pnp_notify_ifc, p_dev_obj, &p_ext->pnp_ifc_entry ); + if( !NT_SUCCESS( status ) ) + { + p_ext->state = HCA_ADDED; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("IoRegisterPlugPlayNotification returned %08x.\n", status)); + } + + /* We get started fully powered. */ + p_ext->DevicePowerState = PowerDeviceD0; + powerState.DeviceState = PowerDeviceD0; + powerState = PoSetPowerState ( p_ext->cl_ext.p_self_do, DevicePowerState, powerState ); + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("PoSetPowerState: old state %d, new state to %d\n", + powerState.DeviceState, p_ext->DevicePowerState )); + + + { + struct mthca_dev *mdev = p_ext->hca.mdev; + HCA_PRINT_EV(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW , + ("Ven %x Dev %d Hw %x Fw %d.%d.%d Drv %s (%s)", + (unsigned)p_ext->hcaConfig.VendorID, (unsigned)p_ext->hcaConfig.DeviceID, + p_ext->hca.hw_ver, (int) (mdev->fw_ver >> 32), + (int) (mdev->fw_ver >> 16) & 0xffff, (int) (mdev->fw_ver & 0xffff), + DRV_VERSION, DRV_RELDATE + )); + HCA_PRINT_EV(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW , + ("Flags %s%s%s%s%s%s%s\n", + (mdev->mthca_flags & MTHCA_FLAG_LIVEFISH) ? "Flash Recovery Mode:" : "", + (mdev->mthca_flags & MTHCA_FLAG_MEMFREE) ? "MemFree:" : "", + (mdev->mthca_flags & MTHCA_FLAG_NO_LAM) ? "NoLam:" : "", + (mdev->mthca_flags & MTHCA_FLAG_FMR) ? "Fmr:" : "", + (mdev->mthca_flags & MTHCA_FLAG_SRQ) ? "Srq:" : "", + (mdev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN) ? "HideDdr:" : "", + (mdev->mthca_flags & MTHCA_FLAG_PCIE) ? 
"PciEx:" : "" + )); + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +/* release the resources, allocated in hca_start */ +static void +__hca_release_resources( + IN DEVICE_OBJECT* const p_dev_obj ) +{ + hca_dev_ext_t *p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + HCA_ENTER( HCA_DBG_PNP ); + + switch( p_ext->state ) + { + case HCA_REGISTERED: + __hca_deregister( p_ext ); + + /* Fall through. */ + case HCA_STARTED: + /* dequeue HCA */ + mlnx_hca_remove( &p_ext->hca ); + } + + if (p_ext->al_sym_name.Buffer) { + ExFreePool( p_ext->al_sym_name.Buffer ); + p_ext->al_sym_name.Buffer = NULL; + } + + if( p_ext->pnp_target_entry ) + { + ASSERT( p_ext->pnp_ifc_entry ); + IoUnregisterPlugPlayNotification( p_ext->pnp_target_entry ); + p_ext->pnp_target_entry = NULL; + } + + if( p_ext->pnp_ifc_entry ) { + IoUnregisterPlugPlayNotification( p_ext->pnp_ifc_entry ); + p_ext->pnp_ifc_entry = NULL; + } + + if( p_ext->p_al_file_obj ) { + ObDereferenceObject( p_ext->p_al_file_obj ); + p_ext->p_al_file_obj = NULL; + } + + mthca_remove_one( p_ext ); + + if( p_ext->p_dma_adapter ) { + p_ext->p_dma_adapter->DmaOperations->PutDmaAdapter( p_ext->p_dma_adapter ); + p_ext->p_dma_adapter = NULL; + } + + hca_disable_pci( &p_ext->hcaBusIfc ); + + //cl_event_destroy( &p_ext->mutex ); + __UnmapHcaMemoryResources( p_dev_obj ); + + p_ext->state = HCA_ADDED; + + HCA_EXIT( HCA_DBG_PNP ); +} + + +static void +hca_release_resources( + IN DEVICE_OBJECT* const p_dev_obj ) +{ + hca_dev_ext_t *p_ext; + POWER_STATE powerState; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + /* release all the resources, allocated in hca_start */ + __hca_release_resources(p_dev_obj); + + /* Notify the power manager that the device is powered down. */ + p_ext->DevicePowerState = PowerDeviceD3; + powerState.DeviceState = PowerDeviceD3; + powerState = PoSetPowerState ( p_ext->cl_ext.p_self_do, DevicePowerState, powerState ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("PoSetPowerState: old state %d, new state to %d\n", + powerState.DeviceState, p_ext->DevicePowerState )); + + + /* Clear the PnP state in case we get restarted. 
*/ + p_ext->pnpState = 0; + + HCA_EXIT( HCA_DBG_PNP ); +} + + +static NTSTATUS +hca_query_removal_relations( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status; + hca_dev_ext_t *p_ext; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + if( p_ext->state == HCA_REGISTERED ) + { + status = p_ext->ci_ifc.get_relations( p_ext->hca.guid, p_irp ); + if( !NT_SUCCESS( status ) ) + { + *p_action = IrpComplete; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("AL get_relations returned %08x.\n", status)); + return status; + } + } + + *p_action = IrpPassDown; + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS; +} + + +static NTSTATUS +hca_query_bus_relations( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status; + DEVICE_RELATIONS *p_rel; + hca_dev_ext_t *p_ext; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = p_dev_obj->DeviceExtension; + + //cl_event_wait_on( &p_ext->mutex, EVENT_NO_TIMEOUT, FALSE ); + if( p_ext->state == HCA_REGISTERED ) + { + status = p_ext->ci_ifc.get_relations( p_ext->hca.guid, p_irp ); + if( !NT_SUCCESS( status ) ) + { + //cl_event_signal( &p_ext->mutex ); + *p_action = IrpComplete; + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("AL get_relations returned %08x.\n", status)); + return status; + } + } + else + { + status = cl_alloc_relations( p_irp, 1 ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("cl_alloc_relations returned %08x.\n", status)); + return status; + } + + p_rel = (DEVICE_RELATIONS*)p_irp->IoStatus.Information; + p_rel->Count = 0; + p_rel->Objects[0] = NULL; + } + + //cl_event_signal( &p_ext->mutex ); + + *p_action = IrpPassDown; + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS; +} + + +static NTSTATUS +hca_query_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* All kernel clients will get notified through the device hierarchy. */ + + /* TODO: set a flag to fail creation of any new IB resources. */ + return cl_irp_skip( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* + * Must disable everything. Complib framework will + * call ReleaseResources handler. + */ + return cl_irp_skip( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_cancel_stop( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* Handled on the way up. */ + return cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_query_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + hca_dev_ext_t*p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + if (atomic_read(&p_ext->usecnt)) { + DbgPrint( "MTHCA: Can't get unloaded. %d applications are still in work\n", p_ext->usecnt); + p_irp->IoStatus.Status = STATUS_UNSUCCESSFUL; + return cl_irp_complete( p_dev_obj, p_irp, p_action ); + } + /* TODO: set a flag to fail creation of any new IB resources. */ + return cl_irp_skip( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_cancel_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* Handled on the way up. 
*/ + return cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_surprise_remove( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + /* + * TODO: Set state so that all further requests + * automatically succeed/fail as needed. + */ + return cl_irp_skip( p_dev_obj, p_irp, p_action ); +} + + +static NTSTATUS +hca_query_capabilities( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status; + hca_dev_ext_t *p_ext; + IO_STACK_LOCATION *pIoStack; + DEVICE_CAPABILITIES *pCaps; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + /* Process on the way up. */ + status = cl_do_sync_pnp( p_dev_obj, p_irp, p_action ); + if( !NT_SUCCESS( status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("cl_do_sync_pnp returned %08X.\n", status)); + return status; + } + + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + pCaps = pIoStack->Parameters.DeviceCapabilities.Capabilities; + + /* + * Store the device power mapping into our extension since we're + * the power policy owner. The mapping is used when handling + * IRP_MN_SET_POWER IRPs. + */ + cl_memcpy( + p_ext->DevicePower, pCaps->DeviceState, sizeof(p_ext->DevicePower) ); + + if( pCaps->DeviceD1 ) + { + HCA_PRINT( TRACE_LEVEL_WARNING ,HCA_DBG_PNP, + ("WARNING: Device reports support for DeviceD1 power state.\n")); + pCaps->DeviceD1 = FALSE; + } + + if( pCaps->DeviceD2 ) + { + HCA_PRINT( TRACE_LEVEL_WARNING,HCA_DBG_PNP, + ("WARNING: Device reports support for DeviceD2 power state.\n")); + pCaps->DeviceD2 = FALSE; + } + + if( pCaps->SystemWake != PowerSystemUnspecified ) + { + HCA_PRINT( TRACE_LEVEL_WARNING ,HCA_DBG_PNP, + ("WARNING: Device reports support for system wake.\n")); + pCaps->SystemWake = PowerSystemUnspecified; + } + + if( pCaps->DeviceWake != PowerDeviceUnspecified ) + { + HCA_PRINT( TRACE_LEVEL_WARNING, HCA_DBG_PNP, + ("WARNING: Device reports support for device wake.\n")); + pCaps->DeviceWake = PowerDeviceUnspecified; + } + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + + +static NTSTATUS +hca_query_pnp_state( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + hca_dev_ext_t *p_ext; + + HCA_ENTER( HCA_DBG_PNP ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + + p_irp->IoStatus.Information |= p_ext->pnpState; + + *p_action = IrpSkip; + + HCA_EXIT( HCA_DBG_PNP ); + return STATUS_SUCCESS;; +} + +static NTSTATUS +hca_query_power( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status = STATUS_SUCCESS; + IO_STACK_LOCATION *pIoStack; + + HCA_ENTER(HCA_DBG_PO); + + UNUSED_PARAM( p_dev_obj ); + + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("QUERY_POWER for FDO %p: type %s, state %d, action %d, IRQL %d, IRP %p\n", + p_dev_obj, + (pIoStack->Parameters.Power.Type) ? "DevicePowerState" : "SystemPowerState", + pIoStack->Parameters.Power.State.DeviceState, + pIoStack->Parameters.Power.ShutdownType, KeGetCurrentIrql(), p_irp )); + + switch( pIoStack->Parameters.Power.Type ) + { + case SystemPowerState: + /* Fail any requests to hibernate or sleep the system. 
*/ + switch( pIoStack->Parameters.Power.State.SystemState ) + { + case PowerSystemSleeping1: // STANDBY support + case PowerSystemHibernate: + { + hca_dev_ext_t*p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + if (atomic_read(&p_ext->usecnt)) + status = STATUS_UNSUCCESSFUL; + break; + } + + case PowerSystemWorking: + case PowerSystemShutdown: + break; + + default: + status = STATUS_NOT_SUPPORTED; + } + break; + + case DevicePowerState: + /* Fail any query for low power states. */ + switch( pIoStack->Parameters.Power.State.DeviceState ) + { + case PowerDeviceD0: + case PowerDeviceD3: + /* We only support fully powered or off power states. */ + break; + + default: + status = STATUS_NOT_SUPPORTED; + } + break; + } + + if( status == STATUS_SUCCESS ) + *p_action = IrpSkip; + else + *p_action = IrpComplete; + + HCA_EXIT( HCA_DBG_PO ); + return status; +} + + +static void +__RequestPowerCompletion( + IN DEVICE_OBJECT *p_dev_obj, + IN UCHAR minorFunction, + IN POWER_STATE powerState, + IN void *context, + IN IO_STATUS_BLOCK *pIoStatus ) +{ + IRP *p_irp; + cl_pnp_po_ext_t *p_ext; + + HCA_ENTER( HCA_DBG_PO ); + + UNUSED_PARAM( minorFunction ); + UNUSED_PARAM( powerState ); + + p_irp = (IRP*)context; + p_ext = (cl_pnp_po_ext_t*)p_dev_obj->DeviceExtension; + + /* Propagate the device IRP status to the system IRP status. */ + p_irp->IoStatus.Status = pIoStatus->Status; + + /* Continue Power IRP processing. */ + PoStartNextPowerIrp( p_irp ); + IoCompleteRequest( p_irp, IO_NO_INCREMENT ); + IoReleaseRemoveLock( &p_ext->remove_lock, p_irp ); + HCA_EXIT( HCA_DBG_PO ); +} + + +/*NOTE: Completion routines must NEVER be pageable. */ +static NTSTATUS +__SystemPowerCompletion( + IN DEVICE_OBJECT *p_dev_obj, + IN IRP *p_irp, + IN void *context ) +{ + NTSTATUS status; + POWER_STATE state; + hca_dev_ext_t *p_ext; + IO_STACK_LOCATION *pIoStack; + + HCA_ENTER( HCA_DBG_PO ); + + UNUSED_PARAM( context ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + if( !NT_SUCCESS( p_irp->IoStatus.Status ) ) + { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("IRP_MN_SET_POWER for system failed by lower driver with %08x.\n", + p_irp->IoStatus.Status)); + status = STATUS_SUCCESS; + PoStartNextPowerIrp( p_irp ); + goto release; + } + + state.DeviceState = + p_ext->DevicePower[pIoStack->Parameters.Power.State.SystemState]; + + /* + * Send a device power IRP to our devnode. Using our device object will + * only work on win2k and other NT based systems. + */ + status = PoRequestPowerIrp( p_dev_obj, IRP_MN_SET_POWER, state, + __RequestPowerCompletion, p_irp, NULL ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("PoRequestPowerIrp: SET_POWER 'PowerDeviceD%d', status %#x\n", + state.DeviceState - 1, status )); + + if( status != STATUS_PENDING ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("PoRequestPowerIrp returned %08x.\n", status)); + p_irp->IoStatus.Status = status; /* Propagate the failure. */ + PoStartNextPowerIrp( p_irp ); + IoCompleteRequest( p_irp, IO_NO_INCREMENT ); + goto release; + } + + status = STATUS_MORE_PROCESSING_REQUIRED; + goto exit; + +release: + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); +exit: + HCA_EXIT( HCA_DBG_PO ); + return status; +} + + +/* Work item callback to handle DevicePowerD0 IRPs at passive level. 
*/ +static void +__DevicePowerUpCompletionWorkItem( + IN DEVICE_OBJECT* p_dev_obj, + IN void* context ) +{ + NTSTATUS status; + IO_STACK_LOCATION *pIoStack; + hca_dev_ext_t *p_ext; + IRP *p_irp; + POWER_STATE powerState; + + HCA_ENTER( HCA_DBG_PO ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + p_irp = (IRP*)context; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + IoFreeWorkItem( p_ext->pPoWorkItem ); + p_ext->pPoWorkItem = NULL; + + /* restart the HCA */ + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("***** Restart the HCA, IRQL %d\n", KeGetCurrentIrql())); + + status = mthca_init_one( p_ext ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("!!! mthca_init_one failed (%#x) \n", status)); + goto err_mthca_init; + } + + if( p_ext->p_al_dev ) { + status = __hca_register( p_dev_obj ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("!!! __hca_register failed (%#x) \n", status)); + goto err_hca_reg; + } + } + + p_ext->DevicePowerState = pIoStack->Parameters.Power.State.DeviceState; + powerState = PoSetPowerState( p_dev_obj, DevicePowerState, + pIoStack->Parameters.Power.State ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("PoSetPowerState: old state %d, new state to %d\n", + powerState.DeviceState, p_ext->DevicePowerState )); + + goto exit; + +err_hca_reg: +err_mthca_init: + /* Flag device as having failed. */ + p_ext->pnpState |= PNP_DEVICE_FAILED; + IoInvalidateDeviceState( p_ext->cl_ext.p_pdo ); +exit: + PoStartNextPowerIrp( p_irp ); + IoCompleteRequest( p_irp, IO_NO_INCREMENT ); + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); + HCA_EXIT( HCA_DBG_PO ); +} + +/*NOTE: Completion routines must NEVER be pageable. */ +static NTSTATUS +__DevicePowerUpCompletion( + IN DEVICE_OBJECT *p_dev_obj, + IN IRP *p_irp, + IN void *context ) +{ + NTSTATUS status = STATUS_SUCCESS; + hca_dev_ext_t *p_ext; + IO_STACK_LOCATION *pIoStack; + + HCA_ENTER( HCA_DBG_PO ); + + UNUSED_PARAM( context ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + if( !NT_SUCCESS( p_irp->IoStatus.Status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("IRP_MN_SET_POWER for device failed by lower driver with %08x.\n", + p_irp->IoStatus.Status)); + status = STATUS_SUCCESS; + PoStartNextPowerIrp( p_irp ); + goto release; + } + + /* Process in a work item - mthca_start blocks. */ + ASSERT( !p_ext->pPoWorkItem ); + p_ext->pPoWorkItem = IoAllocateWorkItem( p_dev_obj ); + if( !p_ext->pPoWorkItem ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PO, + ("Failed to allocate work item.\n" )); + status = STATUS_SUCCESS; + p_ext->pnpState |= PNP_DEVICE_FAILED; + IoInvalidateDeviceState( p_ext->cl_ext.p_pdo ); + PoStartNextPowerIrp( p_irp ); + goto release; + } + + /* Process in work item callback. 
*/ + IoMarkIrpPending( p_irp ); + IoQueueWorkItem( p_ext->pPoWorkItem, + __DevicePowerUpCompletionWorkItem, DelayedWorkQueue, p_irp ); + status = STATUS_MORE_PROCESSING_REQUIRED; + goto exit; + +release: + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); +exit: + HCA_EXIT( HCA_DBG_PO ); + return status; +} + +static NTSTATUS __DevicePowerDownWorkItemCompletion( + IN DEVICE_OBJECT *p_dev_obj, + IN IRP *p_irp, + IN void *context ) +{ + hca_dev_ext_t *p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + UNUSED_PARAM( context ); + + HCA_ENTER( HCA_DBG_PO ); + + PoStartNextPowerIrp( p_irp ); + IoReleaseRemoveLock( &p_ext->cl_ext.remove_lock, p_irp ); + + HCA_EXIT( HCA_DBG_PO ); + return STATUS_SUCCESS; +} + +/* Work item callback to handle DevicePowerD3 IRPs at passive level. */ +static void +__DevicePowerDownWorkItem( + IN DEVICE_OBJECT* p_dev_obj, + IN void* context ) +{ + IO_STACK_LOCATION *pIoStack; + hca_dev_ext_t *p_ext; + IRP *p_irp; + POWER_STATE powerState; + + HCA_ENTER( HCA_DBG_PO ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + p_irp = (IRP*)context; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + IoFreeWorkItem( p_ext->pPoWorkItem ); + p_ext->pPoWorkItem = NULL; + + p_ext->DevicePowerState = pIoStack->Parameters.Power.State.DeviceState; + powerState = PoSetPowerState( p_dev_obj, DevicePowerState, + pIoStack->Parameters.Power.State ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("PoSetPowerState: old state %d, new state to %d, IRQL %d\n", + powerState.DeviceState, p_ext->DevicePowerState, KeGetCurrentIrql() )); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("***** Remove the HCA \n")); + + { + __hca_deregister( p_ext ); + mthca_remove_one( p_ext ); + } + + IoCopyCurrentIrpStackLocationToNext( p_irp ); +#pragma warning( push, 3 ) + IoSetCompletionRoutine( p_irp, __DevicePowerDownWorkItemCompletion, + NULL, TRUE, TRUE, TRUE ); +#pragma warning( pop ) + PoCallDriver( p_ext->cl_ext.p_next_do, p_irp ); + + HCA_EXIT( HCA_DBG_PO ); +} + + +static NTSTATUS +hca_set_power( + IN DEVICE_OBJECT* const p_dev_obj, + IN IRP* const p_irp, + OUT cl_irp_action_t* const p_action ) +{ + NTSTATUS status; + IO_STACK_LOCATION *pIoStack; + hca_dev_ext_t *p_ext; + + HCA_ENTER( HCA_DBG_PO ); + + p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + pIoStack = IoGetCurrentIrpStackLocation( p_irp ); + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PO, + ("SET_POWER for FDO %p (ext %p): type %s, state %d, action %d, IRQL %d \n", + p_dev_obj, p_ext, + (pIoStack->Parameters.Power.Type) ? "DevicePowerState" : "SystemPowerState", + pIoStack->Parameters.Power.State.DeviceState, + pIoStack->Parameters.Power.ShutdownType, KeGetCurrentIrql() )); + + switch( pIoStack->Parameters.Power.Type ) + { + case SystemPowerState: + p_ext->SystemPowerState = pIoStack->Parameters.Power.State.SystemState; + + /* + * Process on the way up the stack. We cannot block since the + * power dispatch function can be called at elevated IRQL if the + * device is in a paging/hibernation/crash dump path. 
+ */ + IoMarkIrpPending( p_irp ); + IoCopyCurrentIrpStackLocationToNext( p_irp ); +#pragma warning( push, 3 ) + IoSetCompletionRoutine( p_irp, __SystemPowerCompletion, NULL, + TRUE, TRUE, TRUE ); +#pragma warning( pop ) + PoCallDriver( p_ext->cl_ext.p_next_do, p_irp ); + + *p_action = IrpDoNothing; + status = STATUS_PENDING; + break; + + case DevicePowerState: + IoMarkIrpPending( p_irp ); + if( pIoStack->Parameters.Power.State.DeviceState == PowerDeviceD0 && + p_ext->SystemPowerState == PowerSystemWorking) + { /* power up */ + /* If we're already powered up, just pass down. */ + if( p_ext->DevicePowerState == PowerDeviceD0 ) + { + status = STATUS_SUCCESS; + *p_action = IrpIgnore; + break; + } + + /* Process in I/O completion callback. */ + IoCopyCurrentIrpStackLocationToNext( p_irp ); +#pragma warning( push, 3 ) + IoSetCompletionRoutine( p_irp, __DevicePowerUpCompletion, NULL, + TRUE, TRUE, TRUE ); +#pragma warning( pop ) + PoCallDriver( p_ext->cl_ext.p_next_do, p_irp ); + } + else + { /* power down */ + + /* Process in a work item - deregister_ca and HcaDeinit block. */ + ASSERT( !p_ext->pPoWorkItem ); + p_ext->pPoWorkItem = IoAllocateWorkItem( p_dev_obj ); + if( !p_ext->pPoWorkItem ) + { + status = STATUS_INSUFFICIENT_RESOURCES; + break; + } + + /* Process in work item callback. */ + IoQueueWorkItem( + p_ext->pPoWorkItem, __DevicePowerDownWorkItem, DelayedWorkQueue, p_irp ); + } + *p_action = IrpDoNothing; + status = STATUS_PENDING; + break; + + default: + /* Pass down and let the PDO driver handle it. */ + *p_action = IrpIgnore; + status = STATUS_SUCCESS; + break; + } + + if( !NT_SUCCESS( status ) ) + *p_action = IrpComplete; + + HCA_EXIT( HCA_DBG_PNP ); + return status; +} + +static void +__reregister_hca_cb( + IN DEVICE_OBJECT* p_dev_obj, + IN void* context ) +{ +#define SLEEP_TIME 100000 // 100 msec +#define POLL_TRIES 20 // to wait for 2 sec + int i; + NTSTATUS status; + LARGE_INTEGER interval; + hca_dev_ext_t *p_ext = (hca_dev_ext_t*)p_dev_obj->DeviceExtension; + PIO_WORKITEM pPoWorkItem = (PIO_WORKITEM)context; + + HCA_ENTER( HCA_DBG_PO ); + + IoFreeWorkItem( pPoWorkItem ); + + /* wait SLEEP_TIME_USEC usec for application to exit */ + interval.QuadPart = (-10) * SLEEP_TIME; + KeDelayExecutionThread( KernelMode, FALSE, &interval ); + for (i=0; p_ext->usecnt && i < POLL_TRIES; ++i) { + KeDelayExecutionThread( KernelMode, FALSE, &interval ); + } + + if (!p_ext->usecnt) { + /* reregister HCA */ + __hca_deregister( p_ext ); + + if( p_ext->p_al_dev ) { + status = __hca_register( p_dev_obj ); + if( !NT_SUCCESS( status ) ) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_PNP, + ("__hca_register returned 0x%08X.\n", status)); + } + } + } + + HCA_EXIT( HCA_DBG_PO ); +} + + +void reregister_hca( hca_dev_ext_t *p_ext ) +{ + DEVICE_OBJECT *p_dev_obj = (DEVICE_OBJECT *)p_ext->cl_ext.p_self_do; + PIO_WORKITEM pPoWorkItem; + + /* Process in a work item - deregister_ca and HcaDeinit block. */ + pPoWorkItem = IoAllocateWorkItem( p_dev_obj ); + if( pPoWorkItem ) + IoQueueWorkItem( pPoWorkItem, __reregister_hca_cb, + DelayedWorkQueue, pPoWorkItem ); + +} + diff --git a/branches/IBFD/hw/mthca/kernel/hca_pnp.h b/branches/IBFD/hw/mthca/kernel/hca_pnp.h new file mode 100644 index 00000000..bc74c8e1 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_pnp.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. 
+ * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +#ifndef _HCA_PNP_H_ +#define _HCA_PNP_H_ + +void hca_init_vfptr( void ); + +NTSTATUS +hca_add_device( + IN PDRIVER_OBJECT pDriverObj, + IN PDEVICE_OBJECT pPdo ); + + +#endif + + diff --git a/branches/IBFD/hw/mthca/kernel/hca_verbs.c b/branches/IBFD/hw/mthca/kernel/hca_verbs.c new file mode 100644 index 00000000..f0ddfbfd --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/hca_verbs.c @@ -0,0 +1,1697 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + + +#include "hca_driver.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "hca_verbs.tmh" +#endif +#include "mthca_dev.h" +#include "ib_cache.h" +#include "mx_abi.h" +#include "mt_pa_cash.h" + +#define PTR_ALIGN(size) (((size) + sizeof(void*) - 1) & ~(sizeof(void*) - 1)) + + +// Local declarations +ib_api_status_t +mlnx_query_qp ( + IN const ib_qp_handle_t h_qp, + OUT ib_qp_attr_t *p_qp_attr, + IN OUT ci_umv_buf_t *p_umv_buf ); + +/* +* CA Access Verbs +*/ +ib_api_status_t +mlnx_open_ca ( + IN const ib_net64_t ca_guid, // IN const char * ca_name, + IN const ci_completion_cb_t pfn_completion_cb, + IN const ci_async_event_cb_t pfn_async_event_cb, + IN const void*const ca_context, + OUT ib_ca_handle_t *ph_ca) +{ + mlnx_hca_t *p_hca; + ib_api_status_t status = IB_NOT_FOUND; + struct ib_device *ib_dev; + + HCA_ENTER(HCA_DBG_SHIM); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM, + ("context 0x%p\n", ca_context)); + + // find CA object + p_hca = mlnx_hca_from_guid( ca_guid ); + if( !p_hca ) { + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("completes with ERROR status IB_NOT_FOUND\n")); + } + HCA_EXIT(HCA_DBG_SHIM); + return IB_NOT_FOUND; + } + + ib_dev = &p_hca->mdev->ib_dev; + + if (mthca_is_livefish(p_hca->mdev)) + goto done; + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM, + ("context 0x%p\n", ca_context)); + status = mlnx_hobs_set_cb(&p_hca->hob, + pfn_completion_cb, + pfn_async_event_cb, + ca_context); + if (IB_SUCCESS != status) { + goto err_set_cb; + } + + + //TODO: do we need something for kernel users ? + + // Return pointer to HOB object +done: + if (ph_ca) *ph_ca = &p_hca->hob; + status = IB_SUCCESS; + +//err_mad_cache: +err_set_cb: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + +ib_api_status_t +mlnx_query_ca ( + IN const ib_ca_handle_t h_ca, + OUT ib_ca_attr_t *p_ca_attr, + IN OUT uint32_t *p_byte_count, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status; + uint32_t size, required_size; + uint8_t port_num, num_ports; + uint32_t num_gids, num_pkeys; + uint32_t num_page_sizes = 1; // TBD: what is actually supported + uint8_t *last_p; + struct ib_device_attr props; + struct ib_port_attr *hca_ports = NULL; + int i; + + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + struct ib_device *ib_dev = IBDEV_FROM_HOB( hob_p ); + int err; + + HCA_ENTER(HCA_DBG_SHIM); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM ,("User mode is not supported yet\n")); + p_umv_buf->status = status = IB_UNSUPPORTED; + goto err_user_unsupported; + } + + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + if (NULL == p_byte_count) { + status = IB_INVALID_PARAMETER; + goto err_byte_count; + } + + // query the device + err = mthca_query_device(ib_dev, &props ); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("ib_query_device failed (%d)\n",err)); + status = errno_to_iberr(err); + goto err_query_device; + } + + // alocate arrary for port properties + num_ports = ib_dev->phys_port_cnt; /* Number of physical ports of the HCA */ + if (NULL == (hca_ports = cl_zalloc( num_ports * sizeof *hca_ports))) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("Failed to cl_zalloc ports array\n")); + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_ports; + } + + // start calculation of 
ib_ca_attr_t full size + num_gids = 0; + num_pkeys = 0; + required_size = PTR_ALIGN(sizeof(ib_ca_attr_t)) + + PTR_ALIGN(sizeof(uint32_t) * num_page_sizes) + + PTR_ALIGN(sizeof(ib_port_attr_t) * num_ports)+ + PTR_ALIGN(MTHCA_BOARD_ID_LEN)+ + PTR_ALIGN(sizeof(uplink_info_t)); /* uplink info */ + + // get port properties + for (port_num = 0; port_num <= end_port(ib_dev) - start_port(ib_dev); ++port_num) { + // request + err = mthca_query_port(ib_dev, port_num + start_port(ib_dev), &hca_ports[port_num]); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, ("ib_query_port failed(%d) for port %d\n",err, port_num)); + status = errno_to_iberr(err); + goto err_query_port; + } + + // calculate GID table size + num_gids = hca_ports[port_num].gid_tbl_len; + size = PTR_ALIGN(sizeof(ib_gid_t) * num_gids); + required_size += size; + + // calculate pkeys table size + num_pkeys = hca_ports[port_num].pkey_tbl_len; + size = PTR_ALIGN(sizeof(uint16_t) * num_pkeys); + required_size += size; + } + + // resource sufficience check + if (NULL == p_ca_attr || *p_byte_count < required_size) { + *p_byte_count = required_size; + status = IB_INSUFFICIENT_MEMORY; + if ( p_ca_attr != NULL) { + HCA_PRINT (TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("Failed *p_byte_count (%d) < required_size (%d)\n", *p_byte_count, required_size )); + } + goto err_insuff_mem; + } + + // Space is sufficient - setup table pointers + last_p = (uint8_t*)p_ca_attr; + last_p += PTR_ALIGN(sizeof(*p_ca_attr)); + + p_ca_attr->p_page_size = (uint32_t*)last_p; + last_p += PTR_ALIGN(num_page_sizes * sizeof(uint32_t)); + + p_ca_attr->p_port_attr = (ib_port_attr_t *)last_p; + last_p += PTR_ALIGN(num_ports * sizeof(ib_port_attr_t)); + + for (port_num = 0; port_num < num_ports; port_num++) { + p_ca_attr->p_port_attr[port_num].p_gid_table = (ib_gid_t *)last_p; + size = PTR_ALIGN(sizeof(ib_gid_t) * hca_ports[port_num].gid_tbl_len); + last_p += size; + + p_ca_attr->p_port_attr[port_num].p_pkey_table = (uint16_t *)last_p; + size = PTR_ALIGN(sizeof(uint16_t) * hca_ports[port_num].pkey_tbl_len); + last_p += size; + } + + //copy vendor specific data + cl_memcpy(last_p,to_mdev(ib_dev)->board_id, MTHCA_BOARD_ID_LEN); + last_p += PTR_ALIGN(MTHCA_BOARD_ID_LEN); + *(uplink_info_t*)last_p = to_mdev(ib_dev)->uplink_info; + last_p += PTR_ALIGN(sizeof(uplink_info_t)); /* uplink info */ + + // Separate the loops to ensure that table pointers are always setup + for (port_num = 0; port_num < num_ports; port_num++) { + + // get pkeys, using cache + for (i=0; i < hca_ports[port_num].pkey_tbl_len; ++i) { + err = ib_get_cached_pkey( ib_dev, port_num + start_port(ib_dev), i, + &p_ca_attr->p_port_attr[port_num].p_pkey_table[i] ); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT (TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("ib_get_cached_pkey failed (%d) for port_num %d, index %d\n", + err, port_num + start_port(ib_dev), i)); + goto err_get_pkey; + } + } + + // get gids, using cache + for (i=0; i < hca_ports[port_num].gid_tbl_len; ++i) { + union ib_gid * __ptr64 gid = (union ib_gid *)&p_ca_attr->p_port_attr[port_num].p_gid_table[i]; + err = ib_get_cached_gid( ib_dev, port_num + start_port(ib_dev), i, (union ib_gid *)gid ); + //TODO: do we need to convert gids to little endian + if (err) { + status = errno_to_iberr(err); + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("ib_get_cached_gid failed (%d) for port_num %d, index %d\n", + err, port_num + start_port(ib_dev), i)); + goto err_get_gid; + } + } + + HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM,("port %d gid0:\n", port_num)); + 
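	/*
	 * Note on the buffer layout carved out above: PTR_ALIGN() rounds a byte
	 * count up to the next multiple of sizeof(void*), so each sub-table
	 * (page sizes, port attributes, GID and PKEY tables, board id, uplink
	 * info) placed inside the single caller-supplied ib_ca_attr_t buffer
	 * starts on a pointer-aligned boundary.  For example, assuming 8-byte
	 * pointers, PTR_ALIGN(13) yields 16.
	 */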
HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM, + (" 0x%x%x%x%x%x%x%x%x-0x%x%x%x%x%x%x%x%x\n", + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[0], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[1], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[2], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[3], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[4], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[5], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[6], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[7], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[8], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[9], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[10], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[11], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[12], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[13], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[14], + p_ca_attr->p_port_attr[port_num].p_gid_table[0].raw[15])); + } + + // set result size + p_ca_attr->size = required_size; + CL_ASSERT( required_size == (((uintn_t)last_p) - ((uintn_t)p_ca_attr)) ); + HCA_PRINT(TRACE_LEVEL_VERBOSE, HCA_DBG_SHIM , ("Space required %d used %d\n", + required_size, (int)((uintn_t)last_p - (uintn_t)p_ca_attr) )); + + // !!! GID/PKEY tables must be queried before this call !!! + mlnx_conv_hca_cap(ib_dev, &props, hca_ports, p_ca_attr); + + status = IB_SUCCESS; + +err_get_gid: +err_get_pkey: +err_insuff_mem: +err_query_port: + cl_free(hca_ports); +err_alloc_ports: +err_query_device: +err_byte_count: +err_unsupported: +err_user_unsupported: + if( status != IB_INSUFFICIENT_MEMORY && status != IB_SUCCESS ) + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + +ib_api_status_t +mlnx_modify_ca ( + IN const ib_ca_handle_t h_ca, + IN const uint8_t port_num, + IN const ib_ca_mod_t modca_cmd, + IN const ib_port_attr_mod_t *p_port_attr) +{ +#define SET_CAP_MOD(al_mask, al_fld, ib) \ + if (modca_cmd & al_mask) { \ + if (p_port_attr->cap.##al_fld) \ + props.set_port_cap_mask |= ib; \ + else \ + props.clr_port_cap_mask |= ib; \ + } + + ib_api_status_t status; + int err; + struct ib_port_modify props; + int port_modify_mask = 0; + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + struct ib_device *ib_dev = IBDEV_FROM_HOB( hob_p ); + + HCA_ENTER(HCA_DBG_SHIM); + + //sanity check + if( !cl_is_blockable() ) { + status = IB_UNSUPPORTED; + goto err_unsupported; + } + + if (port_num < start_port(ib_dev) || port_num > end_port(ib_dev)) { + status = IB_INVALID_PORT; + goto err_port; + } + + // prepare parameters + RtlZeroMemory(&props, sizeof(props)); + SET_CAP_MOD(IB_CA_MOD_IS_SM, sm, IB_PORT_SM); + SET_CAP_MOD(IB_CA_MOD_IS_SNMP_SUPPORTED, snmp, IB_PORT_SNMP_TUNNEL_SUP); + SET_CAP_MOD(IB_CA_MOD_IS_DEV_MGMT_SUPPORTED, dev_mgmt, IB_PORT_DEVICE_MGMT_SUP); + SET_CAP_MOD(IB_CA_MOD_IS_VEND_SUPPORTED, vend, IB_PORT_VENDOR_CLASS_SUP); + if ((modca_cmd & IB_CA_MOD_QKEY_CTR) && (p_port_attr->qkey_ctr == 0)) + port_modify_mask |= IB_PORT_RESET_QKEY_CNTR; + + // modify port + err = mthca_modify_port(ib_dev, port_num, port_modify_mask, &props ); + if (err) { + status = errno_to_iberr(err); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("mthca_modify_port failed (%d) \n",err)); + goto err_modify_port; + } + + status = IB_SUCCESS; + +err_modify_port: +err_port: +err_unsupported: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_SHIM, + 
("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + +ib_api_status_t +mlnx_close_ca ( + IN ib_ca_handle_t h_ca) +{ + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + HCA_ENTER(HCA_DBG_SHIM); + + if (mthca_is_livefish(MDEV_FROM_HOB( hob_p ))) + goto done; + + mlnx_hobs_remove(h_ca); + +done: + HCA_EXIT(HCA_DBG_SHIM); + + return IB_SUCCESS; +} + + +static ib_api_status_t +mlnx_um_open( + IN const ib_ca_handle_t h_ca, + IN OUT ci_umv_buf_t* const p_umv_buf, + OUT ib_ca_handle_t* const ph_um_ca ) +{ + int err; + ib_api_status_t status; + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + hca_dev_ext_t *ext_p = EXT_FROM_HOB( hob_p ); + struct ib_device *ib_dev = IBDEV_FROM_HOB( hob_p ); + struct ib_ucontext *p_context; + struct ibv_get_context_resp *uresp_p; + struct ibv_alloc_pd_resp resp; + ci_umv_buf_t umv_buf; + + HCA_ENTER(HCA_DBG_SHIM); + + // sanity check + ASSERT( p_umv_buf ); + if( !p_umv_buf->command ) + { + p_context = cl_zalloc( sizeof(struct ib_ucontext) ); + if( !p_context ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_ucontext; + } + /* Copy the dev info. */ + p_context->device = ib_dev; + p_umv_buf->output_size = 0; + goto done; + } + + // create user context in kernel + p_context = mthca_alloc_ucontext(ib_dev, p_umv_buf); + if (IS_ERR(p_context)) { + err = PTR_ERR(p_context); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM, + ("mthca_alloc_ucontext failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_ucontext; + } + + /* allocate pd */ + umv_buf.command = 1; + umv_buf.input_size = umv_buf.status = 0; + umv_buf.output_size = sizeof(struct ibv_alloc_pd_resp); + umv_buf.p_inout_buf = &resp; + //NB: Pay attention ! Ucontext parameter is important here: + // when it is present (i.e. 
- for user space) - mthca_alloc_pd won't create MR + p_context->pd = ibv_alloc_pd(ib_dev, p_context, &umv_buf); + if (IS_ERR(p_context->pd)) { + err = PTR_ERR(p_context->pd); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM, + ("ibv_alloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_pd; + } + + // fill more parameters for user (sanity checks are in mthca_alloc_ucontext) + uresp_p = (struct ibv_get_context_resp *)(void*)p_umv_buf->p_inout_buf; + uresp_p->uar_addr = (uint64_t)(UINT_PTR)p_context->user_uar; + uresp_p->pd_handle = resp.pd_handle; + uresp_p->pdn = resp.pdn; + uresp_p->vend_id = (uint32_t)ext_p->hcaConfig.VendorID; + uresp_p->dev_id = (uint16_t)ext_p->hcaConfig.DeviceID; + +done: + // some more inits + p_context->va = p_context->p_mdl = NULL; + p_context->fw_if_open = FALSE; + KeInitializeMutex( &p_context->mutex, 0 ); + // chain user context to the device + cl_spinlock_acquire( &ext_p->uctx_lock ); + cl_qlist_insert_tail( &ext_p->uctx_list, &p_context->list_item ); + cl_atomic_inc(&ext_p->usecnt); + cl_spinlock_release( &ext_p->uctx_lock ); + + // return the result + if (ph_um_ca) *ph_um_ca = (ib_ca_handle_t)p_context; + + status = IB_SUCCESS; + goto end; + +err_alloc_pd: + mthca_dealloc_ucontext(p_context); +err_alloc_ucontext: +end: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_SHIM); + return status; +} + +static void +mlnx_um_close( + IN ib_ca_handle_t h_ca, + IN ib_ca_handle_t h_um_ca ) +{ + struct ib_ucontext *p_ucontext = (struct ib_ucontext *)h_um_ca; + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + hca_dev_ext_t *ext_p = EXT_FROM_HOB( hob_p ); + + if (mthca_is_livefish(to_mdev(p_ucontext->device))) + goto done; + unmap_crspace_for_all(p_ucontext); +done: + cl_spinlock_acquire( &ext_p->uctx_lock ); + cl_qlist_remove_item( &ext_p->uctx_list, &p_ucontext->list_item ); + cl_atomic_dec(&ext_p->usecnt); + cl_spinlock_release( &ext_p->uctx_lock ); + if( !p_ucontext->pd ) + cl_free( h_um_ca ); + else + ibv_um_close(p_ucontext); + pa_cash_print(); + return; +} + + +/* +* Protection Domain and Reliable Datagram Domain Verbs +*/ + +ib_api_status_t +mlnx_allocate_pd ( + IN const ib_ca_handle_t h_ca, + IN const ib_pd_type_t type, + OUT ib_pd_handle_t *ph_pd, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status; + struct ib_device *ib_dev; + struct ib_ucontext *p_context; + struct ib_pd *ib_pd_p; + int err; + + //TODO: how are we use it ? 
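	// Note: the requested ib_pd_type_t is accepted for interface
	// compatibility but is not consulted here; kernel and user PDs alike
	// are allocated the same way through ibv_alloc_pd() below.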
+ UNREFERENCED_PARAMETER(type); + + HCA_ENTER(HCA_DBG_PD); + + if( p_umv_buf ) { + p_context = (struct ib_ucontext *)h_ca; + ib_dev = p_context->device; + } + else { + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + p_context = NULL; + ib_dev = IBDEV_FROM_HOB( hob_p ); + } + + // create PD + ib_pd_p = ibv_alloc_pd(ib_dev, p_context, p_umv_buf); + if (IS_ERR(ib_pd_p)) { + err = PTR_ERR(ib_pd_p); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_PD, + ("ibv_alloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_pd; + } + + // return the result + if (ph_pd) *ph_pd = (ib_pd_handle_t)ib_pd_p; + + status = IB_SUCCESS; + +err_alloc_pd: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_PD, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_PD); + return status; +} + +ib_api_status_t +mlnx_deallocate_pd ( + IN ib_pd_handle_t h_pd) +{ + ib_api_status_t status; + int err; + struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + PREP_IBDEV_FOR_PRINT(ib_pd_p->device); + + HCA_ENTER( HCA_DBG_PD); + + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_PD, + ("pcs %p\n", PsGetCurrentProcess())); + + // dealloc pd + err = ibv_dealloc_pd( ib_pd_p ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_PD + ,("ibv_dealloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_dealloc_pd; + } + status = IB_SUCCESS; + +err_dealloc_pd: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_PD + ,("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_PD); + return status; +} + +/* +* Address Vector Management Verbs +*/ +ib_api_status_t +mlnx_create_av ( + IN const ib_pd_handle_t h_pd, + IN const ib_av_attr_t *p_addr_vector, + OUT ib_av_handle_t *ph_av, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err = 0; + ib_api_status_t status = IB_SUCCESS; + struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + struct ib_device *ib_dev = ib_pd_p->device; + struct ib_ah *ib_av_p; + struct ib_ah_attr ah_attr; + struct ib_ucontext *p_context = NULL; + + HCA_ENTER(HCA_DBG_AV); + + if( p_umv_buf && p_umv_buf->command ) { + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_ah) || + p_umv_buf->output_size < sizeof(struct ibv_create_ah_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + p_context = ib_pd_p->ucontext; + } + else + p_context = NULL; + + // fill parameters + RtlZeroMemory(&ah_attr, sizeof(ah_attr)); + mlnx_conv_ibal_av( ib_dev, p_addr_vector, &ah_attr ); + + ib_av_p = ibv_create_ah(ib_pd_p, &ah_attr, p_context, p_umv_buf); + if (IS_ERR(ib_av_p)) { + err = PTR_ERR(ib_av_p); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_AV, + ("ibv_create_ah failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_av; + } + + // return the result + if (ph_av) *ph_av = (ib_av_handle_t)ib_av_p; + + status = IB_SUCCESS; + +err_alloc_av: +err_inval_params: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_AV); + return status; +} + +ib_api_status_t +mlnx_query_av ( + IN const ib_av_handle_t h_av, + OUT ib_av_attr_t *p_addr_vector, + OUT ib_pd_handle_t *ph_pd, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_ah *ib_ah_p = (struct ib_ah *)h_av; + 
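	/*
	 * Note: ibv_query_ah() is not implemented by the low-level driver (see
	 * the WIN_TO_BE_CHANGED block below), so the address-vector attributes
	 * are reconstructed directly from the mthca AH via mlnx_conv_mthca_av(),
	 * and the owning PD is reported back through *ph_pd.
	 */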
PREP_IBDEV_FOR_PRINT(ib_ah_p->device); + + HCA_ENTER(HCA_DBG_AV); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_AV, + ("User mode is not supported yet\n")); + status = IB_UNSUPPORTED; + goto err_user_unsupported; + } + + // query AV +#ifdef WIN_TO_BE_CHANGED + //TODO: not implemented in low-level driver + err = ibv_query_ah(ib_ah_p, &ah_attr) + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_AV, + ("ibv_query_ah failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_query_ah; + } + // convert to IBAL structure: something like that + mlnx_conv_mthca_av( p_addr_vector, &ah_attr ); +#else + + err = mlnx_conv_mthca_av( ib_ah_p, p_addr_vector ); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_AV, + ("mlnx_conv_mthca_av failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_conv_mthca_av; + } +#endif + + // results + *ph_pd = (ib_pd_handle_t)ib_ah_p->pd; + +err_conv_mthca_av: +err_user_unsupported: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_AV); + return status; +} + +ib_api_status_t +mlnx_modify_av ( + IN const ib_av_handle_t h_av, + IN const ib_av_attr_t *p_addr_vector, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + struct ib_ah_attr ah_attr; + ib_api_status_t status = IB_SUCCESS; + struct ib_ah *ib_ah_p = (struct ib_ah *)h_av; + struct ib_device *ib_dev = ib_ah_p->pd->device; + + HCA_ENTER(HCA_DBG_AV); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("User mode is not supported yet\n")); + status = IB_UNSUPPORTED; + goto err_user_unsupported; + } + + // fill parameters + RtlZeroMemory(&ah_attr, sizeof(ah_attr)); + mlnx_conv_ibal_av( ib_dev, p_addr_vector, &ah_attr ); + + // modify AH +#ifdef WIN_TO_BE_CHANGED + //TODO: not implemented in low-level driver + err = ibv_modify_ah(ib_ah_p, &ah_attr) + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("ibv_query_ah failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_query_ah; + } +#else + + mlnx_modify_ah( ib_ah_p, &ah_attr ); +#endif + +err_user_unsupported: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_AV); + return status; +} + +ib_api_status_t +mlnx_destroy_av ( + IN const ib_av_handle_t h_av) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_ah *ib_ah_p = (struct ib_ah *)h_av; + PREP_IBDEV_FOR_PRINT(ib_ah_p->device); + + HCA_ENTER(HCA_DBG_AV); + + // destroy AV + err = ibv_destroy_ah( ib_ah_p ); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_AV, + ("ibv_destroy_ah failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_destroy_ah; + } + +err_destroy_ah: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_AV); + return status; +} + +/* +* Shared Queue Pair Management Verbs +*/ + + +ib_api_status_t +mlnx_create_srq ( + IN const ib_pd_handle_t h_pd, + IN const void *srq_context, + IN const ib_srq_attr_t * const p_srq_attr, + OUT ib_srq_handle_t *ph_srq, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status; + struct ib_srq *ib_srq_p; + struct mthca_srq *srq_p; + struct ib_srq_init_attr srq_init_attr; + struct ib_ucontext *p_context = NULL; + struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + struct 
ib_device *ib_dev = ib_pd_p->device; + mlnx_hob_t *hob_p = HOB_FROM_IBDEV(ib_dev); + + HCA_ENTER(HCA_DBG_SRQ); + + if( p_umv_buf && p_umv_buf->command) { + + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_srq) || + p_umv_buf->output_size < sizeof(struct ibv_create_srq_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + p_context = ib_pd_p->ucontext; + } + + // prepare the parameters + RtlZeroMemory(&srq_init_attr, sizeof(srq_init_attr)); + srq_init_attr.event_handler = srq_event_handler; + srq_init_attr.srq_context = hob_p; + srq_init_attr.attr = *p_srq_attr; + + // allocate srq + ib_srq_p = ibv_create_srq(ib_pd_p, &srq_init_attr, p_context, p_umv_buf ); + if (IS_ERR(ib_srq_p)) { + err = PTR_ERR(ib_srq_p); + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_SRQ, ("ibv_create_srq failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_create_srq; + } + + // fill the object + srq_p = (struct mthca_srq *)ib_srq_p; + srq_p->srq_context = (void*)srq_context; + + // return the result + if (ph_srq) *ph_srq = (ib_srq_handle_t)srq_p; + + status = IB_SUCCESS; + +err_create_srq: +err_inval_params: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_SRQ); + return status; +} + + +ib_api_status_t +mlnx_modify_srq ( + IN const ib_srq_handle_t h_srq, + IN const ib_srq_attr_t* const p_srq_attr, + IN const ib_srq_attr_mask_t srq_attr_mask, + IN OUT ci_umv_buf_t *p_umv_buf OPTIONAL ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_srq *ib_srq = (struct ib_srq *)h_srq; + struct ib_device *ib_dev = ib_srq->device; + UNUSED_PARAM(p_umv_buf); + UNUSED_PARAM_WOWPP(ib_dev); + + HCA_ENTER(HCA_DBG_SRQ); + + err = ibv_modify_srq(ib_srq, (void*)p_srq_attr, srq_attr_mask); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_AV, + ("ibv_modify_srq failed (%d)\n", err)); + status = errno_to_iberr(err); + } + + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_SRQ); + return status; +} + +ib_api_status_t +mlnx_query_srq ( + IN const ib_srq_handle_t h_srq, + OUT ib_srq_attr_t* const p_srq_attr, + IN OUT ci_umv_buf_t *p_umv_buf OPTIONAL ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_srq *ib_srq = (struct ib_srq *)h_srq; + struct ib_device *ib_dev = ib_srq->device; + UNUSED_PARAM(p_umv_buf); + UNUSED_PARAM_WOWPP(ib_dev); + + HCA_ENTER(HCA_DBG_SRQ); + + err = ibv_query_srq(ib_srq, p_srq_attr); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_AV, + ("ibv_query_srq failed (%d)\n", err)); + status = errno_to_iberr(err); + } + + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_SRQ); + return status; +} + +ib_api_status_t +mlnx_destroy_srq ( + IN const ib_srq_handle_t h_srq ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ib_srq *ib_srq = (struct ib_srq *)h_srq; + struct ib_device *ib_dev = ib_srq->device; + UNUSED_PARAM_WOWPP(ib_dev); + + HCA_ENTER(HCA_DBG_SRQ); + + err = ibv_destroy_srq(ib_srq); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR, HCA_DBG_AV, + ("ibv_destroy_srq failed (%d)\n", err)); + status = errno_to_iberr(err); + } + + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SRQ, 
+ ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_SRQ); + return status; +} + +/* +* Queue Pair Management Verbs +*/ + + +static ib_api_status_t +_create_qp ( + IN const ib_pd_handle_t h_pd, + IN const uint8_t port_num, + IN const void *qp_context, + IN const ib_qp_create_t *p_create_attr, + OUT ib_qp_attr_t *p_qp_attr, + OUT ib_qp_handle_t *ph_qp, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status; + struct ib_qp * ib_qp_p; + struct mthca_qp *qp_p; + struct ib_qp_init_attr qp_init_attr; + struct ib_ucontext *p_context = NULL; + struct ib_pd *ib_pd_p = (struct ib_pd *)h_pd; + struct ib_device *ib_dev = ib_pd_p->device; + mlnx_hob_t *hob_p = HOB_FROM_IBDEV(ib_dev); + + HCA_ENTER(HCA_DBG_QP); + + if( p_umv_buf && p_umv_buf->command ) { + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_qp) || + p_umv_buf->output_size < sizeof(struct ibv_create_qp_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + p_context = ib_pd_p->ucontext; + } + + // prepare the parameters + RtlZeroMemory(&qp_init_attr, sizeof(qp_init_attr)); + qp_init_attr.qp_type = p_create_attr->qp_type; + qp_init_attr.event_handler = qp_event_handler; + qp_init_attr.qp_context = hob_p; + qp_init_attr.recv_cq = (struct ib_cq *)p_create_attr->h_rq_cq; + qp_init_attr.send_cq = (struct ib_cq *)p_create_attr->h_sq_cq; + qp_init_attr.srq = (struct ib_srq *)p_create_attr->h_srq; + qp_init_attr.cap.max_recv_sge = p_create_attr->rq_sge; + qp_init_attr.cap.max_send_sge = p_create_attr->sq_sge; + qp_init_attr.cap.max_recv_wr = p_create_attr->rq_depth; + qp_init_attr.cap.max_send_wr = p_create_attr->sq_depth; + qp_init_attr.sq_sig_type = (p_create_attr->sq_signaled) ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + qp_init_attr.port_num = port_num; + + + // create qp + ib_qp_p = ibv_create_qp( ib_pd_p, &qp_init_attr, p_context, p_umv_buf ); + if (IS_ERR(ib_qp_p)) { + err = PTR_ERR(ib_qp_p); + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_QP, + ("ibv_create_qp failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_create_qp; + } + + // fill the object + qp_p = (struct mthca_qp *)ib_qp_p; + qp_p->qp_context = (void*)qp_context; + qp_p->qp_init_attr = qp_init_attr; + + // Query QP to obtain requested attributes + if (p_qp_attr) { + status = mlnx_query_qp ((ib_qp_handle_t)ib_qp_p, p_qp_attr, p_umv_buf); + if (status != IB_SUCCESS) + goto err_query_qp; + } + + // return the results + if (ph_qp) *ph_qp = (ib_qp_handle_t)ib_qp_p; + + status = IB_SUCCESS; + goto end; + +err_query_qp: + ibv_destroy_qp( ib_qp_p ); +err_create_qp: +err_inval_params: +end: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_create_spl_qp ( + IN const ib_pd_handle_t h_pd, + IN const uint8_t port_num, + IN const void *qp_context, + IN const ib_qp_create_t *p_create_attr, + OUT ib_qp_attr_t *p_qp_attr, + OUT ib_qp_handle_t *ph_qp ) +{ + ib_api_status_t status; + PREP_IBDEV_FOR_PRINT(((struct ib_pd*)h_pd)->device); + + HCA_ENTER(HCA_DBG_SHIM); + + status = _create_qp( h_pd, port_num, + qp_context, p_create_attr, p_qp_attr, ph_qp, NULL ); + + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_QP); + return status; +} + 
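/*
 * An illustrative, assumed usage sketch: how a kernel-mode consumer holding
 * the ci_interface_t (filled in by setup_ci_interface(), later in this file)
 * might create an RC QP through this verb.  Handle names and queue depths are
 * placeholders and error handling is trimmed; this is an example, not a
 * compiled part of the driver.
 */
#if 0	/* example only */
static ib_api_status_t example_create_rc_qp(
	IN	ci_interface_t		*p_ifc,
	IN	ib_pd_handle_t		h_pd,
	IN	ib_cq_handle_t		h_send_cq,
	IN	ib_cq_handle_t		h_recv_cq,
	OUT	ib_qp_handle_t		*ph_qp )
{
	ib_qp_create_t create;

	/* describe the QP to be created */
	cl_memclr( &create, sizeof(create) );
	create.qp_type     = IB_QPT_RELIABLE_CONN;
	create.sq_depth    = 128;
	create.rq_depth    = 128;
	create.sq_sge      = 4;
	create.rq_sge      = 4;
	create.h_sq_cq     = h_send_cq;	/* from earlier create_cq calls */
	create.h_rq_cq     = h_recv_cq;
	create.sq_signaled = TRUE;

	/* kernel callers pass NULL for p_qp_attr and p_umv_buf */
	return p_ifc->create_qp( h_pd, NULL, &create, NULL, ph_qp, NULL );
}
#endif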
+ib_api_status_t +mlnx_create_qp ( + IN const ib_pd_handle_t h_pd, + IN const void *qp_context, + IN const ib_qp_create_t *p_create_attr, + OUT ib_qp_attr_t *p_qp_attr, + OUT ib_qp_handle_t *ph_qp, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status; + PREP_IBDEV_FOR_PRINT(((struct ib_pd*)h_pd)->device); + + //NB: algorithm of mthca_alloc_sqp() requires port_num + // PRM states, that special pares are created in couples, so + // looks like we can put here port_num = 1 always + uint8_t port_num = 1; + + HCA_ENTER(HCA_DBG_QP); + + status = _create_qp( h_pd, port_num, + qp_context, p_create_attr, p_qp_attr, ph_qp, p_umv_buf ); + + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_modify_qp ( + IN const ib_qp_handle_t h_qp, + IN const ib_qp_mod_t *p_modify_attr, + OUT ib_qp_attr_t *p_qp_attr OPTIONAL, + IN OUT ci_umv_buf_t *p_umv_buf OPTIONAL ) +{ + ib_api_status_t status; + int err; + struct ib_qp_attr qp_attr; + int qp_attr_mask; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + PREP_IBDEV_FOR_PRINT(ib_qp_p->device); + + HCA_ENTER(HCA_DBG_QP); + + // sanity checks + if( p_umv_buf && p_umv_buf->command ) { + // sanity checks + if (p_umv_buf->output_size < sizeof(struct ibv_modify_qp_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + } + + // fill parameters + status = mlnx_conv_qp_modify_attr( ib_qp_p, ib_qp_p->qp_type, + p_modify_attr, &qp_attr, &qp_attr_mask ); + if (status == IB_NOT_DONE) + goto query_qp; + if (status != IB_SUCCESS ) + goto err_mode_unsupported; + + // modify QP + err = ibv_modify_qp(ib_qp_p, &qp_attr, qp_attr_mask); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_QP, + ("ibv_modify_qp failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_modify_qp; + } + + // Query QP to obtain requested attributes +query_qp: + if (p_qp_attr) { + status = mlnx_query_qp ((ib_qp_handle_t)ib_qp_p, p_qp_attr, p_umv_buf); + if (status != IB_SUCCESS) + goto err_query_qp; + } + + if( p_umv_buf && p_umv_buf->command ) { + struct ibv_modify_qp_resp resp; + resp.attr_mask = qp_attr_mask; + resp.qp_state = qp_attr.qp_state; + err = ib_copy_to_umv_buf(p_umv_buf, &resp, sizeof(struct ibv_modify_qp_resp)); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_SHIM ,("ib_copy_to_umv_buf failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_copy; + } + } + + status = IB_SUCCESS; + +err_copy: +err_query_qp: +err_modify_qp: +err_mode_unsupported: +err_inval_params: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_ndi_modify_qp ( + IN const ib_qp_handle_t h_qp, + IN const ib_qp_mod_t *p_modify_attr, + OUT ib_qp_attr_t *p_qp_attr OPTIONAL, + IN const uint32_t buf_size, + IN uint8_t* const p_outbuf) +{ + ci_umv_buf_t umv_buf; + ib_api_status_t status; + struct ibv_modify_qp_resp resp; + void *buf = &resp; + + HCA_ENTER(HCA_DBG_QP); + + /* imitate umv_buf */ + umv_buf.command = TRUE; /* special case for NDI. 
Usually it's TRUE */ + umv_buf.input_size = 0; + umv_buf.output_size = sizeof(struct ibv_modify_qp_resp); + umv_buf.p_inout_buf = buf; + + status = mlnx_modify_qp ( h_qp, p_modify_attr, p_qp_attr, &umv_buf ); + + if (status == IB_SUCCESS) { + cl_memclr( p_outbuf, buf_size ); + *p_outbuf = resp.qp_state; + } + + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_query_qp ( + IN const ib_qp_handle_t h_qp, + OUT ib_qp_attr_t *p_qp_attr, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status = IB_SUCCESS; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + struct mthca_qp *qp_p = (struct mthca_qp *)ib_qp_p; + + UNREFERENCED_PARAMETER(p_umv_buf); + + HCA_ENTER( HCA_DBG_QP); + // sanity checks + + // clean the structure + RtlZeroMemory( p_qp_attr, sizeof *p_qp_attr ); + + // fill the structure + //TODO: this function is to be implemented via ibv_query_qp, which is not supported now + p_qp_attr->h_pd = (ib_pd_handle_t)qp_p->ibqp.pd; + p_qp_attr->qp_type = qp_p->ibqp.qp_type; + p_qp_attr->sq_max_inline = qp_p->qp_init_attr.cap.max_inline_data; + p_qp_attr->sq_depth = qp_p->qp_init_attr.cap.max_send_wr; + p_qp_attr->rq_depth = qp_p->qp_init_attr.cap.max_recv_wr; + p_qp_attr->sq_sge = qp_p->qp_init_attr.cap.max_send_sge; + p_qp_attr->rq_sge = qp_p->qp_init_attr.cap.max_recv_sge; + p_qp_attr->resp_res = qp_p->resp_depth; + p_qp_attr->h_sq_cq = (ib_cq_handle_t)qp_p->ibqp.send_cq; + p_qp_attr->h_rq_cq = (ib_cq_handle_t)qp_p->ibqp.recv_cq; + p_qp_attr->sq_signaled = qp_p->sq_policy == IB_SIGNAL_ALL_WR; + p_qp_attr->state = mlnx_qps_to_ibal( qp_p->state ); + p_qp_attr->num = cl_hton32(qp_p->ibqp.qp_num); + +#ifdef WIN_TO_BE_CHANGED +//TODO: don't know how to fill the following fields without support of query_qp in MTHCA + p_qp_attr->access_ctrl = qp_p-> + p_qp_attr->pkey_index = qp_p-> + p_qp_attr->dest_num = qp_p- + p_qp_attr->init_depth = qp_p- + p_qp_attr->qkey = qp_p- + p_qp_attr->sq_psn = qp_p- + p_qp_attr->rq_psn = qp_p- + p_qp_attr->primary_port = qp_p- + p_qp_attr->alternate_port = qp_p- + p_qp_attr->primary_av = qp_p- + p_qp_attr->alternate_av = qp_p- + p_qp_attr->apm_state = qp_p- +#endif + + status = IB_SUCCESS; + + HCA_EXIT(HCA_DBG_QP); + return status; +} + +ib_api_status_t +mlnx_destroy_qp ( + IN const ib_qp_handle_t h_qp, + IN const uint64_t timewait ) +{ + ib_api_status_t status; + int err; + struct ib_qp *ib_qp_p = (struct ib_qp *)h_qp; + PREP_IBDEV_FOR_PRINT(ib_qp_p->device); + + UNUSED_PARAM( timewait ); + + HCA_ENTER( HCA_DBG_QP); + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM , + ("qpnum %#x, pcs %p\n", ib_qp_p->qp_num, PsGetCurrentProcess()) ); + + err = ibv_destroy_qp( ib_qp_p ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP, + ("ibv_destroy_qp failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_destroy_qp; + } + + status = IB_SUCCESS; + +err_destroy_qp: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_QP); + return status; +} + +/* +* Completion Queue Managment Verbs. 
+*/ + +ib_api_status_t +mlnx_create_cq ( + IN const ib_ca_handle_t h_ca, + IN const void *cq_context, + IN OUT uint32_t *p_size, + OUT ib_cq_handle_t *ph_cq, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status; + struct ib_cq *ib_cq_p; + struct mthca_cq *cq_p; + mlnx_hob_t *hob_p; + struct ib_device *ib_dev; + struct ib_ucontext *p_context; + + HCA_ENTER(HCA_DBG_CQ); + + if( p_umv_buf ) { + + p_context = (struct ib_ucontext *)h_ca; + hob_p = HOB_FROM_IBDEV(p_context->device); + ib_dev = p_context->device; + + // sanity checks + if (p_umv_buf->input_size < sizeof(struct ibv_create_cq) || + p_umv_buf->output_size < sizeof(struct ibv_create_cq_resp) || + !p_umv_buf->p_inout_buf) { + status = IB_INVALID_PARAMETER; + goto err_inval_params; + } + } + else { + hob_p = (mlnx_hob_t *)h_ca; + p_context = NULL; + ib_dev = IBDEV_FROM_HOB( hob_p ); + } + + /* sanity check */ + if (!*p_size || *p_size > (uint32_t)ib_dev->mdev->limits.max_cqes) { + status = IB_INVALID_CQ_SIZE; + goto err_cqe; + } + + // allocate cq + ib_cq_p = ibv_create_cq(ib_dev, + cq_comp_handler, cq_event_handler, + hob_p, *p_size, p_context, p_umv_buf ); + if (IS_ERR(ib_cq_p)) { + err = PTR_ERR(ib_cq_p); + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_CQ, ("ibv_create_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_create_cq; + } + + // fill the object + cq_p = (struct mthca_cq *)ib_cq_p; + cq_p->cq_context = (void*)cq_context; + + // return the result +// *p_size = *p_size; // return the same value + *p_size = ib_cq_p->cqe; + + if (ph_cq) *ph_cq = (ib_cq_handle_t)cq_p; + + status = IB_SUCCESS; + +err_create_cq: +err_inval_params: +err_cqe: + if (p_umv_buf && p_umv_buf->command) + p_umv_buf->status = status; + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_CQ); + return status; +} + +ib_api_status_t +mlnx_resize_cq ( + IN const ib_cq_handle_t h_cq, + IN OUT uint32_t *p_size, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + UNREFERENCED_PARAMETER(h_cq); + UNREFERENCED_PARAMETER(p_size); + if (p_umv_buf && p_umv_buf->command) { + p_umv_buf->status = IB_UNSUPPORTED; + } + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_CQ,("mlnx_resize_cq not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_query_cq ( + IN const ib_cq_handle_t h_cq, + OUT uint32_t *p_size, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + UNREFERENCED_PARAMETER(h_cq); + UNREFERENCED_PARAMETER(p_size); + if (p_umv_buf && p_umv_buf->command) { + p_umv_buf->status = IB_UNSUPPORTED; + } + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ,("mlnx_query_cq not implemented\n")); + return IB_UNSUPPORTED; +} + +ib_api_status_t +mlnx_destroy_cq ( + IN const ib_cq_handle_t h_cq) +{ + + ib_api_status_t status; + int err; + struct ib_cq *ib_cq_p = (struct ib_cq *)h_cq; + PREP_IBDEV_FOR_PRINT(ib_cq_p->device); + + HCA_ENTER( HCA_DBG_QP); + + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_CQ, + ("cqn %#x, pcs %p\n", ((struct mthca_cq*)ib_cq_p)->cqn, PsGetCurrentProcess()) ); + + // destroy CQ + err = ibv_destroy_cq( ib_cq_p ); + if (err) { + HCA_PRINT (TRACE_LEVEL_ERROR ,HCA_DBG_SHIM, + ("ibv_destroy_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_destroy_cq; + } + + status = IB_SUCCESS; + +err_destroy_cq: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_CQ, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_CQ); + return status; +} + + +ib_api_status_t +mlnx_local_mad ( + IN const 
ib_ca_handle_t h_ca, + IN const uint8_t port_num, + IN const ib_av_attr_t* p_av_attr, + IN const ib_mad_t *p_mad_in, + OUT ib_mad_t *p_mad_out ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + mlnx_hob_t *hob_p = (mlnx_hob_t *)h_ca; + struct ib_device *ib_dev = IBDEV_FROM_HOB( hob_p ); + //TODO: do we need use flags (IB_MAD_IGNORE_MKEY, IB_MAD_IGNORE_BKEY) ? + int mad_flags = 0; + struct _ib_wc *wc_p = NULL; + //TODO: do we need use grh ? + struct _ib_grh *grh_p = NULL; + + HCA_ENTER(HCA_DBG_MAD); + + // sanity checks + if (port_num > 2) { + status = IB_INVALID_PARAMETER; + goto err_port_num; + } + + if (p_av_attr){ + wc_p = cl_zalloc(sizeof(struct _ib_wc)); + if(!wc_p){ + status = IB_INSUFFICIENT_MEMORY ; + goto err_wc_alloc; + } + //Copy part of the attributes need to fill the mad extended fields in mellanox devices + wc_p->recv.ud.remote_lid = p_av_attr->dlid; + wc_p->recv.ud.remote_sl = p_av_attr->sl; + wc_p->recv.ud.path_bits = p_av_attr->path_bits; + wc_p->recv.ud.recv_opt = p_av_attr->grh_valid?IB_RECV_OPT_GRH_VALID:0; + + if(wc_p->recv.ud.recv_opt &IB_RECV_OPT_GRH_VALID){ + grh_p = cl_zalloc(sizeof(struct _ib_grh)); + if(!grh_p){ + status = IB_INSUFFICIENT_MEMORY ; + goto err_grh_alloc; + } + cl_memcpy(grh_p, &p_av_attr->grh, sizeof(ib_grh_t)); + } + + + } + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_MAD, + ("MAD: Class %02x, Method %02x, Attr %02x, HopPtr %d, HopCnt %d, \n", + (uint32_t)((ib_smp_t *)p_mad_in)->mgmt_class, + (uint32_t)((ib_smp_t *)p_mad_in)->method, + (uint32_t)((ib_smp_t *)p_mad_in)->attr_id, + (uint32_t)((ib_smp_t *)p_mad_in)->hop_ptr, + (uint32_t)((ib_smp_t *)p_mad_in)->hop_count)); + + + // process mad + + err = mthca_process_mad(ib_dev, mad_flags, (uint8_t)port_num, + wc_p, grh_p, (struct ib_mad*)p_mad_in, (struct ib_mad*)p_mad_out); + if (!err) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_MAD, + ("MAD failed:\n\tClass 0x%x\n\tMethod 0x%x\n\tAttr 0x%x", + p_mad_in->mgmt_class, p_mad_in->method, p_mad_in->attr_id )); + status = IB_ERROR; + goto err_process_mad; + } + + if( (p_mad_in->mgmt_class == IB_MCLASS_SUBN_DIR || + p_mad_in->mgmt_class == IB_MCLASS_SUBN_LID) && + p_mad_in->attr_id == IB_MAD_ATTR_PORT_INFO ) + { + ib_port_info_t *p_pi_in, *p_pi_out; + + if( p_mad_in->mgmt_class == IB_MCLASS_SUBN_DIR ) + { + p_pi_in = (ib_port_info_t*) + ib_smp_get_payload_ptr( (ib_smp_t*)p_mad_in ); + p_pi_out = (ib_port_info_t*) + ib_smp_get_payload_ptr( (ib_smp_t*)p_mad_out ); + } + else + { + p_pi_in = (ib_port_info_t*)(p_mad_in + 1); + p_pi_out = (ib_port_info_t*)(p_mad_out + 1); + } + + /* Work around FW bug 33958 */ + p_pi_out->subnet_timeout &= 0x7F; + if( p_mad_in->method == IB_MAD_METHOD_SET ) + p_pi_out->subnet_timeout |= (p_pi_in->subnet_timeout & 0x80); + } + + /* Modify direction for Direct MAD */ + if ( p_mad_in->mgmt_class == IB_MCLASS_SUBN_DIR ) + p_mad_out->status |= IB_SMP_DIRECTION; + + +err_process_mad: + if(grh_p) + cl_free(grh_p); +err_grh_alloc: + if(wc_p) + cl_free(wc_p); +err_wc_alloc: +err_port_num: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MAD, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_MAD); + return status; +} + + +void +setup_ci_interface( + IN const ib_net64_t ca_guid, + IN const int is_livefish, + IN OUT ci_interface_t *p_interface ) +{ + cl_memclr(p_interface, sizeof(*p_interface)); + + /* Guid of the CA. */ + p_interface->guid = ca_guid; + + /* Version of this interface. 
*/ + p_interface->version = VERBS_VERSION; + + /* UVP name */ + cl_memcpy( p_interface->libname, mlnx_uvp_lib_name, MAX_LIB_NAME); + + HCA_PRINT(TRACE_LEVEL_VERBOSE , HCA_DBG_SHIM ,("UVP filename %s\n", p_interface->libname)); + + /* The real interface. */ + p_interface->open_ca = mlnx_open_ca; + p_interface->query_ca = mlnx_query_ca; + p_interface->close_ca = mlnx_close_ca; + p_interface->um_open_ca = mlnx_um_open; + p_interface->um_close_ca = mlnx_um_close; + + p_interface->allocate_pd = mlnx_allocate_pd; + p_interface->deallocate_pd = mlnx_deallocate_pd; + p_interface->vendor_call = fw_access_ctrl; + + if (is_livefish) { + mlnx_memory_if_livefish(p_interface); + } + else { + p_interface->modify_ca = mlnx_modify_ca; + + p_interface->create_av = mlnx_create_av; + p_interface->query_av = mlnx_query_av; + p_interface->modify_av = mlnx_modify_av; + p_interface->destroy_av = mlnx_destroy_av; + + p_interface->create_srq = mlnx_create_srq; + p_interface->modify_srq = mlnx_modify_srq; + p_interface->query_srq = mlnx_query_srq; + p_interface->destroy_srq = mlnx_destroy_srq; + + p_interface->create_qp = mlnx_create_qp; + p_interface->create_spl_qp = mlnx_create_spl_qp; + p_interface->modify_qp = mlnx_modify_qp; + p_interface->ndi_modify_qp = mlnx_ndi_modify_qp; + p_interface->query_qp = mlnx_query_qp; + p_interface->destroy_qp = mlnx_destroy_qp; + + p_interface->create_cq = mlnx_create_cq; + p_interface->resize_cq = mlnx_resize_cq; + p_interface->query_cq = mlnx_query_cq; + p_interface->destroy_cq = mlnx_destroy_cq; + + p_interface->local_mad = mlnx_local_mad; + + + mlnx_memory_if(p_interface); + mlnx_direct_if(p_interface); + mlnx_mcast_if(p_interface); + } + + return; +} + diff --git a/branches/IBFD/hw/mthca/kernel/ib_cache.h b/branches/IBFD/hw/mthca/kernel/ib_cache.h new file mode 100644 index 00000000..debfd1f5 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/ib_cache.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#ifndef _IB_CACHE_H +#define _IB_CACHE_H + +#include + +/** + * ib_get_cached_gid - Returns a cached GID table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @index: The index into the cached GID table to query. + * @gid: The GID value found at the specified index. + * + * ib_get_cached_gid() fetches the specified GID table entry stored in + * the local software cache. + */ +int ib_get_cached_gid(struct ib_device *device, + u8 port_num, + int index, + union ib_gid *gid); + +/** + * ib_find_cached_gid - Returns the port number and GID table index where + * a specified GID value occurs. + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value was found. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_find_cached_gid() searches for the specified GID value in + * the local software cache. + */ +int ib_find_cached_gid(struct ib_device *device, + union ib_gid *gid, + u8 *port_num, + u16 *index); + +/** + * ib_get_cached_pkey - Returns a cached PKey table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @index: The index into the cached PKey table to query. + * @pkey: The PKey value found at the specified index. + * + * ib_get_cached_pkey() fetches the specified PKey table entry stored in + * the local software cache. + */ +int ib_get_cached_pkey(struct ib_device *device_handle, + u8 port_num, + int index, + u16 *pkey); + +/** + * ib_find_cached_pkey - Returns the PKey table index where a specified + * PKey value occurs. + * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the cached PKey table where the PKey was found. + * + * ib_find_cached_pkey() searches the specified PKey table in + * the local software cache. + */ +int ib_find_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index); + + +int ib_cache_setup(void); +void ib_cache_cleanup(void); + +#endif /* _IB_CACHE_H */ diff --git a/branches/IBFD/hw/mthca/kernel/ib_mad.h b/branches/IBFD/hw/mthca/kernel/ib_mad.h new file mode 100644 index 00000000..e8a80806 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/ib_mad.h @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
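Editorial note, not part of the original patch: the cached GID/PKey lookups declared in ib_cache.h above can be exercised as in the minimal sketch below, assuming a valid ib_device pointer, port 1, and 0xFFFF only as a stand-in for whatever PKey the caller actually needs.

static int example_cached_lookup(struct ib_device *device)
{
	union ib_gid gid;
	u16 pkey, pkey_index;
	int err;

	/* entry 0 of port 1's cached GID table (the port GID) */
	err = ib_get_cached_gid(device, 1, 0, &gid);
	if (err)
		return err;

	/* locate the placeholder PKey 0xFFFF in the cached PKey table ... */
	err = ib_find_cached_pkey(device, 1, 0xFFFF, &pkey_index);
	if (err)
		return err;

	/* ... and read the same entry back by index */
	return ib_get_cached_pkey(device, 1, pkey_index, &pkey);
}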
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#if !defined( IB_MAD_H ) +#define IB_MAD_H + +#include + +/* Management base version */ +#define IB_MGMT_BASE_VERSION 1 + +/* Management classes */ +#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 +#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 +#define IB_MGMT_CLASS_SUBN_ADM 0x03 +#define IB_MGMT_CLASS_PERF_MGMT 0x04 +#define IB_MGMT_CLASS_BM 0x05 +#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 +#define IB_MGMT_CLASS_CM 0x07 +#define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 +#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F + +#define IB_OPENIB_OUI (0x001405) + +/* Management methods */ +#define IB_MGMT_METHOD_GET 0x01 +#define IB_MGMT_METHOD_SET 0x02 +#define IB_MGMT_METHOD_GET_RESP 0x81 +#define IB_MGMT_METHOD_SEND 0x03 +#define IB_MGMT_METHOD_TRAP 0x05 +#define IB_MGMT_METHOD_REPORT 0x06 +#define IB_MGMT_METHOD_REPORT_RESP 0x86 +#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 + +#define IB_MGMT_METHOD_RESP 0x80 + +#define IB_MGMT_MAX_METHODS 128 + +/* RMPP information */ +#define IB_MGMT_RMPP_VERSION 1 + +#define IB_MGMT_RMPP_TYPE_DATA 1 +#define IB_MGMT_RMPP_TYPE_ACK 2 +#define IB_MGMT_RMPP_TYPE_STOP 3 +#define IB_MGMT_RMPP_TYPE_ABORT 4 + +#define IB_MGMT_RMPP_FLAG_ACTIVE 1 +#define IB_MGMT_RMPP_FLAG_FIRST (1<<1) +#define IB_MGMT_RMPP_FLAG_LAST (1<<2) + +#define IB_MGMT_RMPP_NO_RESPTIME 0x1F + +#define IB_MGMT_RMPP_STATUS_SUCCESS 0 +#define IB_MGMT_RMPP_STATUS_RESX 1 +#define IB_MGMT_RMPP_STATUS_ABORT_MIN 118 +#define IB_MGMT_RMPP_STATUS_T2L 118 +#define IB_MGMT_RMPP_STATUS_BAD_LEN 119 +#define IB_MGMT_RMPP_STATUS_BAD_SEG 120 +#define IB_MGMT_RMPP_STATUS_BADT 121 +#define IB_MGMT_RMPP_STATUS_W2S 122 +#define IB_MGMT_RMPP_STATUS_S2B 123 +#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124 +#define IB_MGMT_RMPP_STATUS_UNV 125 +#define IB_MGMT_RMPP_STATUS_TMR 126 +#define IB_MGMT_RMPP_STATUS_UNSPEC 127 +#define IB_MGMT_RMPP_STATUS_ABORT_MAX 127 + +#define IB_QP1_QKEY 0x00000180 /* big endian */ +#define IB_QP_SET_QKEY 0x00000080 /* big endian */ + +struct ib_mad_hdr { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; +}; + +struct ib_rmpp_hdr { + u8 rmpp_version; + u8 rmpp_type; + u8 rmpp_rtime_flags; + u8 rmpp_status; + __be32 seg_num; + __be32 paylen_newwin; +}; + +typedef u64 ib_sa_comp_mask; + +#define IB_SA_COMP_MASK(n) ((ib_sa_comp_mask) cl_hton64(1ull << n)) + +/* + * ib_sa_hdr and ib_sa_mad structures must be packed because they have + * 64-bit fields that are only 32-bit aligned. 64-bit architectures will + * lay them out wrong otherwise. 
(And unfortunately they are sent on + * the wire so we can't change the layout) + */ +#pragma pack(push,1) +struct ib_sa_hdr { + __be64 sm_key; + __be16 attr_offset; + __be16 reserved; + ib_sa_comp_mask comp_mask; +}; +#pragma pack(pop) + +struct ib_mad { + struct ib_mad_hdr mad_hdr; + u8 data[232]; +}; + +struct ib_rmpp_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 data[220]; +}; + +#pragma pack(push,1) +struct ib_sa_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + u8 data[200]; +}; +#pragma pack(pop) + +struct ib_vendor_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + u8 data[216]; +}; + +/** + * ib_mad_send_buf - MAD data buffer and work request for sends. + * @mad: References an allocated MAD data buffer. The size of the data + * buffer is specified in the @send_wr.length field. + * @mapping: DMA mapping information. + * @mad_agent: MAD agent that allocated the buffer. + * @context: User-controlled context fields. + * @send_wr: An initialized work request structure used when sending the MAD. + * The wr_id field of the work request is initialized to reference this + * data structure. + * @sge: A scatter-gather list referenced by the work request. + * + * Users are responsible for initializing the MAD buffer itself, with the + * exception of specifying the payload length field in any RMPP MAD. + */ +struct ib_mad_send_buf { + struct ib_mad *mad; + dma_addr_t mapping; + struct ib_mad_agent *mad_agent; + void *context[2]; + struct _ib_send_wr send_wr; + struct ib_sge sge; +}; + +/** + * ib_get_rmpp_resptime - Returns the RMPP response time. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags >> 3; +} + +/** + * ib_get_rmpp_flags - Returns the RMPP flags. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags & 0x7; +} + +/** + * ib_set_rmpp_resptime - Sets the response time in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @rtime: The response time to set. + */ +static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime) +{ + rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3); +} + +/** + * ib_set_rmpp_flags - Sets the flags in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @flags: The flags to set. + */ +static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags) +{ + rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF1) | + (flags & 0x7); +} + +struct ib_mad_agent; +struct ib_mad_send_wc; +struct ib_mad_recv_wc; + +/** + * ib_mad_send_handler - callback handler for a sent MAD. + * @mad_agent: MAD agent that sent the MAD. + * @mad_send_wc: Send work completion information on the sent MAD. + */ +typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_send_wc); + +/** + * ib_mad_snoop_handler - Callback handler for snooping sent MADs. + * @mad_agent: MAD agent that snooped the MAD. + * @send_wr: Work request information on the sent MAD. + * @mad_send_wc: Work completion information on the sent MAD. Valid + * only for snooping that occurs on a send completion. + * + * Clients snooping MADs should not modify data referenced by the @send_wr + * or @mad_send_wc. 
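Editorial aside, not part of the original header: the four inline helpers above pack the RMPP response time into the upper five bits of rmpp_rtime_flags and the flags into the lower three, so for example:

	u8 rtime_flags = (IB_MGMT_RMPP_NO_RESPTIME << 3) | IB_MGMT_RMPP_FLAG_ACTIVE;  /* 0xF9 */
	/* ib_get_rmpp_resptime(): 0xF9 >> 3  == 0x1F (IB_MGMT_RMPP_NO_RESPTIME) */
	/* ib_get_rmpp_flags():    0xF9 & 0x7 == 0x01 (IB_MGMT_RMPP_FLAG_ACTIVE) */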
+ */ +typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, + struct _ib_send_wr *send_wr, + struct ib_mad_send_wc *mad_send_wc); + +/** + * ib_mad_recv_handler - callback handler for a received MAD. + * @mad_agent: MAD agent requesting the received MAD. + * @mad_recv_wc: Received work completion information on the received MAD. + * + * MADs received in response to a send request operation will be handed to + * the user after the send operation completes. All data buffers given + * to registered agents through this routine are owned by the receiving + * client, except for snooping agents. Clients snooping MADs should not + * modify the data referenced by @mad_recv_wc. + */ +typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_mad_agent - Used to track MAD registration with the access layer. + * @device: Reference to device registration is on. + * @qp: Reference to QP used for sending and receiving MADs. + * @mr: Memory region for system memory usable for DMA. + * @recv_handler: Callback handler for a received MAD. + * @send_handler: Callback handler for a sent MAD. + * @snoop_handler: Callback handler for snooped sent MADs. + * @context: User-specified context associated with this registration. + * @hi_tid: Access layer assigned transaction ID for this client. + * Unsolicited MADs sent by this client will have the upper 32-bits + * of their TID set to this value. + * @port_num: Port number on which QP is registered + * @rmpp_version: If set, indicates the RMPP version used by this agent. + */ +struct ib_mad_agent { + struct ib_device *device; + struct ib_qp *qp; + struct ib_mr *mr; + ib_mad_recv_handler recv_handler; + ib_mad_send_handler send_handler; + ib_mad_snoop_handler snoop_handler; + void *context; + u32 hi_tid; + u8 port_num; + u8 rmpp_version; +}; + +/** + * ib_mad_send_wc - MAD send completion information. + * @wr_id: Work request identifier associated with the send MAD request. + * @status: Completion status. + * @vendor_err: Optional vendor error information returned with a failed + * request. + */ +struct ib_mad_send_wc { + u64 wr_id; + enum ib_wc_status status; + u32 vendor_err; +}; + +/** + * ib_mad_recv_buf - received MAD buffer information. + * @list: Reference to next data buffer for a received RMPP MAD. + * @grh: References a data buffer containing the global route header. + * The data refereced by this buffer is only valid if the GRH is + * valid. + * @mad: References the start of the received MAD. + */ +struct ib_mad_recv_buf { + struct list_head list; + struct ib_grh *grh; + struct ib_mad *mad; +}; + +/** + * ib_mad_recv_wc - received MAD information. + * @wc: Completion information for the received data. + * @recv_buf: Specifies the location of the received data buffer(s). + * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. + * @mad_len: The length of the received MAD, without duplicated headers. + * + * For received response, the wr_id field of the wc is set to the wr_id + * for the corresponding send request. + */ +struct ib_mad_recv_wc { + struct _ib_wc *wc; + struct ib_mad_recv_buf recv_buf; + struct list_head rmpp_list; + int mad_len; +}; + +/** + * ib_mad_reg_req - MAD registration request + * @mgmt_class: Indicates which management class of MADs should be receive + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. 
+ * @mgmt_class_version: Indicates which version of MADs for the given + * management class to receive. + * @oui: Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + * @method_mask: The caller will receive unsolicited MADs for any method + * where @method_mask = 1. + */ +struct ib_mad_reg_req { + u8 mgmt_class; + u8 mgmt_class_version; + u8 oui[3]; + DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS); +}; + +/** + * ib_register_mad_agent - Register to send/receive MADs. + * @device: The device to register with. + * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP to access. Must be either + * IB_QPT_QP0 or IB_QPT_QP1. + * @mad_reg_req: Specifies which unsolicited MADs should be received + * by the caller. This parameter may be NULL if the caller only + * wishes to receive solicited responses. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + */ +struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, + u8 port_num, + enum ib_qp_type_t qp_type, + struct ib_mad_reg_req *mad_reg_req, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +enum ib_mad_snoop_flags { + /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ + /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/ + IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2), + /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/ + IB_MAD_SNOOP_RECVS = (1<<4) + /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/ + /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/ +}; + +/** + * ib_register_mad_snoop - Register to snoop sent and received MADs. + * @device: The device to register with. + * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP traffic to snoop. Must be either + * IB_QPT_QP0 or IB_QPT_QP1. + * @mad_snoop_flags: Specifies information where snooping occurs. + * @send_handler: The callback routine invoked for a snooped send. + * @recv_handler: The callback routine invoked for a snooped receive. + * @context: User specified context associated with the registration. + */ +struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, + u8 port_num, + enum ib_qp_type_t qp_type, + int mad_snoop_flags, + ib_mad_snoop_handler snoop_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_unregister_mad_agent - Unregisters a client from using MAD services. + * @mad_agent: Corresponding MAD registration request to deregister. + * + * After invoking this routine, MAD services are no longer usable by the + * client on the associated QP. + */ +int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); + +/** + * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated + * with the registered client. + * @mad_agent: Specifies the associated registration to post the send to. + * @send_wr: Specifies the information needed to send the MAD(s). + * @bad_send_wr: Specifies the MAD on which an error was encountered. + * + * Sent MADs are not guaranteed to complete in the order that they were posted. 
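A minimal usage sketch (editorial, not from the patch) for the registration call declared above: a client that only expects solicited responses may pass a NULL ib_mad_reg_req and rmpp_version 0, as the comments describe. The port number, handlers, and context below are placeholders.

static void example_send_done(struct ib_mad_agent *mad_agent,
                              struct ib_mad_send_wc *mad_send_wc)
{
	UNREFERENCED_PARAMETER(mad_agent);
	UNREFERENCED_PARAMETER(mad_send_wc);
	/* match the completion to the request via mad_send_wc->wr_id */
}

static void example_recv_done(struct ib_mad_agent *mad_agent,
                              struct ib_mad_recv_wc *mad_recv_wc)
{
	UNREFERENCED_PARAMETER(mad_agent);
	/* consume the response, then hand the buffers back to the access layer */
	ib_free_recv_mad(mad_recv_wc);
}

static struct ib_mad_agent *example_register(struct ib_device *device, void *ctx)
{
	/* callers should check the returned pointer before using it */
	return ib_register_mad_agent(device, 1 /* port */, IB_QPT_QP1,
	                             NULL /* solicited responses only */,
	                             0    /* RMPP not used */,
	                             example_send_done, example_recv_done, ctx);
}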
+ * + * If the MAD requires RMPP, the data buffer should contain a single copy + * of the common MAD, RMPP, and class specific headers, followed by the class + * defined data. If the class defined data would not divide evenly into + * RMPP segments, then space must be allocated at the end of the referenced + * buffer for any required padding. To indicate the amount of class defined + * data being transferred, the paylen_newwin field in the RMPP header should + * be set to the size of the class specific header plus the amount of class + * defined data being transferred. The paylen_newwin field should be + * specified in network-byte order. + */ +int ib_post_send_mad(struct ib_mad_agent *mad_agent, + struct _ib_send_wr *send_wr, + struct _ib_send_wr **bad_send_wr); + +/** + * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. + * @mad_recv_wc: Work completion information for a received MAD. + * @buf: User-provided data buffer to receive the coalesced buffers. The + * referenced buffer should be at least the size of the mad_len specified + * by @mad_recv_wc. + * + * This call copies a chain of received MAD segments into a single data buffer, + * removing duplicated headers. + */ +void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf); + +/** + * ib_free_recv_mad - Returns data buffers used to receive a MAD. + * @mad_recv_wc: Work completion information for a received MAD. + * + * Clients receiving MADs through their ib_mad_recv_handler must call this + * routine to return the work completion buffers to the access layer. + */ +void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_cancel_mad - Cancels an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @wr_id: Indicates the work request identifier of the MAD to cancel. + * + * MADs will be returned to the user through the corresponding + * ib_mad_send_handler. + */ +void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id); + +/** + * ib_modify_mad - Modifies an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @wr_id: Indicates the work request identifier of the MAD to modify. + * @timeout_ms: New timeout value for sent MAD. + * + * This call will reset the timeout value for a sent MAD to the specified + * value. + */ +int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms); + +/** + * ib_redirect_mad_qp - Registers a QP for MAD services. + * @qp: Reference to a QP that requires MAD services. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + * + * Use of this call allows clients to use MAD services, such as RMPP, + * on user-owned QPs. After calling this routine, users may send + * MADs on the specified QP by calling ib_mad_post_send. + */ +struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_process_mad_wc - Processes a work completion associated with a + * MAD sent or received on a redirected QP. 
+ * @mad_agent: Specifies the registered MAD service using the redirected QP. + * @wc: References a work completion associated with a sent or received + * MAD segment. + * + * This routine is used to complete or continue processing on a MAD request. + * If the work completion is associated with a send operation, calling + * this routine is required to continue an RMPP transfer or to wait for a + * corresponding response, if it is a request. If the work completion is + * associated with a receive operation, calling this routine is required to + * process an inbound or outbound RMPP transfer, or to match a response MAD + * with its corresponding request. + */ +int ib_process_mad_wc(struct ib_mad_agent *mad_agent, + struct _ib_wc *wc); + +/** + * ib_create_send_mad - Allocate and initialize a data buffer and work request + * for sending a MAD. + * @mad_agent: Specifies the registered MAD service to associate with the MAD. + * @remote_qpn: Specifies the QPN of the receiving node. + * @pkey_index: Specifies which PKey the MAD will be sent using. This field + * is valid only if the remote_qpn is QP 1. + * @ah: References the address handle used to transfer to the remote node. + * @rmpp_active: Indicates if the send will enable RMPP. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * should include the common MAD header, RMPP header, plus any class + * specific header. + * @data_len: Indicates the size of any user-transferred data. The call will + * automatically adjust the allocated buffer size to account for any + * additional padding that may be necessary. + * @gfp_mask: GFP mask used for the memory allocation. + * + * This is a helper routine that may be used to allocate a MAD. Users are + * not required to allocate outbound MADs using this call. The returned + * MAD send buffer will reference a data buffer usable for sending a MAD, along + * with an initialized work request structure. Users may modify the returned + * MAD data buffer or work request before posting the send. + * + * The returned data buffer will be cleared. Users are responsible for + * initializing the common MAD and any class specific headers. If @rmpp_active + * is set, the RMPP header will be initialized for sending. + */ +struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + struct ib_ah *ah, int rmpp_active, + int hdr_len, int data_len, + unsigned int gfp_mask); + +/** + * ib_free_send_mad - Returns data buffers used to send a MAD. + * @send_buf: Previously allocated send data buffer. + */ +void ib_free_send_mad(struct ib_mad_send_buf *send_buf); + +#endif /* IB_MAD_H */ diff --git a/branches/IBFD/hw/mthca/kernel/ib_pack.h b/branches/IBFD/hw/mthca/kernel/ib_pack.h new file mode 100644 index 00000000..deb42e6c --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/ib_pack.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
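Editorial sketch, not from the patch: allocating and posting a MAD with ib_create_send_mad()/ib_post_send_mad() as declared above. The QPN, PKey index, header/data split, and gfp_mask value are placeholders, and the ERR_PTR-style result check mirrors the convention used for ibv_create_cq earlier in this patch (an assumption for this particular call).

static int example_post_get(struct ib_mad_agent *mad_agent, struct ib_ah *ah)
{
	struct ib_mad_send_buf *send_buf;
	struct _ib_send_wr *bad_wr;

	send_buf = ib_create_send_mad(mad_agent,
		1,                                  /* remote QPN: GSI (placeholder) */
		0,                                  /* pkey_index (placeholder) */
		ah,
		0,                                  /* rmpp_active: no RMPP */
		sizeof(struct ib_mad_hdr),          /* hdr_len: common header only */
		sizeof(((struct ib_mad *)0)->data), /* data_len: 232-byte payload */
		0);                                 /* gfp_mask: placeholder */
	if (IS_ERR(send_buf))                       /* assumed ERR_PTR convention */
		return PTR_ERR(send_buf);

	send_buf->mad->mad_hdr.method = IB_MGMT_METHOD_GET; /* caller fills the rest */

	return ib_post_send_mad(mad_agent, &send_buf->send_wr, &bad_wr);
}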
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef IB_PACK_H +#define IB_PACK_H + +#include + +enum { + IB_LRH_BYTES = 8, + IB_GRH_BYTES = 40, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8 +}; + +struct ib_field { + int struct_offset_bytes; + int struct_size_bytes; + int offset_words; + int offset_bits; + int size_bits; + char *field_name; +}; + +#define RESERVED \ + .field_name = "reserved" + +/* + * This macro cleans up the definitions of constants for BTH opcodes. + * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY, + * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives + * the correct value. + * + * In short, user code should use the constants defined using the + * macro rather than worrying about adding together other constants. 
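An illustrative expansion (editorial, not part of the original comment), using the transport and operation constants defined just below:

	IB_OPCODE(UD, SEND_ONLY)
	/* => IB_OPCODE_UD_SEND_ONLY = IB_OPCODE_UD + IB_OPCODE_SEND_ONLY = 0x60 + 0x04 = 0x64 */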
+*/ +#define IB_OPCODE(transport, op) \ + IB_OPCODE_ ## transport ## _ ## op = \ + IB_OPCODE_ ## transport + IB_OPCODE_ ## op + +enum { + /* transport types -- just used to define real constants */ + IB_OPCODE_RC = 0x00, + IB_OPCODE_UC = 0x20, + IB_OPCODE_RD = 0x40, + IB_OPCODE_UD = 0x60, + + /* operations -- just used to define real constants */ + IB_OPCODE_SEND_FIRST = 0x00, + IB_OPCODE_SEND_MIDDLE = 0x01, + IB_OPCODE_SEND_LAST = 0x02, + IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, + IB_OPCODE_SEND_ONLY = 0x04, + IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, + IB_OPCODE_RDMA_WRITE_FIRST = 0x06, + IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07, + IB_OPCODE_RDMA_WRITE_LAST = 0x08, + IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, + IB_OPCODE_RDMA_WRITE_ONLY = 0x0a, + IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, + IB_OPCODE_RDMA_READ_REQUEST = 0x0c, + IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, + IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, + IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, + IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, + IB_OPCODE_ACKNOWLEDGE = 0x11, + IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, + IB_OPCODE_COMPARE_SWAP = 0x13, + IB_OPCODE_FETCH_ADD = 0x14, + + /* real constants follow -- see comment about above IB_OPCODE() + macro for more details */ + + /* RC */ + IB_OPCODE(RC, SEND_FIRST), + IB_OPCODE(RC, SEND_MIDDLE), + IB_OPCODE(RC, SEND_LAST), + IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, SEND_ONLY), + IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_FIRST), + IB_OPCODE(RC, RDMA_WRITE_MIDDLE), + IB_OPCODE(RC, RDMA_WRITE_LAST), + IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_ONLY), + IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_READ_REQUEST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RC, ACKNOWLEDGE), + IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RC, COMPARE_SWAP), + IB_OPCODE(RC, FETCH_ADD), + + /* UC */ + IB_OPCODE(UC, SEND_FIRST), + IB_OPCODE(UC, SEND_MIDDLE), + IB_OPCODE(UC, SEND_LAST), + IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, SEND_ONLY), + IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_FIRST), + IB_OPCODE(UC, RDMA_WRITE_MIDDLE), + IB_OPCODE(UC, RDMA_WRITE_LAST), + IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_ONLY), + IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + + /* RD */ + IB_OPCODE(RD, SEND_FIRST), + IB_OPCODE(RD, SEND_MIDDLE), + IB_OPCODE(RD, SEND_LAST), + IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, SEND_ONLY), + IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_FIRST), + IB_OPCODE(RD, RDMA_WRITE_MIDDLE), + IB_OPCODE(RD, RDMA_WRITE_LAST), + IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_ONLY), + IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_READ_REQUEST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RD, ACKNOWLEDGE), + IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RD, COMPARE_SWAP), + IB_OPCODE(RD, FETCH_ADD), + + /* UD */ + IB_OPCODE(UD, SEND_ONLY), + IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) +}; + +enum { + IB_LNH_RAW = 0, + IB_LNH_IP = 1, + IB_LNH_IBA_LOCAL = 2, + IB_LNH_IBA_GLOBAL = 3 +}; + +struct ib_unpacked_lrh { + u8 virtual_lane; + u8 link_version; + u8 service_level; + u8 
link_next_header; + __be16 destination_lid; + __be16 packet_length; + __be16 source_lid; +}; + +struct ib_unpacked_grh { + u8 ip_version; + u8 traffic_class; + __be32 flow_label; + __be16 payload_length; + u8 next_header; + u8 hop_limit; + union ib_gid source_gid; + union ib_gid destination_gid; +}; + +struct ib_unpacked_bth { + u8 opcode; + u8 solicited_event; + u8 mig_req; + u8 pad_count; + u8 transport_header_version; + __be16 pkey; + __be32 destination_qpn; + u8 ack_req; + __be32 psn; +}; + +struct ib_unpacked_deth { + __be32 qkey; + __be32 source_qpn; +}; + +struct ib_ud_header { + struct ib_unpacked_lrh lrh; + int grh_present; + struct ib_unpacked_grh grh; + struct ib_unpacked_bth bth; + struct ib_unpacked_deth deth; + int immediate_present; + __be32 immediate_data; +}; + +void ib_pack(const struct ib_field *desc, + int desc_len, + void *structure, + void *buf); + +void ib_unpack(const struct ib_field *desc, + int desc_len, + void *buf, + void *structure); + +void ib_ud_header_init(int payload_bytes, + int grh_present, + struct ib_ud_header *header); + +int ib_ud_header_pack(struct ib_ud_header *header, + void *buf); + +int ib_ud_header_unpack(void *buf, + struct ib_ud_header *header); + +#endif /* IB_PACK_H */ diff --git a/branches/IBFD/hw/mthca/kernel/ib_smi.h b/branches/IBFD/hw/mthca/kernel/ib_smi.h new file mode 100644 index 00000000..8cfe1a2a --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/ib_smi.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
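Editorial sketch, not part of the patch: building and packing a minimal UD header with the helpers declared above. The LIDs, QPN, PKey, and payload size are placeholders, and a real sender would also fill the remaining LRH/BTH fields from its resolved path.

static int example_pack_ud_header(void *wire_buf)
{
	struct ib_ud_header header;

	ib_ud_header_init(256 /* payload bytes */, 0 /* no GRH */, &header);

	header.lrh.destination_lid = cl_hton16(0x0001);   /* placeholder DLID */
	header.lrh.source_lid      = cl_hton16(0x0002);   /* placeholder SLID */
	header.bth.opcode          = IB_OPCODE_UD_SEND_ONLY;
	header.bth.pkey            = cl_hton16(0xFFFF);   /* placeholder PKey */
	header.bth.destination_qpn = cl_hton32(0x24);     /* placeholder QPN */
	header.deth.qkey           = IB_QP1_QKEY;         /* already big endian (see ib_mad.h) */
	header.deth.source_qpn     = cl_hton32(1);

	return ib_ud_header_pack(&header, wire_buf);
}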
+ * + * $Id$ + */ + +#if !defined( IB_SMI_H ) +#define IB_SMI_H + +#include + +#define IB_SMP_DATA_SIZE 64 +#define IB_SMP_MAX_PATH_HOPS 64 + +#pragma pack(push,1) +struct ib_smp { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + u8 hop_ptr; + u8 hop_cnt; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 mkey; + __be16 dr_slid; + __be16 dr_dlid; + u8 reserved[28]; + u8 data[IB_SMP_DATA_SIZE]; + u8 initial_path[IB_SMP_MAX_PATH_HOPS]; + u8 return_path[IB_SMP_MAX_PATH_HOPS]; +}; +#pragma pack(pop) + + +/* Subnet management attributes */ +#define IB_SMP_ATTR_NOTICE cl_hton16(0x0002) +#define IB_SMP_ATTR_NODE_DESC cl_hton16(0x0010) +#define IB_SMP_ATTR_NODE_INFO cl_hton16(0x0011) +#define IB_SMP_ATTR_SWITCH_INFO cl_hton16(0x0012) +#define IB_SMP_ATTR_GUID_INFO cl_hton16(0x0014) +#define IB_SMP_ATTR_PORT_INFO cl_hton16(0x0015) +#define IB_SMP_ATTR_PKEY_TABLE cl_hton16(0x0016) +#define IB_SMP_ATTR_SL_TO_VL_TABLE cl_hton16(0x0017) +#define IB_SMP_ATTR_VL_ARB_TABLE cl_hton16(0x0018) +#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE cl_hton16(0x0019) +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE cl_hton16(0x001A) +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE cl_hton16(0x001B) +#define IB_SMP_ATTR_SM_INFO cl_hton16(0x0020) +#define IB_SMP_ATTR_VENDOR_DIAG cl_hton16(0x0030) +#define IB_SMP_ATTR_LED_INFO cl_hton16(0x0031) +#define IB_SMP_ATTR_VENDOR_MASK cl_hton16(0xFF00) + +static inline u8 +ib_get_smp_direction(struct ib_smp *smp) +{ + return (u8)((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); +} + +#endif /* IB_SMI_H */ diff --git a/branches/IBFD/hw/mthca/kernel/ib_verbs.h b/branches/IBFD/hw/mthca/kernel/ib_verbs.h new file mode 100644 index 00000000..af98d422 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/ib_verbs.h @@ -0,0 +1,1343 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
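Editorial sketch, not from the patch: filling a directed-route SubnGet(PortInfo) SMP using the layout and constants from ib_smi.h and ib_mad.h above. The transaction ID is a placeholder, and hop_cnt 0 with the permissive dr_slid/dr_dlid addresses the local port.

static void example_build_dr_portinfo_get(struct ib_smp *smp)
{
	RtlZeroMemory(smp, sizeof(*smp));
	smp->base_version  = IB_MGMT_BASE_VERSION;
	smp->mgmt_class    = IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE;
	smp->class_version = 1;
	smp->method        = IB_MGMT_METHOD_GET;
	smp->attr_id       = IB_SMP_ATTR_PORT_INFO;  /* already big endian via cl_hton16 */
	smp->tid           = cl_hton64(1);           /* placeholder TID */
	smp->hop_ptr       = 0;
	smp->hop_cnt       = 0;                      /* 0 hops: query the local port */
	smp->dr_slid       = 0xFFFF;                 /* permissive LID */
	smp->dr_dlid       = 0xFFFF;
}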
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#if !defined(IB_VERBS_H) +#define IB_VERBS_H + +#include +#include +#include + +union ib_gid { + u8 raw[16]; + struct { + __be64 subnet_prefix; + __be64 interface_id; + } global; +}; + +enum ib_node_type { + IB_NODE_CA = 1, + IB_NODE_SWITCH, + IB_NODE_ROUTER +}; + +enum ib_device_cap_flags { + IB_DEVICE_RESIZE_MAX_WR = 1, + IB_DEVICE_BAD_PKEY_CNTR = (1<<1), + IB_DEVICE_BAD_QKEY_CNTR = (1<<2), + IB_DEVICE_RAW_MULTI = (1<<3), + IB_DEVICE_AUTO_PATH_MIG = (1<<4), + IB_DEVICE_CHANGE_PHY_PORT = (1<<5), + IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), + IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), + IB_DEVICE_SHUTDOWN_PORT = (1<<8), + IB_DEVICE_INIT_TYPE = (1<<9), + IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), + IB_DEVICE_SYS_IMAGE_GUID = (1<<11), + IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), + IB_DEVICE_SRQ_RESIZE = (1<<13), + IB_DEVICE_N_NOTIFY_CQ = (1<<14), +}; + +struct ib_device_attr { + u64 fw_ver; + __be64 sys_image_guid; + u64 max_mr_size; + u64 page_size_cap; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + int max_qp; + int max_qp_wr; + int device_cap_flags; + int max_sge; + int max_sge_rd; + int max_cq; + int max_cqe; + int max_mr; + int max_pd; + int max_qp_rd_atom; + int max_ee_rd_atom; + int max_res_rd_atom; + int max_qp_init_rd_atom; + int max_ee_init_rd_atom; + enum ib_atomic_cap atomic_cap; + int max_ee; + int max_rdd; + int max_mw; + int max_raw_ipv6_qp; + int max_raw_ethy_qp; + int max_mcast_grp; + int max_mcast_qp_attach; + int max_total_mcast_qp_attach; + int max_ah; + int max_fmr; + int max_map_per_fmr; + int max_srq; + int max_srq_wr; + int max_srq_sge; + u16 max_pkeys; + u8 local_ca_ack_delay; +}; + +static inline int ib_mtu_enum_to_int(int mtu) +{ + switch (mtu) { + case IB_MTU_LEN_256: return 256; + case IB_MTU_LEN_512: return 512; + case IB_MTU_LEN_1024: return 1024; + case IB_MTU_LEN_2048: return 2048; + case IB_MTU_LEN_4096: return 4096; + default: return -1; + } +} + +enum ib_port_state { + IB_PORT_NOP = 0, + IB_PORT_DOWN = 1, + IB_PORT_INIT = 2, + IB_PORT_ARMED = 3, + IB_PORT_ACTIVE = 4, + IB_PORT_ACTIVE_DEFER = 5 +}; + +enum ib_port_cap_flags { + IB_PORT_SM = 1 << 1, + IB_PORT_NOTICE_SUP = 1 << 2, + IB_PORT_TRAP_SUP = 1 << 3, + IB_PORT_OPT_IPD_SUP = 1 << 4, + IB_PORT_AUTO_MIGR_SUP = 1 << 5, + IB_PORT_SL_MAP_SUP = 1 << 6, + IB_PORT_MKEY_NVRAM = 1 << 7, + IB_PORT_PKEY_NVRAM = 1 << 8, + IB_PORT_LED_INFO_SUP = 1 << 9, + IB_PORT_SM_DISABLED = 1 << 10, + IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_PORT_CM_SUP = 1 << 16, + IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, + IB_PORT_REINIT_SUP = 1 << 18, + IB_PORT_DEVICE_MGMT_SUP = 1 << 19, + IB_PORT_VENDOR_CLASS_SUP = 1 << 20, + IB_PORT_DR_NOTICE_SUP = 1 << 21, + IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_PORT_BOOT_MGMT_SUP = 1 << 23, + IB_PORT_LINK_LATENCY_SUP = 1 << 24, + IB_PORT_CLIENT_REG_SUP = 1 << 25 +}; + +enum ib_port_width { + IB_WIDTH_1X = 1, + IB_WIDTH_4X = 2, + IB_WIDTH_8X = 4, + IB_WIDTH_12X = 8 +}; + +static inline int ib_width_enum_to_int(enum ib_port_width width) +{ + switch (width) { + case IB_WIDTH_1X: return 1; + case IB_WIDTH_4X: return 4; + case IB_WIDTH_8X: return 8; + case IB_WIDTH_12X: return 12; + default: return -1; + } +} + +struct ib_port_attr { + enum ib_port_state state; + enum ib_mtu max_mtu; 
+ enum ib_mtu active_mtu; + int gid_tbl_len; + u32 port_cap_flags; + u32 max_msg_sz; + u32 bad_pkey_cntr; + u32 qkey_viol_cntr; + u16 pkey_tbl_len; + u16 lid; + u16 sm_lid; + u8 lmc; + u8 max_vl_num; + u8 sm_sl; + u8 subnet_timeout; + u8 init_type_reply; + u8 active_width; + u8 active_speed; + u8 phys_state; +}; + +enum ib_device_modify_flags { + IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 +}; + +struct ib_device_modify { + u64 sys_image_guid; +}; + +enum ib_port_modify_flags { + IB_PORT_SHUTDOWN = 1, + IB_PORT_INIT_TYPE = (1<<2), + IB_PORT_RESET_QKEY_CNTR = (1<<3) +}; + +struct ib_port_modify { + u32 set_port_cap_mask; + u32 clr_port_cap_mask; + u8 init_type; +}; + +enum ib_event_type { + IB_EVENT_CQ_ERR = IB_AE_CQ_ERROR, + IB_EVENT_QP_FATAL = IB_AE_QP_FATAL, + IB_EVENT_QP_REQ_ERR = IB_AE_WQ_REQ_ERROR, + IB_EVENT_QP_ACCESS_ERR = IB_AE_WQ_ACCESS_ERROR, + IB_EVENT_COMM_EST = IB_AE_QP_COMM, + IB_EVENT_SQ_DRAINED = IB_AE_SQ_DRAINED, + IB_EVENT_PATH_MIG = IB_AE_QP_APM, + IB_EVENT_PATH_MIG_ERR = IB_AE_QP_APM_ERROR, + IB_EVENT_DEVICE_FATAL = IB_AE_LOCAL_FATAL, + IB_EVENT_PORT_ACTIVE = IB_AE_PORT_ACTIVE, + IB_EVENT_PORT_ERR = IB_AE_PORT_DOWN, + IB_EVENT_SRQ_LIMIT_REACHED = IB_AE_SRQ_LIMIT_REACHED, + IB_EVENT_SRQ_CATAS_ERROR = IB_AE_SRQ_CATAS_ERROR, + IB_EVENT_SRQ_QP_LAST_WQE_REACHED = IB_AE_SRQ_QP_LAST_WQE_REACHED, + IB_EVENT_LID_CHANGE = IB_AE_UNKNOWN + 1, + IB_EVENT_PKEY_CHANGE, + IB_EVENT_SM_CHANGE +}; + +struct ib_event { + struct ib_device *device; + union { + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_srq *srq; + u8 port_num; + } element; + enum ib_event_type event; + uint64_t vendor_specific; +}; + +struct ib_event_handler { + struct ib_device *device; + void (*handler)(struct ib_event_handler *, struct ib_event *); + struct list_head list; +}; + +#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \ + (_ptr)->device = _device; \ + (_ptr)->handler = _handler; \ + INIT_LIST_HEAD(&(_ptr)->list) + +struct ib_global_route { + union ib_gid dgid; + u32 flow_label; + u8 sgid_index; + u8 hop_limit; + u8 traffic_class; +}; + +struct ib_grh { + __be32 version_tclass_flow; + __be16 paylen; + u8 next_hdr; + u8 hop_limit; + union ib_gid sgid; + union ib_gid dgid; +}; + +enum { + IB_MULTICAST_QPN = 0xffffff +}; + +enum ib_ah_flags { + IB_AH_GRH = 1 +}; + +struct ib_ah_attr { + struct ib_global_route grh; + u16 dlid; + u8 sl; + u8 src_path_bits; + u8 static_rate; + u8 ah_flags; + u8 port_num; +}; + +#ifdef WIN_TO_BE_REMOVE +//define in ib_types.h +enum ib_wc_status { + IB_WC_SUCCESS, + IB_WC_LOC_LEN_ERR, + IB_WC_LOC_QP_OP_ERR, + IB_WC_LOC_EEC_OP_ERR, + IB_WC_LOC_PROT_ERR, + IB_WC_WR_FLUSH_ERR, + IB_WC_MW_BIND_ERR, + IB_WC_BAD_RESP_ERR, + IB_WC_LOC_ACCESS_ERR, + IB_WC_REM_INV_REQ_ERR, + IB_WC_REM_ACCESS_ERR, + IB_WC_REM_OP_ERR, + IB_WC_RETRY_EXC_ERR, + IB_WC_RNR_RETRY_EXC_ERR, + IB_WC_LOC_RDD_VIOL_ERR, + IB_WC_REM_INV_RD_REQ_ERR, + IB_WC_REM_ABORT_ERR, + IB_WC_INV_EECN_ERR, + IB_WC_INV_EEC_STATE_ERR, + IB_WC_FATAL_ERR, + IB_WC_RESP_TIMEOUT_ERR, + IB_WC_GENERAL_ERR +}; +#endif + +enum ib_cq_notify { + IB_CQ_SOLICITED, + IB_CQ_NEXT_COMP +}; + +struct ib_srq_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + ib_srq_attr_t attr; +}; + +struct ib_qp_cap { + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +enum ib_sig_type { + IB_SIGNAL_ALL_WR, + IB_SIGNAL_REQ_WR +}; + +struct ib_qp_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + 
struct ib_srq *srq; + struct ib_qp_cap cap; + enum ib_sig_type sq_sig_type; + enum ib_qp_type_t qp_type; + u8 port_num; /* special QP types only */ +}; + +enum ib_rnr_timeout { + IB_RNR_TIMER_655_36 = 0, + IB_RNR_TIMER_000_01 = 1, + IB_RNR_TIMER_000_02 = 2, + IB_RNR_TIMER_000_03 = 3, + IB_RNR_TIMER_000_04 = 4, + IB_RNR_TIMER_000_06 = 5, + IB_RNR_TIMER_000_08 = 6, + IB_RNR_TIMER_000_12 = 7, + IB_RNR_TIMER_000_16 = 8, + IB_RNR_TIMER_000_24 = 9, + IB_RNR_TIMER_000_32 = 10, + IB_RNR_TIMER_000_48 = 11, + IB_RNR_TIMER_000_64 = 12, + IB_RNR_TIMER_000_96 = 13, + IB_RNR_TIMER_001_28 = 14, + IB_RNR_TIMER_001_92 = 15, + IB_RNR_TIMER_002_56 = 16, + IB_RNR_TIMER_003_84 = 17, + IB_RNR_TIMER_005_12 = 18, + IB_RNR_TIMER_007_68 = 19, + IB_RNR_TIMER_010_24 = 20, + IB_RNR_TIMER_015_36 = 21, + IB_RNR_TIMER_020_48 = 22, + IB_RNR_TIMER_030_72 = 23, + IB_RNR_TIMER_040_96 = 24, + IB_RNR_TIMER_061_44 = 25, + IB_RNR_TIMER_081_92 = 26, + IB_RNR_TIMER_122_88 = 27, + IB_RNR_TIMER_163_84 = 28, + IB_RNR_TIMER_245_76 = 29, + IB_RNR_TIMER_327_68 = 30, + IB_RNR_TIMER_491_52 = 31 +}; + +enum ib_qp_attr_mask { + IB_QP_STATE = 1, + IB_QP_CUR_STATE = (1<<1), + IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2), + IB_QP_ACCESS_FLAGS = (1<<3), + IB_QP_PKEY_INDEX = (1<<4), + IB_QP_PORT = (1<<5), + IB_QP_QKEY = (1<<6), + IB_QP_AV = (1<<7), + IB_QP_PATH_MTU = (1<<8), + IB_QP_TIMEOUT = (1<<9), + IB_QP_RETRY_CNT = (1<<10), + IB_QP_RNR_RETRY = (1<<11), + IB_QP_RQ_PSN = (1<<12), + IB_QP_MAX_QP_RD_ATOMIC = (1<<13), + IB_QP_ALT_PATH = (1<<14), + IB_QP_MIN_RNR_TIMER = (1<<15), + IB_QP_SQ_PSN = (1<<16), + IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), + IB_QP_PATH_MIG_STATE = (1<<18), + IB_QP_CAP = (1<<19), + IB_QP_DEST_QPN = (1<<20) +}; + +//TODO: these literals are also defined in ib_types.h and have there ANOTHER VALUES !!! 
+enum ib_qp_state { + IBQPS_RESET, + IBQPS_INIT, + IBQPS_RTR, + IBQPS_RTS, + IBQPS_SQD, + IBQPS_SQE, + IBQPS_ERR +}; + + +struct ib_qp_attr { + enum ib_qp_state qp_state; + enum ib_qp_state cur_qp_state; + enum ib_mtu path_mtu; + ib_apm_state_t path_mig_state; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + u32 dest_qp_num; + int qp_access_flags; + struct ib_qp_cap cap; + struct ib_ah_attr ah_attr; + struct ib_ah_attr alt_ah_attr; + u16 pkey_index; + u16 alt_pkey_index; + u8 en_sqd_async_notify; + u8 sq_draining; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + u8 min_rnr_timer; + u8 port_num; + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; + u8 alt_port_num; + u8 alt_timeout; +}; + +struct ib_sge { + u64 addr; + u32 length; + u32 lkey; +}; + + +typedef enum MTHCA_QP_ACCESS_FLAGS { + MTHCA_ACCESS_LOCAL_WRITE = 1, + MTHCA_ACCESS_REMOTE_WRITE = (1<<1), + MTHCA_ACCESS_REMOTE_READ = (1<<2), + MTHCA_ACCESS_REMOTE_ATOMIC = (1<<3), + MTHCA_ACCESS_MW_BIND = (1<<4) +} mthca_qp_access_t; + +struct ib_phys_buf { + u64 addr; + u64 size; +}; + +struct ib_mr_attr { + struct ib_pd *pd; + u64 device_virt_addr; + u64 size; + mthca_qp_access_t mr_access_flags; + u32 lkey; + u32 rkey; +}; + +enum ib_mr_rereg_flags { + IB_MR_REREG_TRANS = 1, + IB_MR_REREG_PD = (1<<1), + IB_MR_REREG_ACCESS = (1<<2) +}; + +struct ib_mw_bind { + struct ib_mr *mr; + u64 wr_id; + u64 addr; + u32 length; + int send_flags; + int mw_access_flags; +}; + +struct ib_fmr_attr { + int max_pages; + int max_maps; + u8 page_shift; +}; + +struct ib_ucontext { + struct ib_device *device; + PVOID user_uar; + struct ib_pd *pd; + atomic_t usecnt; /* count all resources */ + ULONG is_removing; + cl_list_item_t list_item; // chain of user contexts + // for tools support + KMUTEX mutex; + PMDL p_mdl; + PVOID va; + int fw_if_open; +}; + +struct ib_uobject { + u64 user_handle; /* handle given to us by userspace */ + struct ib_ucontext *context; /* associated user context */ + struct list_head list; /* link to context's list */ + u32 id; /* index into kernel idr */ +}; + +struct ib_umem { + u64 user_base; + u64 virt_base; + u64 length; + int offset; + int page_size; + int writable; + struct list_head chunk_list; +}; + +#pragma warning( disable : 4200 ) +struct ib_umem_chunk { + struct list_head list; + int nents; + int nmap; + struct scatterlist page_list[0]; +}; +#pragma warning( default : 4200 ) + +#define IB_UMEM_MAX_PAGE_CHUNK \ + ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ + ((char *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ + (char *) &((struct ib_umem_chunk *) 0)->page_list[0])) + +struct ib_pd { + struct list_head list; /* for chaining AV MRs (for user mode only) */ + struct ib_device *device; + struct ib_ucontext *ucontext; + atomic_t usecnt; /* count all resources */ + KMUTEX mutex; /* for chaining AV MRs (for user mode only) */ +}; + +struct ib_ah { + struct ib_device *device; + struct ib_pd *pd; + struct ib_ucontext *ucontext; +}; + +typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); + +struct ib_cq { + struct ib_device *device; + struct ib_ucontext *ucontext; + struct ib_mr *ib_mr; + ib_comp_handler comp_handler; + void (*event_handler)(struct ib_event *, void *); + void * cq_context; + int cqe; + atomic_t usecnt; /* count number of work queues */ +}; + +struct ib_srq { + struct ib_device *device; + struct ib_pd *pd; + struct ib_ucontext *ucontext; + struct ib_mr *ib_mr; + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + atomic_t usecnt; /* count number of work queues */ +}; + +struct 
ib_qp { + struct ib_device *device; + struct ib_pd *pd; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_ucontext *ucontext; + struct ib_mr *ib_mr; + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + u32 qp_num; + enum ib_qp_type_t qp_type; +}; + +struct ib_mr { + struct list_head list; /* for chaining AV MRs (for user mode only) */ + struct ib_device *device; + struct ib_pd *pd; + u32 lkey; + u32 rkey; + atomic_t usecnt; /* count number of MWs */ +}; + +struct ib_mw { + struct ib_device *device; + struct ib_pd *pd; + u32 rkey; +}; + +struct ib_fmr { + struct ib_device *device; + struct ib_pd *pd; + struct list_head list; + u32 lkey; + u32 rkey; +}; + +struct ib_mad; +struct ib_grh; + +enum ib_process_mad_flags { + IB_MAD_IGNORE_MKEY = 1, + IB_MAD_IGNORE_BKEY = 2, + IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY +}; + +enum ib_mad_result { + IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ + IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ + IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ + IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ +}; + +#define IB_DEVICE_NAME_MAX 64 + +struct ib_cache { + rwlock_t lock; + struct ib_event_handler event_handler; + struct ib_pkey_cache **pkey_cache; + struct ib_gid_cache **gid_cache; +}; + +struct mthca_dev; + +struct ib_device { + struct mthca_dev *mdev; + + char name[IB_DEVICE_NAME_MAX]; + + struct list_head event_handler_list; + spinlock_t event_handler_lock; + + struct list_head core_list; + struct list_head client_data_list; + spinlock_t client_data_lock; + + struct ib_cache cache; + + u32 flags; + + int (*query_device)(struct ib_device *device, + struct ib_device_attr *device_attr); + int (*query_port)(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr); + int (*query_gid_chunk)(struct ib_device *device, + u8 port_num, int index, + union ib_gid gid[8]); + int (*query_pkey_chunk)(struct ib_device *device, + u8 port_num, u16 index, u16 pkey[32]); + int (*modify_device)(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + int (*modify_port)(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, + ci_umv_buf_t* const p_umv_buf); + int (*dealloc_ucontext)(struct ib_ucontext *context); + struct ib_pd * (*alloc_pd)(struct ib_device *device, + struct ib_ucontext *context, + ci_umv_buf_t* const p_umv_buf); + int (*dealloc_pd)(struct ib_pd *pd); + struct ib_ah * (*create_ah)(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); + int (*modify_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*query_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*destroy_ah)(struct ib_ah *ah); + struct ib_srq * (*create_srq)(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + ci_umv_buf_t* const p_umv_buf); + int (*modify_srq)(struct ib_srq *srq, + ib_srq_attr_t *srq_attr, + ib_srq_attr_mask_t srq_attr_mask); + int (*query_srq)(struct ib_srq *srq, + ib_srq_attr_t *srq_attr); + int (*destroy_srq)(struct ib_srq *srq); + int (*post_srq_recv)(struct ib_srq *srq, + struct _ib_recv_wr *recv_wr, + struct _ib_recv_wr **bad_recv_wr); + struct ib_qp * (*create_qp)(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + ci_umv_buf_t* const p_umv_buf); + int (*modify_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int 
qp_attr_mask); + int (*query_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + int (*destroy_qp)(struct ib_qp *qp); + int (*post_send)(struct ib_qp *qp, + struct _ib_send_wr *send_wr, + struct _ib_send_wr **bad_send_wr); + int (*post_recv)(struct ib_qp *qp, + struct _ib_recv_wr *recv_wr, + struct _ib_recv_wr **bad_recv_wr); + struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, + struct ib_ucontext *context, + ci_umv_buf_t* const p_umv_buf); + int (*destroy_cq)(struct ib_cq *cq); + int (*resize_cq)(struct ib_cq *cq, int *cqe); + int (*poll_cq)(struct ib_cq *cq, int num_entries, + struct _ib_wc *wc); + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); + int (*req_notify_cq)(struct ib_cq *cq, + enum ib_cq_notify cq_notify); + int (*req_ncomp_notif)(struct ib_cq *cq, + int wc_cnt); + struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags); + struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start); + struct ib_mr * (*reg_virt_mr)(struct ib_pd *pd, + void* __ptr64 vaddr, uint64_t length, uint64_t hca_va, + mthca_qp_access_t acc, boolean_t um_call); + int (*query_mr)(struct ib_mr *mr, + struct ib_mr_attr *mr_attr); + int (*dereg_mr)(struct ib_mr *mr); + int (*rereg_phys_mr)(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start); + struct ib_mw * (*alloc_mw)(struct ib_pd *pd); + int (*bind_mw)(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + int (*dealloc_mw)(struct ib_mw *mw); + struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, + struct ib_fmr_attr *fmr_attr); + int (*map_phys_fmr)(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova); + int (*unmap_fmr)(struct list_head *fmr_list); + int (*dealloc_fmr)(struct ib_fmr *fmr); + int (*attach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*detach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*process_mad)(struct ib_device *device, + int process_mad_flags, + u8 port_num, + struct _ib_wc *in_wc, + struct _ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + + struct list_head port_list; + + u64 uverbs_cmd_mask; + __be64 node_guid; + u8 node_type; + u8 phys_port_cnt; +}; + +struct ib_client { + char *name; + void (*add) (struct ib_device *); + void (*remove)(struct ib_device *); + + struct list_head list; +}; + +struct ib_device *ib_alloc_device(size_t size); +void ib_dealloc_device(struct ib_device *device); + +int ib_register_device (struct ib_device *device); +void ib_unregister_device(struct ib_device *device); + +int ib_register_client (struct ib_client *client); +void ib_unregister_client(struct ib_client *client); + +void *ib_get_client_data(struct ib_device *device, struct ib_client *client); +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data); + +int ib_core_init(void); + +void ib_core_cleanup(void); + +int ib_register_event_handler (struct ib_event_handler *event_handler); +int ib_unregister_event_handler(struct ib_event_handler *event_handler); +void ib_dispatch_event(struct ib_event *event); + +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr); + +int ib_query_port(struct ib_device *device, + u8 port_num, struct ib_port_attr 
*port_attr); + +int ib_query_gid_chunk(struct ib_device *device, + u8 port_num, int index, union ib_gid gid[8]); + +int ib_query_pkey_chunk(struct ib_device *device, + u8 port_num, u16 index, u16 pkey[32]); + +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + +/** + * ibv_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * @context: user process context (for application calls only) + * @p_umv_buf: parameters structure (for application calls only) + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + */ +struct ib_pd *ibv_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +/** + * ibv_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + */ +int ibv_dealloc_pd(struct ib_pd *pd); + +/** + * ibv_create_ah - Creates an address handle for the given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * @context: user process context (for application calls only) + * @p_umv_buf: parameters structure (for application calls only) + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ibv_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +/** + * ibv_create_ah_from_wc - Creates an address handle associated with the + * sender of the specified work completion. + * @pd: The protection domain associated with the address handle. + * @wc: Work completion information associated with a received message. + * @grh: References the received global route header. This parameter is + * ignored unless the work completion indicates that the GRH is valid. + * @port_num: The outbound port number to associate with the address. + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ibv_create_ah_from_wc(struct ib_pd *pd, struct _ib_wc *wc, + struct ib_grh *grh, u8 port_num); + +/** + * ibv_modify_ah - Modifies the address vector associated with an address + * handle. + * @ah: The address handle to modify. + * @ah_attr: The new address vector attributes to associate with the + * address handle. + */ +int ibv_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ibv_query_ah - Queries the address vector associated with an address + * handle. + * @ah: The address handle to query. + * @ah_attr: The address vector attributes associated with the address + * handle. + */ +int ibv_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ibv_destroy_ah - Destroys an address handle. + * @ah: The address handle to destroy. + */ +int ibv_destroy_ah(struct ib_ah *ah); + +/** + * ibv_create_srq - Creates a SRQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the SRQ. + * @srq_init_attr: A list of initial attributes required to create the + * SRQ. If SRQ creation succeeds, then the attributes are updated to + * the actual capabilities of the created SRQ. 
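/*
 * Usage sketch (illustrative only, not part of this patch): allocating a PD
 * and an address handle with the kernel entry points declared above.  Per the
 * comments, the ucontext/p_umv_buf arguments matter only for application
 * calls, so a kernel caller passes NULL.  The ib_ah_attr contents are assumed
 * to be filled in by the caller, and failure is assumed to follow the
 * ERR_PTR()/IS_ERR() convention defined in mt_l2w.h further down.
 */
static struct ib_ah *make_kernel_ah(struct ib_device *device,
                                    struct ib_ah_attr *av)
{
    struct ib_pd *pd;
    struct ib_ah *ah;

    pd = ibv_alloc_pd(device, NULL, NULL);
    if (IS_ERR(pd))
        return NULL;

    ah = ibv_create_ah(pd, av, NULL, NULL);
    if (IS_ERR(ah)) {
        ibv_dealloc_pd(pd);
        return NULL;
    }

    /* teardown later: ibv_destroy_ah(ah); ibv_dealloc_pd(pd); */
    return ah;
}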
+ * @context: user process context (for application calls only) + * @p_umv_buf: parameters structure (for application calls only) + * + * srq_attr->max_wr and srq_attr->max_sge are read the determine the + * requested size of the SRQ, and set to the actual values allocated + * on return. If ibv_create_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ib_srq *ibv_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + + +/** + * ibv_modify_srq - Modifies the attributes for the specified SRQ. + * @srq: The SRQ to modify. + * @srq_attr: On input, specifies the SRQ attributes to modify. On output, + * the current values of selected SRQ attributes are returned. + * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ + * are being modified. + * + * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or + * IB_SRQ_LIMIT to set the SRQ's limit and request notification when + * the number of receives queued drops below the limit. + */ +int ibv_modify_srq(struct ib_srq *srq, + ib_srq_attr_t *srq_attr, + ib_srq_attr_mask_t srq_attr_mask); + +/** + * ibv_query_srq - Returns the attribute list and current values for the + * specified SRQ. + * @srq: The SRQ to query. + * @srq_attr: The attributes of the specified SRQ. + */ +int ibv_query_srq(struct ib_srq *srq, + ib_srq_attr_t *srq_attr); + +/** + * ibv_destroy_srq - Destroys the specified SRQ. + * @srq: The SRQ to destroy. + */ +int ibv_destroy_srq(struct ib_srq *srq); + +/** + * ibv_post_srq_recv - Posts a list of work requests to the specified SRQ. + * @srq: The SRQ to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ibv_post_srq_recv(struct ib_srq *srq, + struct _ib_recv_wr *recv_wr, + struct _ib_recv_wr **bad_recv_wr) +{ + return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); +} + +/** + * ibv_create_qp - Creates a QP associated with the specified protection + * domain. + * @pd: The protection domain associated with the QP. + * @qp_init_attr: A list of initial attributes required to create the + * QP. If QP creation succeeds, then the attributes are updated to + * the actual capabilities of the created QP. + * @context: user process context (for application calls only) + * @p_umv_buf: parameters structure (for application calls only) + */ +struct ib_qp *ibv_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +/** + * ibv_modify_qp - Modifies the attributes for the specified QP and then + * transitions the QP to the given state. + * @qp: The QP to modify. + * @qp_attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. + * @qp_attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + */ +int ibv_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask); + +/** + * ibv_query_qp - Returns the attribute list and current values for the + * specified QP. + * @qp: The QP to query. + * @qp_attr: The attributes of the specified QP. + * @qp_attr_mask: A bit-mask used to select specific attributes to query. + * @qp_init_attr: Additional attributes of the selected QP. 
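/*
 * Usage sketch (illustrative only): resizing an SRQ with ibv_modify_srq().
 * The max_wr field and the IB_SRQ_MAX_WR mask bit are the names used in the
 * comment above; the full ib_srq_attr_t / ib_srq_attr_mask_t definitions live
 * in the IBAL headers and are not shown in this patch excerpt.
 */
static int resize_srq(struct ib_srq *srq, uint32_t new_max_wr)
{
    ib_srq_attr_t attr;

    RtlZeroMemory(&attr, sizeof attr);
    attr.max_wr = new_max_wr;

    return ibv_modify_srq(srq, &attr, IB_SRQ_MAX_WR);
}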
+ * + * The qp_attr_mask may be used to limit the query to gathering only the + * selected attributes. + */ +int ibv_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + +/** + * ibv_destroy_qp - Destroys the specified QP. + * @qp: The QP to destroy. + */ +int ibv_destroy_qp(struct ib_qp *qp); + +/** + * ib_post_send - Posts a list of work requests to the send queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @send_wr: A list of work requests to post on the send queue. + * @bad_send_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_send(struct ib_qp *qp, + struct _ib_send_wr *send_wr, + struct _ib_send_wr **bad_send_wr) +{ + return qp->device->post_send(qp, send_wr, bad_send_wr); +} + +/** + * ib_post_recv - Posts a list of work requests to the receive queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_recv(struct ib_qp *qp, + struct _ib_recv_wr *recv_wr, + struct _ib_recv_wr **bad_recv_wr) +{ + return qp->device->post_recv(qp, recv_wr, bad_recv_wr); +} + +/** + * ibv_create_cq - Creates a CQ on the specified device. + * @device: The device on which to create the CQ. + * @comp_handler: A user-specified callback that is invoked when a + * completion event occurs on the CQ. + * @event_handler: A user-specified callback that is invoked when an + * asynchronous event not associated with a completion occurs on the CQ. + * @cq_context: Context associated with the CQ returned to the user via + * the associated completion and event handlers. + * @cqe: The minimum size of the CQ. + * @context: user process context (for application calls only) + * @p_umv_buf: parameters structure (for application calls only) + * + * Users can examine the cq structure to determine the actual CQ size. + */ +struct ib_cq *ibv_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf); + +/** + * ibv_resize_cq - Modifies the capacity of the CQ. + * @cq: The CQ to resize. + * @cqe: The minimum size of the CQ. + * + * Users can examine the cq structure to determine the actual CQ size. + */ +int ibv_resize_cq(struct ib_cq *cq, int cqe); + +/** + * ibv_destroy_cq - Destroys the specified CQ. + * @cq: The CQ to destroy. + */ +int ibv_destroy_cq(struct ib_cq *cq); + +/** + * ib_poll_cq - poll a CQ for completion(s) + * @cq:the CQ being polled + * @num_entries:maximum number of completions to return + * @wc:array of at least @num_entries &struct _ib_wc where completions + * will be returned + * + * Poll a CQ for (possibly multiple) completions. If the return value + * is < 0, an error occurred. If the return value is >= 0, it is the + * number of completions returned. If the return value is + * non-negative and < num_entries, then the CQ was emptied. + */ +static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, + struct _ib_wc *wc) +{ + return cq->device->poll_cq(cq, num_entries, wc); +} + +/** + * ib_peek_cq - Returns the number of unreaped completions currently + * on the specified CQ. + * @cq: The CQ to peek. 
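/*
 * Usage sketch (illustrative only): draining a CQ with ib_poll_cq() using the
 * return-value rules documented above (negative means error, otherwise the
 * number of completions written to wc[]; fewer than requested means the CQ
 * was emptied).  struct _ib_wc is an IBAL type defined outside this patch,
 * so completions are only counted here, not decoded.
 */
static int drain_cq(struct ib_cq *cq)
{
    struct _ib_wc wc[8];        /* small batch per ib_poll_cq() call */
    int n, total = 0;

    do {
        n = ib_poll_cq(cq, 8, wc);
        if (n < 0)
            return n;           /* polling error */
        total += n;
    } while (n == 8);           /* a short batch means the CQ is empty */

    return total;
}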
+ * @wc_cnt: A minimum number of unreaped completions to check for. + * + * If the number of unreaped completions is greater than or equal to wc_cnt, + * this function returns wc_cnt, otherwise, it returns the actual number of + * unreaped completions. + */ +int ib_peek_cq(struct ib_cq *cq, int wc_cnt); + +/** + * ib_req_notify_cq - Request completion notification on a CQ. + * @cq: The CQ to generate an event for. + * @cq_notify: If set to %IB_CQ_SOLICITED, completion notification will + * occur on the next solicited event. If set to %IB_CQ_NEXT_COMP, + * notification will occur on the next completion. + */ +static inline int ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify cq_notify) +{ + return cq->device->req_notify_cq(cq, cq_notify); +} + +/** + * ib_req_ncomp_notif - Request completion notification when there are + * at least the specified number of unreaped completions on the CQ. + * @cq: The CQ to generate an event for. + * @wc_cnt: The number of unreaped completions that should be on the + * CQ before an event is generated. + */ +static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) +{ + return cq->device->req_ncomp_notif ? + cq->device->req_ncomp_notif(cq, wc_cnt) : + -ENOSYS; +} + +/** + * ibv_reg_mr - Prepares a virtually addressed memory region for use + * by an HCA. + * @pd: The protection domain associated assigned to the registered region. + * @vaddr: virtual address of the region + * @length: Specifies the size of the region. + * @hca_va: virtual address in HCA + * @mr_access_flags: Specifies the memory access rights. + * @um_call: call from user, when TRUE. + */ +struct ib_mr *ibv_reg_mr(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, + void* __ptr64 vaddr, + uint64_t length, + uint64_t hca_va, + boolean_t um_call + ); + +/** + * ibv_get_dma_mr - Returns a memory region for system memory that is + * usable for DMA. + * @pd: The protection domain associated with the memory region. + * @mr_access_flags: Specifies the memory access rights. + */ +struct ib_mr *ibv_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t mr_access_flags); + +/** + * ibv_reg_phys_mr - Prepares a virtually addressed memory region for use + * by an HCA. + * @pd: The protection domain associated assigned to the registered region. + * @phys_buf_array: Specifies a list of physical buffers to use in the + * memory region. + * @num_phys_buf: Specifies the size of the phys_buf_array. + * @mr_access_flags: Specifies the memory access rights. + * @iova_start: The offset of the region's starting I/O virtual address. + */ +struct ib_mr *ibv_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start); + +/** + * ibv_rereg_phys_mr - Modifies the attributes of an existing memory region. + * Conceptually, this call performs the functions deregister memory region + * followed by register physical memory region. Where possible, + * resources are reused instead of deallocated and reallocated. + * @mr: The memory region to modify. + * @mr_rereg_mask: A bit-mask used to indicate which of the following + * properties of the memory region are being modified. + * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies + * the new protection domain to associated with the memory region, + * otherwise, this parameter is ignored. 
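/*
 * Usage sketch (illustrative only): arming completion notification.  It first
 * tries the optional "notify after wc_cnt completions" method and falls back
 * to IB_CQ_NEXT_COMP when the wrapper above reports -ENOSYS (i.e. the HCA
 * driver did not provide req_ncomp_notif).
 */
static int arm_cq(struct ib_cq *cq, int wc_cnt)
{
    int err = ib_req_ncomp_notif(cq, wc_cnt);

    if (err == -ENOSYS)
        err = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

    return err;
}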
+ * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies a list of physical buffers to use in the new + * translation, otherwise, this parameter is ignored. + * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies the size of the phys_buf_array, otherwise, this + * parameter is ignored. + * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this + * field specifies the new memory access rights, otherwise, this + * parameter is ignored. + * @iova_start: The offset of the region's starting I/O virtual address. + */ +int ibv_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start); + +/** + * ibv_query_mr - Retrieves information about a specific memory region. + * @mr: The memory region to retrieve information about. + * @mr_attr: The attributes of the specified memory region. + */ +int ibv_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +/** + * ibv_dereg_mr - Deregisters a memory region and removes it from the + * HCA translation table. + * @mr: The memory region to deregister. + */ +int ibv_dereg_mr(struct ib_mr *mr); + +/** + * ibv_alloc_mw - Allocates a memory window. + * @pd: The protection domain associated with the memory window. + */ +struct ib_mw *ibv_alloc_mw(struct ib_pd *pd); + +/** + * ib_bind_mw - Posts a work request to the send queue of the specified + * QP, which binds the memory window to the given address range and + * remote access attributes. + * @qp: QP to post the bind work request on. + * @mw: The memory window to bind. + * @mw_bind: Specifies information about the memory window, including + * its address range, remote access rights, and associated memory region. + */ +static inline int ib_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* XXX reference counting in corresponding MR? */ + return mw->device->bind_mw ? + mw->device->bind_mw(qp, mw, mw_bind) : + -ENOSYS; +} + +/** + * ibv_dealloc_mw - Deallocates a memory window. + * @mw: The memory window to deallocate. + */ +int ibv_dealloc_mw(struct ib_mw *mw); + +/** + * ibv_alloc_fmr - Allocates a unmapped fast memory region. + * @pd: The protection domain associated with the unmapped region. + * @mr_access_flags: Specifies the memory access rights. + * @fmr_attr: Attributes of the unmapped region. + * + * A fast memory region must be mapped before it can be used as part of + * a work request. + */ +struct ib_fmr *ibv_alloc_fmr(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +/** + * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. + * @fmr: The fast memory region to associate with the pages. + * @page_list: An array of physical pages to map to the fast memory region. + * @list_len: The number of pages in page_list. + * @iova: The I/O virtual address to use with the mapped region. + */ +int ibv_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova); + +/** + * ibv_unmap_fmr - Removes the mapping from a list of fast memory regions. + * @fmr_list: A linked list of fast memory regions to unmap. + */ +int ibv_unmap_fmr(struct list_head *fmr_list); + +/** + * ibv_dealloc_fmr - Deallocates a fast memory region. + * @fmr: The fast memory region to deallocate. + */ +int ibv_dealloc_fmr(struct ib_fmr *fmr); + +/** + * ibv_attach_mcast - Attaches the specified QP to a multicast group. 
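/*
 * Usage sketch (illustrative only): registering one physically contiguous
 * buffer with ibv_reg_phys_mr(), using access flags from the mthca_qp_access_t
 * enum earlier in this header.  The physical address, size and I/O virtual
 * address are assumed to be supplied by the caller.
 */
static struct ib_mr *register_one_buf(struct ib_pd *pd, u64 pa, u64 size,
                                      u64 *iova_start)
{
    struct ib_phys_buf buf;

    buf.addr = pa;
    buf.size = size;

    return ibv_reg_phys_mr(pd, &buf, 1,
                           MTHCA_ACCESS_LOCAL_WRITE | MTHCA_ACCESS_REMOTE_READ,
                           iova_start);
}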
+ * @qp: QP to attach to the multicast group. The QP must be type + IB_QPT_UNRELIABLE_DGRM. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. + * + * In order to send and receive multicast packets, subnet + * administration must have created the multicast group and configured + * the fabric appropriately. The port associated with the specified + * QP must also be a member of the multicast group. + */ +int ibv_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +/** + * ibv_detach_mcast - Detaches the specified QP from a multicast group. + * @qp: QP to detach from the multicast group. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. + */ +int ibv_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +/** + * ibv_um_close - Releases a user-mode application context. + * @h_um_ca: application context + */ +void ibv_um_close(struct ib_ucontext * h_um_ca); + +#endif /* IB_VERBS_H */ diff --git a/branches/IBFD/hw/mthca/kernel/mt_atomic.h b/branches/IBFD/hw/mthca/kernel/mt_atomic.h new file mode 100644 index 00000000..4d1411e7 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_atomic.h @@ -0,0 +1,49 @@ +#ifndef MT_ATOMIC_H +#define MT_ATOMIC_H + +#include "complib/cl_atomic.h" + +typedef atomic32_t atomic_t; + +#define atomic_inc cl_atomic_inc +#define atomic_dec cl_atomic_dec + +static inline atomic_t atomic_read(atomic_t *pval) +{ + return *pval; +} + +static inline void atomic_set(atomic_t *pval, long val) +{ + *pval = (atomic_t)val; +} + +/** +* atomic_inc_and_test - increment and test +* pval: pointer of type atomic_t +* +* Atomically increments pval by 1 and +* returns true if the result is 0, or false for all other +* cases. +*/ +static inline int +atomic_inc_and_test(atomic_t *pval) +{ + return cl_atomic_inc(pval) == 0; +} + +/** +* atomic_dec_and_test - decrement and test +* pval: pointer of type atomic_t +* +* Atomically decrements pval by 1 and +* returns true if the result is 0, or false for all other +* cases. +*/ +static inline int +atomic_dec_and_test(atomic_t *pval) +{ + return cl_atomic_dec(pval) == 0; +} + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/mt_bitmap.h b/branches/IBFD/hw/mthca/kernel/mt_bitmap.h new file mode 100644 index 00000000..550528d3 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_bitmap.h @@ -0,0 +1,107 @@ +#ifndef MT_BITMAP_H +#define MT_BITMAP_H + +#include + +// DECLARE_BITMAP +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +/** +* atomic_set_bit - Atomically set a bit in memory +* @nr: the bit to set +* @addr: the address to start counting from +* +* This function is atomic and may not be reordered. See __set_bit() +* if you do not require the atomic guarantees. +* +* Note: there are no guarantees that this function will not be reordered +* on non x86 architectures, so if you are writing portable code, +* make sure not to rely on its reordering guarantees. +* +* Note that @nr may be almost arbitrarily large; this function is not +* restricted to acting on a single-word quantity. +*/ +static inline unsigned long atomic_set_bit(int nr, volatile long * addr) +{ + return InterlockedOr( addr, (1 << nr) ); +} + +/** +* atomic_clear_bit - Clears a bit in memory +* @nr: Bit to clear +* @addr: Address to start counting from +* +* clear_bit() is atomic and may not be reordered. 
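/*
 * Usage sketch (illustrative only): the usecnt reference-count pattern used by
 * the verbs objects above, expressed with the atomic helpers from mt_atomic.h.
 * struct refcounted_obj and its obj_free callback are placeholders, not patch
 * code.
 */
struct refcounted_obj {
    atomic_t usecnt;
    void (*obj_free)(struct refcounted_obj *obj);
};

static void obj_get(struct refcounted_obj *obj)
{
    atomic_inc(&obj->usecnt);
}

static void obj_put(struct refcounted_obj *obj)
{
    /* release the object when the last reference goes away */
    if (atomic_dec_and_test(&obj->usecnt))
        obj->obj_free(obj);
}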
However, it does +* not contain a memory barrier, so if it is used for locking purposes, +* you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() +* in order to ensure changes are visible on other processors. +*/ +static inline unsigned long atomic_clear_bit(int nr, volatile long * addr) +{ + return InterlockedAnd( addr, ~(1 << nr) ); +} + +static inline int set_bit(int nr,long * addr) +{ + addr += nr >> 5; + return atomic_set_bit( nr & 0x1f, (volatile long *)addr ); +} + +static inline int clear_bit(int nr, long * addr) +{ + addr += nr >> 5; + return atomic_clear_bit( nr & 0x1f, (volatile long *)addr ); +} + +static inline int test_bit(int nr, const unsigned long * addr) +{ + int mask; + + addr += nr >> 5; + mask = 1 << (nr & 0x1f); + return ((mask & *addr) != 0); +} + + +/** +* bitmap_zero - clear the bitmap +* @dst: the bitmap address +* @nbits: the bitmap size in bits +* +*/ +static inline void bitmap_zero(unsigned long *dst, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = 0UL; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + RtlZeroMemory(dst, len); + } +} + +#define BITMAP_LAST_WORD_MASK(nbits) \ + ( ((nbits) % BITS_PER_LONG) ? (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL ) + +int __bitmap_full(const unsigned long *bitmap, int bits); + +static inline int bitmap_full(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_full(src, nbits); +} + +int __bitmap_empty(const unsigned long *bitmap, int bits); + +static inline int bitmap_empty(const unsigned long *src, int nbits) +{ + if (nbits <= BITS_PER_LONG) + return ! (*src & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_empty(src, nbits); +} + + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/mt_cache.c b/branches/IBFD/hw/mthca/kernel/mt_cache.c new file mode 100644 index 00000000..6dc9ce72 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_cache.c @@ -0,0 +1,415 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_cache.tmh" +#endif +#include + +#include "ib_cache.h" + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, ib_cache_setup) +#pragma alloc_text (PAGE, ib_cache_cleanup) +#endif + + +#pragma warning( disable : 4200) +struct ib_pkey_cache { + int table_len; + u16 table[0]; +}; + +struct ib_gid_cache { + int table_len; + union ib_gid table[0]; +}; +#pragma warning( default : 4200) + +struct ib_update_work { + PIO_WORKITEM work_item; + struct ib_device *device; + u8 port_num; +}; + +int ib_get_cached_gid(struct ib_device *device, + u8 port_num, + int index, + union ib_gid *gid) +{ + struct ib_gid_cache *cache; + int ret = 0; + SPIN_LOCK_PREP(lh); + + // sanity checks + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + if (!device->cache.gid_cache) + return -EFAULT; + + read_lock_irqsave(&device->cache.lock, &lh); + + cache = device->cache.gid_cache[port_num - start_port(device)]; + + if (index < 0 || index >= cache->table_len) + ret = -EINVAL; + else + *gid = cache->table[index]; + + read_unlock_irqrestore(&lh); + + return ret; +} + +int ib_find_cached_gid(struct ib_device *device, + union ib_gid *gid, + u8 *port_num, + u16 *index) +{ + struct ib_gid_cache *cache; + int i; + u8 p; + int ret = -ENOENT; + SPIN_LOCK_PREP(lh); + + *port_num = (u8)-1; + if (index) + *index = (u16)-1; + + read_lock_irqsave(&device->cache.lock, &lh); + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + cache = device->cache.gid_cache[p]; + for (i = 0; i < cache->table_len; ++i) { + if (!memcmp(gid, &cache->table[i], sizeof *gid)) { + *port_num = p + start_port(device); + if (index) + *index = (u16)i; + ret = 0; + goto found; + } + } + } +found: + read_unlock_irqrestore(&lh); + + return ret; +} + +int ib_get_cached_pkey(struct ib_device *device, + u8 port_num, + int index, + u16 *pkey) +{ + struct ib_pkey_cache *cache; + int ret = 0; + SPIN_LOCK_PREP(lh); + + // sanity checks + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + if (!device->cache.gid_cache) + return -EFAULT; + + read_lock_irqsave(&device->cache.lock, &lh); + + cache = device->cache.pkey_cache[port_num - start_port(device)]; + + if (index < 0 || index >= cache->table_len) + ret = -EINVAL; + else + *pkey = cache->table[index]; + + read_unlock_irqrestore(&lh); + + return ret; +} + +int ib_find_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index) +{ + struct ib_pkey_cache *cache; + int i; + int ret = -ENOENT; + SPIN_LOCK_PREP(lh); + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, &lh); + + cache = device->cache.pkey_cache[port_num - start_port(device)]; + + *index = (u16)-1; + + for (i = 0; i < cache->table_len; ++i) + if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { + *index = (u16)i; + ret = 0; + break; + } + + read_unlock_irqrestore(&lh); + + return ret; +} + +static void ib_cache_update(struct ib_device *device, + u8 port) +{ + struct ib_port_attr *tprops = NULL; + struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; + struct ib_gid_cache *gid_cache = 
NULL, *old_gid_cache; + int i; + int ret; + SPIN_LOCK_PREP(lh); + + tprops = kmalloc(sizeof *tprops, GFP_KERNEL); + if (!tprops) + return; + + ret = ib_query_port(device, port, tprops); + if (ret) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("ib_query_port failed (%d) for %s, port %d\n", + ret, device->name, port)); + goto err; + } + + pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len * + sizeof *pkey_cache->table, GFP_KERNEL); + if (!pkey_cache) + goto err; + + pkey_cache->table_len = tprops->pkey_tbl_len; + + gid_cache = kmalloc(sizeof *gid_cache + tprops->gid_tbl_len * + sizeof *gid_cache->table, GFP_KERNEL); + if (!gid_cache) + goto err; + + gid_cache->table_len = tprops->gid_tbl_len; + + for (i = 0; i < pkey_cache->table_len; i+=32) { + u16 pkey_chunk[32]; + int size; + ret = ib_query_pkey_chunk(device, port, (u16)i, pkey_chunk); + if (ret) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("ib_query_pkey_chunk failed (%d) for %s (index %d)\n", + ret, device->name, i)); + goto err; + } + size = min(32, pkey_cache->table_len - i); + RtlCopyMemory(pkey_cache->table + i, pkey_chunk, size*sizeof(u16)); + } + + for (i = 0; i < gid_cache->table_len; i+=8) { + union ib_gid gid_chunk[8]; + int size; + ret = ib_query_gid_chunk(device, port, i, gid_chunk); + if (ret) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("ib_query_gid_chunk failed (%d) for %s (index %d)\n", + ret, device->name, i)); + goto err; + } + size = min(8, gid_cache->table_len - i); + RtlCopyMemory(gid_cache->table + i, gid_chunk, size*sizeof(union ib_gid)); + } + + write_lock_irq(&device->cache.lock, &lh); + + old_pkey_cache = device->cache.pkey_cache[port - start_port(device)]; + old_gid_cache = device->cache.gid_cache [port - start_port(device)]; + + device->cache.pkey_cache[port - start_port(device)] = pkey_cache; + device->cache.gid_cache [port - start_port(device)] = gid_cache; + + write_unlock_irq(&lh); + + kfree(old_pkey_cache); + kfree(old_gid_cache); + kfree(tprops); + return; + +err: + kfree(pkey_cache); + kfree(gid_cache); + kfree(tprops); +} + +static void ib_cache_task(void *work_ptr) +{ + struct ib_update_work *work = work_ptr; + + ib_cache_update(work->device, work->port_num); +} + +/* leo: wrapper for Linux work_item callback */ +VOID + ib_work_item ( + IN PDEVICE_OBJECT DeviceObject, + IN PVOID Context + ) +{ + struct ib_update_work *work = (struct ib_update_work *)Context; + UNREFERENCED_PARAMETER(DeviceObject); + ib_cache_task(Context); + IoFreeWorkItem(work->work_item); + kfree(Context); +} + +static void ib_cache_event(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct ib_update_work *work; + static int temp_skip = 10; + + if (temp_skip-- <= 0) + return; + + if (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_SM_CHANGE) { + work = kmalloc(sizeof *work, GFP_ATOMIC); + //TODO: what will happen on allocation failure ? + if (work) { + work->device = event->device; + work->port_num = event->element.port_num; + + { // schedule a work item to work + // get PDO + PDEVICE_OBJECT pdo = handler->device->mdev->ext->cl_ext.p_self_do; + + // allocate work item + work->work_item = IoAllocateWorkItem(pdo); + if (work->work_item == NULL) { + //TODO: at least - print error. 
Need to return code, but the function is void + } + else { // schedule the work + IoQueueWorkItem( + work->work_item, + ib_work_item, + DelayedWorkQueue, + work + ); + } + } + + } + } +} + +static void ib_cache_setup_one(struct ib_device *device) +{ + u8 p; + + rwlock_init(&device->cache.lock); + + device->cache.pkey_cache = + kmalloc(sizeof *device->cache.pkey_cache * + (end_port(device) - start_port(device) + 1), GFP_KERNEL); + device->cache.gid_cache = + kmalloc(sizeof *device->cache.gid_cache * + (end_port(device) - start_port(device) + 1), GFP_KERNEL); + + if (!device->cache.pkey_cache || !device->cache.gid_cache) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("Couldn't allocate cache " + "for %s\n", device->name)); + goto err; + } + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + device->cache.pkey_cache[p] = NULL; + device->cache.gid_cache [p] = NULL; + ib_cache_update(device, p + start_port(device)); + } + + INIT_IB_EVENT_HANDLER(&device->cache.event_handler, + device, ib_cache_event); + if (ib_register_event_handler(&device->cache.event_handler)) + goto err_cache; + + return; + +err_cache: + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + kfree(device->cache.pkey_cache[p]); + kfree(device->cache.gid_cache[p]); + } + +err: + kfree(device->cache.pkey_cache); + kfree(device->cache.gid_cache); +} + +static void ib_cache_cleanup_one(struct ib_device *device) +{ + int p; + + ib_unregister_event_handler(&device->cache.event_handler); + //TODO: how to do that ? + // LINUX: flush_scheduled_work(); + + for (p = 0; p <= end_port(device) - start_port(device); ++p) { + kfree(device->cache.pkey_cache[p]); + kfree(device->cache.gid_cache[p]); + } + + kfree(device->cache.pkey_cache); + kfree(device->cache.gid_cache); +} + +static struct ib_client cache_client = { "cache", ib_cache_setup_one, ib_cache_cleanup_one }; + +int ib_cache_setup(void) +{ + return ib_register_client(&cache_client); +} + +void ib_cache_cleanup(void) +{ + ib_unregister_client(&cache_client); +} + diff --git a/branches/IBFD/hw/mthca/kernel/mt_device.c b/branches/IBFD/hw/mthca/kernel/mt_device.c new file mode 100644 index 00000000..93504583 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_device.c @@ -0,0 +1,567 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "hca_driver.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_device.tmh" +#endif +#include "ib_verbs.h" +#include "ib_cache.h" + +struct ib_client_data { + struct list_head list; + struct ib_client *client; + void * data; +}; + +static LIST_HEAD(device_list); +static LIST_HEAD(client_list); + +/* + * device_mutex protects access to both device_list and client_list. + * There's no real point to using multiple locks or something fancier + * like an rwsem: we always access both lists, and we're always + * modifying one list or the other list. In any case this is not a + * hot path so there's no point in trying to optimize. + */ +KMUTEX device_mutex; + +static int ib_device_check_mandatory(struct ib_device *device) +{ +#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } + static const struct { + size_t offset; + char *name; + } mandatory_table[] = { + IB_MANDATORY_FUNC(query_device), + IB_MANDATORY_FUNC(query_port), + IB_MANDATORY_FUNC(query_pkey_chunk), + IB_MANDATORY_FUNC(query_gid_chunk), + IB_MANDATORY_FUNC(alloc_pd), + IB_MANDATORY_FUNC(dealloc_pd), + IB_MANDATORY_FUNC(create_ah), + IB_MANDATORY_FUNC(destroy_ah), + IB_MANDATORY_FUNC(create_qp), + IB_MANDATORY_FUNC(modify_qp), + IB_MANDATORY_FUNC(destroy_qp), + IB_MANDATORY_FUNC(post_send), + IB_MANDATORY_FUNC(post_recv), + IB_MANDATORY_FUNC(create_cq), + IB_MANDATORY_FUNC(destroy_cq), + IB_MANDATORY_FUNC(poll_cq), + IB_MANDATORY_FUNC(req_notify_cq), + IB_MANDATORY_FUNC(get_dma_mr), + IB_MANDATORY_FUNC(dereg_mr) + }; + int i; + + for (i = 0; i < sizeof mandatory_table / sizeof mandatory_table[0]; ++i) { + if (!*(void **) ((u8 *) device + mandatory_table[i].offset)) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("Device %s is missing mandatory function %s\n", + device->name, mandatory_table[i].name)); + return -EINVAL; + } + } + + return 0; +} + +static struct ib_device *__ib_device_get_by_name(const char *name) +{ + struct ib_device *device; + + list_for_each_entry(device, &device_list, core_list,struct ib_device) + if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) + return device; + + return NULL; +} + +static int __extract_number(char *dest_str, const char *format, int *num) +{ + char *ptr; + UNREFERENCED_PARAMETER(format); + for (ptr = dest_str; *ptr; ptr++) { + if (*ptr >= '0' && *ptr <= '9') { + *num = atoi(ptr); + return 1; + } + } + return 0; +} +static int alloc_name(char *name) +{ + long *inuse; + char buf[IB_DEVICE_NAME_MAX]; + struct ib_device *device; + int i; + + inuse = (long *) get_zeroed_page(GFP_KERNEL); + if (!inuse) + return -ENOMEM; + + list_for_each_entry(device, &device_list, core_list,struct ib_device) { + if (!__extract_number(device->name, name, &i)) + continue; + if (i < 0 || i >= PAGE_SIZE * 8) + continue; + snprintf(buf, sizeof(buf)-1, name, i); + buf[sizeof(buf)-1] = '\0'; + if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) + set_bit(i, inuse); + } + + i = find_first_zero_bit((const unsigned long *)inuse, PAGE_SIZE * 8); + free_page(inuse); + snprintf(buf, sizeof(buf)-1, name, i); + buf[sizeof(buf)-1] = '\0'; + + if (__ib_device_get_by_name(buf)) + return -ENFILE; + + strlcpy(name, buf, IB_DEVICE_NAME_MAX); + return 0; +} + +static int 
add_client_context(struct ib_device *device, struct ib_client *client) +{ + struct ib_client_data *context; + SPIN_LOCK_PREP(lh); + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW,("Couldn't allocate client context for %s/%s\n", + device->name, client->name)); + return -ENOMEM; + } + + context->client = client; + context->data = NULL; + + spin_lock_irqsave(&device->client_data_lock, &lh); + list_add(&context->list, &device->client_data_list); + spin_unlock_irqrestore(&lh); + + return 0; +} + +/** + * ib_register_device - Register an IB device with IB core + * @device:Device to register + * + * Low-level drivers use ib_register_device() to register their + * devices with the IB core. All registered clients will receive a + * callback for each device that is added. @device must be allocated + * with ib_alloc_device(). + */ +int ib_register_device(struct ib_device *device) +{ + int ret = 0; + + down(&device_mutex); + + if (strchr(device->name, '%')) { + ret = alloc_name(device->name); + if (ret) + goto out; + } + + if (ib_device_check_mandatory(device)) { + ret = -EINVAL; + goto out; + } + + INIT_LIST_HEAD(&device->event_handler_list); + INIT_LIST_HEAD(&device->client_data_list); + spin_lock_init(&device->event_handler_lock); + spin_lock_init(&device->client_data_lock); + + list_add_tail(&device->core_list, &device_list); + + { + struct ib_client *client; + + list_for_each_entry(client, &client_list, list,struct ib_client) + if (client->add && !add_client_context(device, client)) + client->add(device); + } + + out: + up(&device_mutex); + return ret; +} + + +/** + * ib_unregister_device - Unregister an IB device + * @device:Device to unregister + * + * Unregister an IB device. All clients will receive a remove callback. + */ +void ib_unregister_device(struct ib_device *device) +{ + struct ib_client *client; + struct ib_client_data *context, *tmp; + SPIN_LOCK_PREP(lh); + + down(&device_mutex); + + list_for_each_entry_reverse(client, &client_list, list,struct ib_client) + if (client->remove) + client->remove(device); + + list_del(&device->core_list); + + up(&device_mutex); + + spin_lock_irqsave(&device->client_data_lock, &lh); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list,struct ib_client_data,struct ib_client_data) + kfree(context); + spin_unlock_irqrestore(&lh); + +} + + +/** + * ib_register_client - Register an IB client + * @client:Client to register + * + * Upper level users of the IB drivers can use ib_register_client() to + * register callbacks for IB device addition and removal. When an IB + * device is added, each registered client's add method will be called + * (in the order the clients were registered), and when a device is + * removed, each client's remove method will be called (in the reverse + * order that clients were registered). In addition, when + * ib_register_client() is called, the client will receive an add + * callback for all devices already registered. 
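/*
 * Usage sketch (illustrative only): how a low-level driver could hand its HCA
 * to the core.  A '%' in the name is expanded by alloc_name() above, and
 * ib_device_check_mandatory() will reject the device unless the mandatory verb
 * methods are already set.  mthca_fill_methods() is a hypothetical helper
 * standing in for those assignments, and the "mthca%d" template is assumed.
 */
void mthca_fill_methods(struct ib_device *ibdev);   /* hypothetical */

static int register_hca(struct mthca_dev *mdev)
{
    struct ib_device *ibdev;
    int err;

    ibdev = ib_alloc_device(sizeof *ibdev);
    if (!ibdev)
        return -ENOMEM;

    ibdev->mdev = mdev;
    strlcpy(ibdev->name, "mthca%d", IB_DEVICE_NAME_MAX);
    mthca_fill_methods(ibdev);      /* sets query_device, create_qp, ... */

    err = ib_register_device(ibdev);
    if (err)
        ib_dealloc_device(ibdev);

    return err;
}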
+ */ +int ib_register_client(struct ib_client *client) +{ + struct ib_device *device; + + down(&device_mutex); + + list_add_tail(&client->list, &client_list); + list_for_each_entry(device, &device_list, core_list,struct ib_device) + if (client->add && !add_client_context(device, client)) + client->add(device); + + up(&device_mutex); + + return 0; +} + + +/** + * ib_unregister_client - Unregister an IB client + * @client:Client to unregister + * + * Upper level users use ib_unregister_client() to remove their client + * registration. When ib_unregister_client() is called, the client + * will receive a remove callback for each IB device still registered. + */ +void ib_unregister_client(struct ib_client *client) +{ + struct ib_client_data *context, *tmp; + struct ib_device *device; + SPIN_LOCK_PREP(lh); + + down(&device_mutex); + + list_for_each_entry(device, &device_list, core_list,struct ib_device) { + if (client->remove) + client->remove(device); + + spin_lock_irqsave(&device->client_data_lock, &lh); + list_for_each_entry_safe(context, tmp, &device->client_data_list, list,struct ib_client_data,struct ib_client_data) + if (context->client == client) { + list_del(&context->list); + kfree(context); + } + spin_unlock_irqrestore(&lh); + } + list_del(&client->list); + + up(&device_mutex); +} + + +/** + * ib_get_client_data - Get IB client context + * @device:Device to get context for + * @client:Client to get context for + * + * ib_get_client_data() returns client context set with + * ib_set_client_data(). + */ +void *ib_get_client_data(struct ib_device *device, struct ib_client *client) +{ + struct ib_client_data *context; + void *ret = NULL; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&device->client_data_lock, &lh); + list_for_each_entry(context, &device->client_data_list, list,struct ib_client_data) + if (context->client == client) { + ret = context->data; + break; + } + spin_unlock_irqrestore(&lh); + + return ret; +} + + +/** + * ib_set_client_data - Get IB client context + * @device:Device to set context for + * @client:Client to set context for + * @data:Context to set + * + * ib_set_client_data() sets client context that can be retrieved with + * ib_get_client_data(). + */ +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data) +{ + struct ib_client_data *context; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&device->client_data_lock, &lh); + list_for_each_entry(context, &device->client_data_list, list,struct ib_client_data) + if (context->client == client) { + context->data = data; + goto out; + } + + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("No client context found for %s/%s\n", + device->name, client->name)); + +out: + spin_unlock_irqrestore(&lh); +} + + +/** + * ib_register_event_handler - Register an IB event handler + * @event_handler:Handler to register + * + * ib_register_event_handler() registers an event handler that will be + * called back when asynchronous IB events occur (as defined in + * chapter 11 of the InfiniBand Architecture Specification). This + * callback may occur in interrupt context. 
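/*
 * Usage sketch (illustrative only): the per-device private-data pattern served
 * by ib_set_client_data()/ib_get_client_data(), following the same client
 * shape as cache_client below.  my_client, my_state and the callbacks are
 * placeholders; the client is registered once with ib_register_client().
 */
struct my_state {
    int in_use;
};

static void my_add_one(struct ib_device *device);
static void my_remove_one(struct ib_device *device);

static struct ib_client my_client = { "example", my_add_one, my_remove_one };

static void my_add_one(struct ib_device *device)
{
    struct my_state *st = kmalloc(sizeof *st, GFP_KERNEL);

    if (!st)
        return;     /* the add callback has no way to report failure */
    st->in_use = 0;
    ib_set_client_data(device, &my_client, st);
}

static void my_remove_one(struct ib_device *device)
{
    struct my_state *st = ib_get_client_data(device, &my_client);

    if (st)
        kfree(st);
}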
+ */ +int ib_register_event_handler (struct ib_event_handler *event_handler) +{ + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&event_handler->device->event_handler_lock, &lh); + list_add_tail(&event_handler->list, + &event_handler->device->event_handler_list); + spin_unlock_irqrestore(&lh); + + return 0; +} + + +/** + * ib_unregister_event_handler - Unregister an event handler + * @event_handler:Handler to unregister + * + * Unregister an event handler registered with + * ib_register_event_handler(). + */ +int ib_unregister_event_handler(struct ib_event_handler *event_handler) +{ + SPIN_LOCK_PREP(lh); + spin_lock_irqsave(&event_handler->device->event_handler_lock, &lh); + list_del(&event_handler->list); + spin_unlock_irqrestore(&lh); + + return 0; +} + + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. + */ +void ib_dispatch_event(struct ib_event *event) +{ + struct ib_event_handler *handler; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&event->device->event_handler_lock, &lh); + + list_for_each_entry(handler, &event->device->event_handler_list, list,struct ib_event_handler) + handler->handler(handler, event); + + spin_unlock_irqrestore(&lh); +} + + +/** + * ib_query_device - Query IB device attributes + * @device:Device to query + * @device_attr:Device attributes + * + * ib_query_device() returns the attributes of a device through the + * @device_attr pointer. + */ +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr) +{ + return device->query_device(device, device_attr); +} + + +/** + * ib_query_port - Query IB port attributes + * @device:Device to query + * @port_num:Port number to query + * @port_attr:Port attributes + * + * ib_query_port() returns the attributes of a port through the + * @port_attr pointer. + */ +int ib_query_port(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr) +{ + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + return device->query_port(device, port_num, port_attr); +} + + +/** + * ib_query_gid_chunk - Get a chunk of GID table entries + * @device:Device to query + * @port_num:Port number to query + * @index:GID table index to query + * @gid:Returned GIDs chunk + * + * ib_query_gid_chunk() fetches the specified GID table enties chunk. + */ +int ib_query_gid_chunk(struct ib_device *device, + u8 port_num, int index, union ib_gid gid[8]) +{ + return device->query_gid_chunk(device, port_num, index, gid); +} + + +/** + * ib_query_pkey_chunk - Get a chunk of P_Key table entries + * @device:Device to query + * @port_num:Port number to query + * @index:P_Key table index to query + * @pkey:Returned P_Keys chunk + * + * ib_query_pkey_chunk() fetches the specified P_Key table entries chunk. + */ +int ib_query_pkey_chunk(struct ib_device *device, + u8 port_num, u16 index, u16 pkey[32]) +{ + return device->query_pkey_chunk(device, port_num, index, pkey); +} + + +/** + * ib_modify_device - Change IB device attributes + * @device:Device to modify + * @device_modify_mask:Mask of attributes to change + * @device_modify:New attribute values + * + * ib_modify_device() changes a device's attributes as specified by + * the @device_modify_mask and @device_modify structure. 
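/*
 * Usage sketch (illustrative only): registering an asynchronous event handler,
 * following the same INIT_IB_EVENT_HANDLER + ib_register_event_handler()
 * sequence that ib_cache_setup_one() uses above.  As the comment notes, the
 * callback may run in interrupt context, so it should only do non-blocking
 * work (for example, queue an IoWorkItem as ib_cache_event() does).
 */
static void my_event_cb(struct ib_event_handler *handler, struct ib_event *event)
{
    UNREFERENCED_PARAMETER(handler);

    if (event->event == IB_EVENT_PORT_ACTIVE) {
        /* may run at elevated IRQL: defer real work to a work item */
    }
}

static int watch_device(struct ib_device *device, struct ib_event_handler *h)
{
    INIT_IB_EVENT_HANDLER(h, device, my_event_cb);
    return ib_register_event_handler(h);
}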
+ */ +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify) +{ + return device->modify_device(device, device_modify_mask, + device_modify); +} + + +/** + * ib_modify_port - Modifies the attributes for the specified port. + * @device: The device to modify. + * @port_num: The number of the port to modify. + * @port_modify_mask: Mask used to specify which attributes of the port + * to change. + * @port_modify: New attribute values for the port. + * + * ib_modify_port() changes a port's attributes as specified by the + * @port_modify_mask and @port_modify structure. + */ +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify) +{ + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + return device->modify_port(device, port_num, port_modify_mask, + port_modify); +} + +int ib_core_init(void) +{ + int ret; + + /* leo: added because there is no static init of semaphore in Windows */ + KeInitializeMutex(&device_mutex,0); + + ret = ib_cache_setup(); + if (ret) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Couldn't set up InfiniBand P_Key/GID cache\n")); + } + + return ret; +} + +void ib_core_cleanup(void) +{ + ib_cache_cleanup(); +} + diff --git a/branches/IBFD/hw/mthca/kernel/mt_l2w.c b/branches/IBFD/hw/mthca/kernel/mt_l2w.c new file mode 100644 index 00000000..f1b2f027 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_l2w.c @@ -0,0 +1,132 @@ +#include +#include +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_l2w.tmh" +#endif + +pci_pool_t * +pci_pool_create (const char *name, struct mthca_dev *mdev, + size_t size, size_t align, size_t allocation) +{ + pci_pool_t *pool; + UNREFERENCED_PARAMETER(align); + UNREFERENCED_PARAMETER(allocation); + + MT_ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + + // allocation parameter is not handled yet + ASSERT(allocation == 0); + + // allocate object + pool = (pci_pool_t *)ExAllocatePoolWithTag( NonPagedPool, sizeof(pci_pool_t), MT_TAG_PCIPOOL ); + if (pool == NULL) + return NULL; + + //TODO: not absolutely correct: Linux's pci_pool_alloc provides contiguous physical memory, + // while default alloc function - ExAllocatePoolWithTag -doesn't. + // But for now it is used for elements of size <= PAGE_SIZE + // Anyway - a sanity check: + ASSERT(size <= PAGE_SIZE); + if (size > PAGE_SIZE) + return NULL; + + //TODO: not too effective: one can read its own alloc/free functions + ExInitializeNPagedLookasideList( &pool->pool_hdr, NULL, NULL, 0, size, MT_TAG_PCIPOOL, 0 ); + + // fill the object + pool->mdev = mdev; + pool->size = size; + strncpy( pool->name, name, sizeof pool->name ); + + return pool; +} + +// from lib/string.c +/** +* strlcpy - Copy a %NUL terminated string into a sized buffer +* @dest: Where to copy the string to +* @src: Where to copy the string from +* @size: size of destination buffer +* +* Compatible with *BSD: the result is always a valid +* NUL-terminated string that fits in the buffer (unless, +* of course, the buffer size is zero). It does not pad +* out the result like strncpy() does. +*/ +SIZE_T strlcpy(char *dest, const char *src, SIZE_T size) +{ + SIZE_T ret = strlen(src); + + if (size) { + SIZE_T len = (ret >= size) ? 
size-1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + + +int __bitmap_full(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (~bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int __bitmap_empty(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} + +int request_irq( + IN CM_PARTIAL_RESOURCE_DESCRIPTOR *int_info, /* interrupt resources */ + IN KSPIN_LOCK *isr_lock, /* spin lock for ISR */ + IN PKSERVICE_ROUTINE isr, /* ISR */ + IN void *isr_ctx, /* ISR context */ + OUT PKINTERRUPT *int_obj /* interrupt object */ + ) +{ + NTSTATUS status; + + status = IoConnectInterrupt( + int_obj, /* InterruptObject */ + isr, /* ISR */ + isr_ctx, /* ISR context */ + isr_lock, /* spinlock */ + int_info->u.Interrupt.Vector, /* interrupt vector */ + (KIRQL)int_info->u.Interrupt.Level, /* IRQL */ + (KIRQL)int_info->u.Interrupt.Level, /* Synchronize IRQL */ + (BOOLEAN)((int_info->Flags == CM_RESOURCE_INTERRUPT_LATCHED) ? + Latched : LevelSensitive), /* interrupt type: LATCHED or LEVEL */ + (BOOLEAN)(int_info->ShareDisposition == CmResourceShareShared), /* vector shared or not */ + g_processor_affinity ? g_processor_affinity : (KAFFINITY)int_info->u.Interrupt.Affinity, /* interrupt affinity */ + FALSE /* whether to save Float registers */ + ); + + if (!NT_SUCCESS(status)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_INIT ,("IoConnectInterrupt failed status %d (did you change the processor_affinity ? )\n",status)); + return -EFAULT; /* failed to connect interrupt */ + } + else + return 0; +} + diff --git a/branches/IBFD/hw/mthca/kernel/mt_l2w.h b/branches/IBFD/hw/mthca/kernel/mt_l2w.h new file mode 100644 index 00000000..faf34055 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_l2w.h @@ -0,0 +1,92 @@ +#ifndef MT_L2W_H +#define MT_L2W_H + +// =========================================== +// INCLUDES +// =========================================== + +// OS +#include +#include +#include +#include + +// ours - the order is important +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include + + +// =========================================== +// SUBSTITUTIONS +// =========================================== + +#define BUG_ON(exp) ASSERT(!(exp)) /* in Linux follows here panic() !*/ +#define WARN_ON(exp) ASSERT(!(exp)) /* in Linux follows here panic() !*/ +#define snprintf _snprintf + +// memory barriers +#define wmb KeMemoryBarrier +#define rmb KeMemoryBarrier +#define mb KeMemoryBarrier + +// =========================================== +// LITERALS +// =========================================== + + + + +// =========================================== +// TYPES +// =========================================== + +// rw_lock +typedef spinlock_t rwlock_t; + +// dummy function +typedef void (*MT_EMPTY_FUNC)(); + +// =========================================== +// MACROS +// =========================================== + +// ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +// ALIGN +#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) + +// there is a bug in Microsoft compiler, that when _byteswap_uint64() gets an expression +// it executes the expression but doesn't swap tte dwords +// So, 
there's a workaround +#ifdef BYTESWAP_UINT64_BUG_FIXED +#define CPU_2_BE64_PREP +#define CPU_2_BE64(x) cl_hton64(x) +#else +#define CPU_2_BE64_PREP unsigned __int64 __tmp__ +#define CPU_2_BE64(x) ( __tmp__ = x, cl_hton64(__tmp__) ) +#endif + + +SIZE_T strlcpy(char *dest, const char *src, SIZE_T size); +void MT_time_calibrate(); + +#define ERR_PTR(error) ((void*)(LONG_PTR)(error)) +#define PTR_ERR(ptr) ((long)(LONG_PTR)(void*)(ptr)) +//TODO: there are 2 assumptions here: +// - pointer can't be too big (around -1) +// - error can't be bigger than 1000 +#define IS_ERR(ptr) ((ULONG_PTR)ptr > (ULONG_PTR)-1000L) + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/mt_list.h b/branches/IBFD/hw/mthca/kernel/mt_list.h new file mode 100644 index 00000000..9fa96d8b --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_list.h @@ -0,0 +1,168 @@ +#ifndef MT_LIST_H +#define MT_LIST_H + +// taken from list.h + +/* + * These are non-NULL pointers that will result in page faults + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +#define LIST_POISON1 ((void *) 0x00100100) +#define LIST_POISON2 ((void *) 0x00200200) + +/* +* Simple doubly linked list implementation. +* +* Some of the internal functions ("__xxx") are useful when +* manipulating whole lists rather than single entries, as +* sometimes we already know the next/prev entries and we can +* generate better code by using them directly rather than +* using the generic single-entry routines. +*/ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) \ + (ptr)->next = (ptr); (ptr)->prev = (ptr) + + +/* +* Insert a new entry between two known consecutive entries. +* +* This is only for internal list manipulation where we know +* the prev/next entries already! +*/ +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** +* list_add - add a new entry +* @new: new entry to be added +* @head: list head to add it after +* +* Insert a new entry after the specified head. +* This is good for implementing stacks. +*/ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** +* list_add_tail - add a new entry +* @new: new entry to be added +* @head: list head to add it before +* +* Insert a new entry before the specified head. +* This is useful for implementing queues. +*/ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + + /* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ + static inline void __list_del(struct list_head * prev, struct list_head * next) + { + next->prev = prev; + prev->next = next; + } + + /** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. 
+ */ + static inline void list_del(struct list_head *entry) + { + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; + } + +/** +* list_empty - tests whether a list is empty +* @head: the list to test. +*/ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + + /** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +//leo: macro changed out of unportable operator typeof +/** +* list_for_each_entry - iterate over list of given type +* @pos: the type * to use as a loop counter. +* @head: the head for your list. +* @member: the name of the list_struct within the struct. +* @type: typeof(*pos) +*/ +#define list_for_each_entry(pos, head, member,type) \ + for (pos = list_entry((head)->next, type, member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, type, member)) + + +//leo: macro changed out of unportable operator typeof +/** +* list_for_each_entry_reverse - iterate backwards over list of given type. +* @pos: the type * to use as a loop counter. +* @head: the head for your list. +* @member: the name of the list_struct within the struct. +* @type: typeof(*pos) +*/ +#define list_for_each_entry_reverse(pos, head, member,type) \ + for (pos = list_entry((head)->prev, type, member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.prev, type, member)) + + +//leo: macro changed out of unportable operator typeof +/** +* list_for_each_entry_safe - iterate over list of given type safe against removal of list entry +* @pos: the type * to use as a loop counter. +* @n: another type * to use as temporary storage +* @head: the head for your list. +* @member: the name of the list_struct within the struct. +* @type: typeof(*pos) +* @type_n: typeof(*n) +*/ +#define list_for_each_entry_safe(pos, n, head, member,type,type_n) \ + for (pos = list_entry((head)->next, type, member), \ + n = list_entry(pos->member.next, type, member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, type_n, member)) + + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/mt_memory.c b/branches/IBFD/hw/mthca/kernel/mt_memory.c new file mode 100644 index 00000000..868472a9 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_memory.c @@ -0,0 +1,761 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + #include "hca_driver.h" +#include "mthca_dev.h" +#if defined (EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_memory.tmh" +#endif + +#include "mt_pa_cash.h" + + +/* +* Function: map user buffer to kernel and lock it +* +* Return: +*/ +int get_user_pages( + IN struct mthca_dev *dev, /* device */ + IN u64 start, /* address in user space */ + IN int npages, /* size in pages */ + IN int write_access, /* access rights */ + OUT struct scatterlist *sg /* s/g list */ + ) +{ + PMDL mdl_p; + int size = npages << PAGE_SHIFT; + int access = (write_access) ? IoWriteAccess : IoReadAccess; + int err; + void * kva; /* kernel virtual address */ + + UNREFERENCED_PARAMETER(dev); + + HCA_ENTER(HCA_DBG_MEMORY); + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + + /* allocate MDL */ + mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)start, (ULONG)size, + FALSE, + FALSE, /* not charge quota */ + NULL); + if (mdl_p == NULL) { + err = -ENOMEM; + goto err0; + } + + /* lock memory */ + __try { + MmProbeAndLockPages( mdl_p, UserMode, access ); + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + NTSTATUS Status = GetExceptionCode(); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("Exception 0x%x on MmProbeAndLockPages(), addr 0x%I64x, size %d\n", Status, start, size)); + switch(Status){ + case STATUS_WORKING_SET_QUOTA: + err = -ENOMEM;break; + case STATUS_ACCESS_VIOLATION: + err = -EACCES;break; + default : + err = -EINVAL; + } + + goto err1; + } + + /* map it to kernel */ + kva = MmMapLockedPagesSpecifyCache( mdl_p, + KernelMode, MmNonCached, + NULL, FALSE, NormalPagePriority ); + if (kva == NULL) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY ,("MmMapLockedPagesSpecifyCache failed\n")); + err = -EFAULT; + goto err2; + } + + sg->page = kva; + sg->length = size; + sg->offset = (unsigned int)(start & ~PAGE_MASK); + sg->p_mdl = mdl_p; + sg->dma_address = MmGetPhysicalAddress(kva).QuadPart; + return 0; + +err2: + MmUnlockPages(mdl_p); +err1: + IoFreeMdl(mdl_p); +err0: + HCA_EXIT(HCA_DBG_MEMORY); + return err; + + } + +void put_page(struct scatterlist *sg) +{ + if (sg->p_mdl) { + MmUnmapLockedPages( sg->page, sg->p_mdl ); + MmUnlockPages(sg->p_mdl); + IoFreeMdl(sg->p_mdl); + } +} + +VOID + AdapterListControl( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PSCATTER_GATHER_LIST ScatterGather, + IN PVOID Context + ) +{ + struct scatterlist *p_sg = (struct scatterlist *)Context; + + UNREFERENCED_PARAMETER(DeviceObject); + UNREFERENCED_PARAMETER(Irp); + + // sanity checks + if (!ScatterGather || !Context) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("AdapterListControl failed: invalid parameters\n")); + return; + } + if (ScatterGather->NumberOfElements > 1) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("AdapterListControl failed: unexpected sg size; %d elements \n", + 
ScatterGather->NumberOfElements )); + } + if (ScatterGather->Elements[0].Length != p_sg->length) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("AdapterListControl failed: unexpected buffer size %#x (expected %#x) \n", + ScatterGather->Elements[0].Length, p_sg->length )); + } + + // results + p_sg->dma_address = ScatterGather->Elements[0].Address.QuadPart; // get logical address + p_sg->p_os_sg = ScatterGather; // store sg list address for releasing + //NB: we do not flush the buffers by FlushAdapterBuffers(), because we don't really transfer data +} + +/* Returns: the number of mapped sg elements */ +int pci_map_sg(struct mthca_dev *dev, + struct scatterlist *sg, int nents, int direction) +{ +#ifndef USE_GET_SG_LIST + + UNREFERENCED_PARAMETER(dev); + UNREFERENCED_PARAMETER(sg); + UNREFERENCED_PARAMETER(direction); + + // mapping was performed in alloc_dma_mem + return nents; + +#else + + int i; + NTSTATUS status; + hca_dev_ext_t *p_ext = dev->ext; + struct scatterlist *p_sg = sg; + KIRQL irql = KeRaiseIrqlToDpcLevel(); + + for (i=0; ip_dma_adapter->DmaOperations->GetScatterGatherList( + p_ext->p_dma_adapter, p_ext->cl_ext.p_self_do, p_sg->p_mdl, p_sg->page, + p_sg->length, AdapterListControl, sg, (BOOLEAN)direction ); + if (!NT_SUCCESS(status)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("GetScatterGatherList failed %#x\n", status))); + break; + } + } + KeLowerIrql(irql); + return i; /* i.e., we mapped all the entries */ + +#endif +} + +/* Returns: the number of unmapped sg elements */ +int pci_unmap_sg(struct mthca_dev *dev, + struct scatterlist *sg, int nents, int direction) +{ +#ifndef USE_GET_SG_LIST + + UNREFERENCED_PARAMETER(dev); + UNREFERENCED_PARAMETER(sg); + UNREFERENCED_PARAMETER(direction); + // mapping was performed in alloc_dma_mem + return nents; + +#else + + int i; + hca_dev_ext_t *p_ext = dev->ext; + struct scatterlist *p_sg = sg; + KIRQL irql = KeRaiseIrqlToDpcLevel(); + void *p_os_sg = p_sg->p_os_sg; + + for (i=0; ip_os_sg = NULL; + p_ext->p_dma_adapter->DmaOperations->PutScatterGatherList( + p_ext->p_dma_adapter, p_os_sg, (BOOLEAN)direction ); + } + KeLowerIrql(irql); + return i; /* i.e., we mapped all the entries */ + +#endif +} + +/* The function zeroes 'struct scatterlist' and then fills it with values. 
+ On error 'struct scatterlist' is returned zeroed */ +void *alloc_dma_mem( + IN struct mthca_dev *dev, + IN unsigned long size, + OUT struct scatterlist *p_sg) +{ + void *va; + DMA_ADAPTER *p_dma = dev->ext->p_dma_adapter; + +#ifndef USE_GET_SG_LIST + + PHYSICAL_ADDRESS pa = {0}; + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + + RtlZeroMemory(p_sg,sizeof *p_sg); + if (!size) + return NULL; + + va = p_dma->DmaOperations->AllocateCommonBuffer( + p_dma, size, &pa, FALSE ); + if (va) { + p_sg->length = size; + p_sg->dma_address = pa.QuadPart; + p_sg->page = va; + } + +#else + + int err; + PHYSICAL_ADDRESS la = {0}, ba = {0}, ha = {(u64)(-1I64)}; + PMDL p_mdl; + + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + + RtlZeroMemory(p_sg,sizeof *p_sg); + if (!size) + return NULL; + + // allocate memory + va = MmAllocateContiguousMemorySpecifyCache( + size, la, ha, ba, MmNonCached ); + if (!va) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MmAllocateContiguousMemorySpecifyCache failed on %#x size\n", size ))); + goto err_alloc; + } + + // allocate MDL + p_mdl = IoAllocateMdl( va, size, FALSE, FALSE, NULL ); + if (!p_mdl) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MmAllocateContiguousMemorySpecifyCache failed on %#x size\n", size ))); + goto err_mdl; + } + MmBuildMdlForNonPagedPool( p_mdl ); + + p_sg->p_mdl = p_mdl; + p_sg->length = size; + p_sg->page = va; + + goto end; + +err_mdl: + MmFreeContiguousMemory(va); + va = NULL; +err_alloc: +end: + +#endif + + return va; +} + +void free_dma_mem( + IN struct mthca_dev *dev, + IN struct scatterlist *p_sg) +{ +#ifndef USE_GET_SG_LIST + + PHYSICAL_ADDRESS pa; + DMA_ADAPTER *p_dma = dev->ext->p_dma_adapter; + + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + + if (p_sg->length) { + pa.QuadPart = p_sg->dma_address; + p_dma->DmaOperations->FreeCommonBuffer( + p_dma, p_sg->length, pa, + p_sg->page, FALSE ); + } + +#else + + PMDL p_mdl = p_sg->p_mdl; + PVOID page = p_sg->page; + + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + if (p_mdl) { + p_sg->p_mdl = NULL; + IoFreeMdl( p_mdl ); + } + if (page) { + p_sg->page = NULL; + MmFreeContiguousMemory(page); + } + +#endif +} + + +typedef struct _mt_iobuf_seg { + LIST_ENTRY link; + PMDL mdl_p; + u64 va; /* virtual address of the buffer */ + u64 size; /* size in bytes of the buffer */ + u32 nr_pages; + int is_user; +} mt_iobuf_seg_t; + +// Returns: 0 on success, -ENOMEM or -EACCESS on error +static int register_segment( + IN u64 va, + IN u64 size, + IN int is_user, + IN ib_access_t acc, + OUT mt_iobuf_seg_t **iobuf_seg) +{ + PMDL mdl_p; + int rc; + KPROCESSOR_MODE mode; + mt_iobuf_seg_t * new_iobuf; + static ULONG cnt=0; + LOCK_OPERATION Operation; + + // set Operation + if (acc & IB_AC_LOCAL_WRITE) + Operation = IoModifyAccess; + else + Operation = IoReadAccess; + + // allocate IOBUF segment object + new_iobuf = (mt_iobuf_seg_t *)kmalloc(sizeof(mt_iobuf_seg_t), GFP_KERNEL ); + if (new_iobuf == NULL) { + rc = -ENOMEM; + goto err_nomem; + } + + // allocate MDL + mdl_p = IoAllocateMdl( (PVOID)(ULONG_PTR)va, (ULONG)size, FALSE,FALSE,NULL); + if (mdl_p == NULL) { + rc = -ENOMEM; + goto err_alloc_mdl; + } + + // make context-dependent things + if (is_user) { + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + mode = UserMode; + } + else { /* Mapping to kernel virtual address */ + // MmBuildMdlForNonPagedPool(mdl_p); // fill MDL ??? - should we do that really ? 
+ mode = KernelMode; + } + + __try { /* try */ + MmProbeAndLockPages( mdl_p, mode, Operation ); /* lock memory */ + } /* try */ + + __except (EXCEPTION_EXECUTE_HANDLER) { + HCA_PRINT(TRACE_LEVEL_ERROR, HCA_DBG_MEMORY, + ("MOSAL_iobuf_register: Exception 0x%x on MmProbeAndLockPages(), va %I64d, sz %I64d\n", + GetExceptionCode(), va, size)); + rc = -EACCES; + goto err_probe; + } + + // fill IOBUF object + new_iobuf->va = va; + new_iobuf->size= size; + new_iobuf->nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size ); + new_iobuf->mdl_p = mdl_p; + new_iobuf->is_user = is_user; + *iobuf_seg = new_iobuf; + return 0; + +err_probe: + IoFreeMdl(mdl_p); +err_alloc_mdl: + ExFreePool((PVOID)new_iobuf); +err_nomem: + return rc; +} + +void iobuf_init( + IN u64 va, + IN u64 size, + IN int is_user, + IN OUT mt_iobuf_t *iobuf_p) +{ + iobuf_p->va = va; + iobuf_p->size= size; + iobuf_p->is_user = is_user; + InitializeListHead( &iobuf_p->seg_que ); + iobuf_p->seg_num = 0; + iobuf_p->nr_pages = 0; + iobuf_p->is_cashed = 0; +} + +int iobuf_register( + IN u64 va, + IN u64 size, + IN int is_user, + IN ib_access_t acc, + IN OUT mt_iobuf_t *iobuf_p) +{ + int rc=0; + u64 seg_va; // current segment start + u64 seg_size; // current segment size + u64 rdc; // remain data counter - what is rest to lock + u64 delta; // he size of the last not full page of the first segment + mt_iobuf_seg_t * new_iobuf; + unsigned page_size = PAGE_SIZE; + +// 32 - for any case +#define PFNS_IN_PAGE_SIZE_MDL ((PAGE_SIZE - sizeof(struct _MDL) - 32) / sizeof(long)) +#define MIN_IOBUF_SEGMENT_SIZE (PAGE_SIZE * PFNS_IN_PAGE_SIZE_MDL) // 4MB + + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + + // we'll try to register all at once. + seg_va = va; + seg_size = rdc = size; + + // allocate segments + while (rdc > 0) { + // map a segment + rc = register_segment(seg_va, seg_size, is_user, acc, &new_iobuf ); + + // success - move to another segment + if (!rc) { + rdc -= seg_size; + seg_va += seg_size; + InsertTailList( &iobuf_p->seg_que, &new_iobuf->link ); + iobuf_p->seg_num++; + // round the segment size to the next page boundary + delta = (seg_va + seg_size) & (page_size - 1); + if (delta) { + seg_size -= delta; + seg_size += page_size; + } + if (seg_size > rdc) + seg_size = rdc; + continue; + } + + // failure - too large a buffer: lessen it and try once more + if (rc == -ENOMEM) { + // no where to lessen - too low memory + if (seg_size <= MIN_IOBUF_SEGMENT_SIZE) + break; + // lessen the size + seg_size >>= 1; + // round the segment size to the next page boundary + delta = (seg_va + seg_size) & (page_size - 1); + if (delta) { + seg_size -= delta; + seg_size += page_size; + } + if (seg_size > rdc) + seg_size = rdc; + continue; + } + + // got unrecoverable error + break; + } + + // SUCCESS + if (rc) + iobuf_deregister( iobuf_p ); + else + iobuf_p->nr_pages += ADDRESS_AND_SIZE_TO_SPAN_PAGES( va, size ); + + return rc; +} + + +static void __iobuf_copy( + IN OUT mt_iobuf_t *dst_iobuf_p, + IN mt_iobuf_t *src_iobuf_p + ) +{ + int i; + mt_iobuf_seg_t *iobuf_seg_p; + + *dst_iobuf_p = *src_iobuf_p; + InitializeListHead( &dst_iobuf_p->seg_que ); + for (i=0; iseg_num; ++i) { + iobuf_seg_p = (mt_iobuf_seg_t *)(PVOID)RemoveHeadList( &src_iobuf_p->seg_que ); + InsertTailList( &dst_iobuf_p->seg_que, &iobuf_seg_p->link ); + } +} + +/* if the buffer to be registered overlaps a buffer, already registered, + a race can happen between HCA, writing to the previously registered + buffer and the probing functions (MmProbeAndLockPages, MmSecureVirtualMemory), + used in 
the algorithm of memory registration. + To prevent the race we maintain reference counters for the physical pages, being registered, + and register every physical page FOR THE WRITE ACCESS only once.*/ + +int iobuf_register_with_cash( + IN u64 vaddr, + IN u64 size, + IN int is_user, + IN OUT ib_access_t *acc_p, + IN OUT mt_iobuf_t *iobuf_p) +{ + int rc, pa_in; + mt_iobuf_t sec_iobuf; + int i, page_in , page_out, page_in_total; + int nr_pages; + char *subregion_start, *va; + u64 subregion_size; + u64 rdc; // remain data counter - what is rest to lock + u64 delta; // he size of the last not full page of the first segment + ib_access_t acc; + + down(&g_pa_mutex); + + // register memory for read access to bring pages into the memory + rc = iobuf_register( vaddr, size, is_user, 0, iobuf_p); + + // on error or read access - exit + if (rc || !(*acc_p & IB_AC_LOCAL_WRITE)) + goto exit; + + // re-register buffer with the correct access rights + iobuf_init( (u64)vaddr, size, is_user, &sec_iobuf ); + nr_pages = ADDRESS_AND_SIZE_TO_SPAN_PAGES( vaddr, size ); + subregion_start = va = (char*)(ULONG_PTR)vaddr; + rdc = size; + pa_in = page_in = page_in_total = page_out = 0; + + for (i=0; i rdc) + subregion_size = rdc; + + // register the subregion + rc = iobuf_register( (u64)subregion_start, subregion_size, is_user, acc, &sec_iobuf); + if (rc) + goto cleanup; + + // prepare to the next loop + rdc -= subregion_size; + subregion_start +=subregion_size; + } + } + + // prepare to registration of the subregion + if (pa_in) { // SUBREGION WITH READ ACCESS + acc = 0; + subregion_size = (u64)page_in * PAGE_SIZE; + } + else { // SUBREGION WITH WRITE ACCESS + acc = IB_AC_LOCAL_WRITE; + subregion_size = (u64)page_out * PAGE_SIZE; + } + + // round the subregion size to the page boundary + delta = (u64)(subregion_start + subregion_size) & (PAGE_SIZE - 1); + subregion_size -= delta; + if (subregion_size > rdc) + subregion_size = rdc; + + // register the subregion + rc = iobuf_register( (u64)subregion_start, subregion_size, is_user, acc, &sec_iobuf); + if (rc) + goto cleanup; + + // cash phys pages + rc = pa_register(iobuf_p); + if (rc) + goto err_pa_reg; + + // replace the iobuf + iobuf_deregister( iobuf_p ); + sec_iobuf.is_cashed = TRUE; + __iobuf_copy( iobuf_p, &sec_iobuf ); + + // buffer is a part of also registered buffer - change the rights + if (page_in_total) + *acc_p = MTHCA_ACCESS_REMOTE_READ; + + goto exit; + +err_pa_reg: + iobuf_deregister( &sec_iobuf ); +cleanup: + iobuf_deregister( iobuf_p ); +exit: + up(&g_pa_mutex); + return rc; +} + +static void deregister_segment(mt_iobuf_seg_t * iobuf_seg_p) +{ + MmUnlockPages( iobuf_seg_p->mdl_p ); // unlock the buffer + IoFreeMdl( iobuf_seg_p->mdl_p ); // free MDL + ExFreePool(iobuf_seg_p); +} + +void iobuf_deregister(mt_iobuf_t *iobuf_p) +{ + mt_iobuf_seg_t *iobuf_seg_p; // pointer to current segment object + + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + + // release segments + while (!IsListEmpty( &iobuf_p->seg_que )) { + iobuf_seg_p = (mt_iobuf_seg_t *)(PVOID)RemoveTailList( &iobuf_p->seg_que ); + deregister_segment(iobuf_seg_p); + iobuf_p->seg_num--; + } + ASSERT(iobuf_p->seg_num == 0); +} + +void iobuf_deregister_with_cash(mt_iobuf_t *iobuf_p) +{ + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + + down(&g_pa_mutex); + if (iobuf_p->is_cashed) + pa_deregister(iobuf_p); + iobuf_deregister(iobuf_p); + up(&g_pa_mutex); +} + +void iobuf_iter_init( + IN mt_iobuf_t *iobuf_p, + IN OUT mt_iobuf_iter_t *iterator_p) +{ + iterator_p->seg_p = iobuf_p->seg_que.Flink; + 
iterator_p->pfn_ix = 0; +} + +// the function returns phys addresses of the pages, also for the first page +// if one wants to get the phys address of the buffer, one has to +// add the offset from the start of the page to the first phys address +// Returns: the number of entries, filled in page_tbl_p +// Returns 0 while at the end of list. +uint32_t iobuf_get_tpt_seg( + IN mt_iobuf_t *iobuf_p, + IN OUT mt_iobuf_iter_t *iterator_p, + IN uint32_t n_pages_in, + IN OUT uint64_t *page_tbl_p ) +{ + uint32_t i=0; // has to be initialized here for a premature exit + mt_iobuf_seg_t *seg_p; // pointer to current segment object + PPFN_NUMBER pfn_p; + uint32_t pfn_ix; // index of PFN in PFN array of the current segment + uint64_t *pa_buf_p = page_tbl_p; + + // prepare to the loop + seg_p = iterator_p->seg_p; // first segment of the first iobuf + pfn_ix= iterator_p->pfn_ix; + + // check, whether we at the end of the list + if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que) + goto exit; + pfn_p = MmGetMdlPfnArray( seg_p->mdl_p ) + pfn_ix; + + // pass along all the PFN arrays + for (; i < n_pages_in; i++, pa_buf_p++) { + // convert PFN to the physical address + *pa_buf_p = (uint64_t)*pfn_p++ << PAGE_SHIFT; + + // get to the next PFN + if (++pfn_ix >= seg_p->nr_pages) { + seg_p = (mt_iobuf_seg_t*)seg_p->link.Flink; + pfn_ix = 0; + if ((PVOID)seg_p == (PVOID)&iobuf_p->seg_que) { + i++; + break; + } + pfn_p = MmGetMdlPfnArray( seg_p->mdl_p ); + } + } + +exit: + iterator_p->seg_p = seg_p; + iterator_p->pfn_ix = pfn_ix; + return i; +} + + + + diff --git a/branches/IBFD/hw/mthca/kernel/mt_memory.h b/branches/IBFD/hw/mthca/kernel/mt_memory.h new file mode 100644 index 00000000..66be696d --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_memory.h @@ -0,0 +1,307 @@ +#ifndef MT_MEMORY_H +#define MT_MEMORY_H + +#include "iba/ib_types.h" + +// =========================================== +// CONSTANTS +// =========================================== + +#define MT_TAG_ATOMIC 'MOTA' +#define MT_TAG_KERNEL 'LNRK' +#define MT_TAG_HIGH 'HGIH' +#define MT_TAG_PCIPOOL 'PICP' +#define MT_TAG_IOMAP 'PAMI' + +// =========================================== +// SUBSTITUTIONS +// =========================================== + +#define memcpy_toio memcpy + +// =========================================== +// MACROS +// =========================================== + +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define NEXT_PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) + + +// =========================================== +// SYSTEM MEMORY +// =========================================== + +// memory +#define __GFP_NOWARN 0 /* Suppress page allocation failure warning */ +#define __GFP_HIGHMEM 0 + +#define GFP_ATOMIC 1 /* can't wait (i.e. 
DPC or higher) */ +#define GFP_KERNEL 2 /* can wait (npaged) */ +#define GFP_HIGHUSER 4 /* GFP_KERNEL, that can be in HIGH memory */ + + +#define SLAB_ATOMIC GFP_ATOMIC +#define SLAB_KERNEL GFP_KERNEL + +#if 1 +static inline void * kmalloc( SIZE_T bsize, unsigned int gfp_mask) +{ + void *ptr; + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + switch (gfp_mask) { + case GFP_ATOMIC: + ptr = ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_ATOMIC ); + break; + case GFP_KERNEL: + ptr = ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_KERNEL ); + break; + case GFP_HIGHUSER: + ptr = ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_HIGH ); + break; + default: + DbgPrint("kmalloc: unsupported flag %d\n", gfp_mask); + ptr = NULL; + break; + } + return ptr; +} +#else +#define kmalloc(bsize,flags) ExAllocatePoolWithTag( NonPagedPool, bsize, MT_TAG_KERNEL ) +#endif + +static inline void * kzalloc( SIZE_T bsize, unsigned int gfp_mask) +{ + void* va = kmalloc(bsize, gfp_mask); + if (va) + RtlZeroMemory(va, bsize); + return va; +} + +static inline void kfree (const void *pobj) +{ + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + if (pobj) + ExFreePool((void *)pobj); +} + +#define get_zeroed_page(mask) kzalloc(PAGE_SIZE, mask) +#define free_page(ptr) kfree(ptr) + + +// =========================================== +// IO SPACE <==> SYSTEM MEMORY +// =========================================== + + +/** +* ioremap - map bus memory into CPU space +* @offset: bus address of the memory +* @size: size of the resource to map +* +* ioremap performs a platform specific sequence of operations to +* make bus memory CPU accessible via the readb/readw/readl/writeb/ +* writew/writel functions and the other mmio helpers. The returned +* address is not guaranteed to be usable directly as a virtual +* address. 
+*/ +static inline void *ioremap(io_addr_t addr, SIZE_T size, SIZE_T* psize) +{ + PHYSICAL_ADDRESS pa; + void *va; + + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + pa.QuadPart = addr; + va = MmMapIoSpace( pa, size, MmNonCached ); + *psize = size; + return va; +} + +static inline void iounmap(void *va, SIZE_T size) +{ + MmUnmapIoSpace( va, size); +} + + // =========================================== + // DMA SUPPORT + // =========================================== + +#define PCI_DMA_BIDIRECTIONAL 0 +#define PCI_DMA_TODEVICE 1 +#define PCI_DMA_FROMDEVICE 2 +#define DMA_TO_DEVICE PCI_DMA_TODEVICE + + struct scatterlist { + dma_addr_t dma_address; /* logical (device) address */ + void * page; /* kernel virtual address */ + PMDL p_mdl; /* MDL, if any (used for user space buffers) */ + PSCATTER_GATHER_LIST p_os_sg; /* adapter scatter-gather list */ + unsigned int offset; /* offset in the first page */ + unsigned int length; /* buffer length */ + }; + + #define sg_dma_address(sg) ((sg)->dma_address) + #define sg_dma_len(sg) ((sg)->length) + + struct mthca_dev; + + int pci_map_sg(struct mthca_dev *dev, + struct scatterlist *sg, int nents, int direction); + + int pci_unmap_sg(struct mthca_dev *dev, + struct scatterlist *sg, int nents, int direction); + + void free_dma_mem( + IN struct mthca_dev *dev, + IN struct scatterlist *p_sg); + + void *alloc_dma_mem( + IN struct mthca_dev *dev, + IN unsigned long size, + OUT struct scatterlist *p_sg); + +static inline void *alloc_dma_zmem( + IN struct mthca_dev *dev, + IN unsigned long size, + OUT struct scatterlist *p_sg) +{ + void *va = alloc_dma_mem( dev, size, p_sg ); + if (va) + RtlZeroMemory(va, size); + return va; +} + +static inline void *alloc_dma_zmem_map( + IN struct mthca_dev *dev, + IN unsigned long size, + IN int direction, + OUT struct scatterlist *p_sg) +{ + void *va = alloc_dma_zmem( dev, size, p_sg ); + if (va) { + if (!pci_map_sg( dev, p_sg, 1, direction )) { + free_dma_mem( dev, p_sg ); + va = NULL; + } + } + return va; +} + +static inline void free_dma_mem_map( + IN struct mthca_dev *dev, + IN struct scatterlist *p_sg, + IN int direction ) +{ + pci_unmap_sg( dev, p_sg, 1, direction ); + free_dma_mem( dev, p_sg ); +} + + static inline dma_addr_t pci_mape_page(struct mthca_dev *dev, + void *va, unsigned long offset, SIZE_T size, int direction) + { + UNREFERENCED_PARAMETER(dev); + UNREFERENCED_PARAMETER(va); + UNREFERENCED_PARAMETER(offset); + UNREFERENCED_PARAMETER(size); + UNREFERENCED_PARAMETER(direction); + /* suppose, that pages where always translated to DMA space */ + return 0; /* i.e., we unmapped all the entries */ + } + + // =========================================== + // HELPERS + // =========================================== + + static inline int get_order(unsigned long size) +{ + int order; + + size = (size-1) >> (PAGE_SHIFT-1); + order = -1; + do { + size >>= 1; + order++; + } while (size); + return order; +} + +static inline int long_log2(unsigned long x) +{ + int r = 0; + for (x >>= 1; x > 0; x >>= 1) + r++; + return r; +} + +static inline unsigned long roundup_pow_of_two(unsigned long x) +{ + return (1UL << fls(x - 1)); +} + +// =========================================== +// PROTOTYPES +// =========================================== + +void put_page(struct scatterlist *sg); +int get_user_pages( + IN struct mthca_dev *dev, /* device */ + IN u64 start, /* address in user space */ + IN int npages, /* size in pages */ + IN int write_access, /* access rights */ + OUT struct scatterlist *sg /* s/g list */ + ); + 
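+/*
+ * Illustrative usage sketch for the DMA helpers declared above: allocate a
+ * zeroed, mapped common buffer and release it again.  The function name,
+ * the device pointer and the PAGE_SIZE length are hypothetical examples,
+ * not part of the driver; alloc_dma_zmem_map() already frees the buffer
+ * itself when the mapping step fails, so a NULL check is sufficient here.
+ */
+static inline int example_dma_roundtrip(struct mthca_dev *dev)
+{
+	struct scatterlist sg;
+	void *va = alloc_dma_zmem_map(dev, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &sg);
+	if (!va)
+		return -ENOMEM;		/* allocation or mapping failed */
+	/* ... hand sg_dma_address(&sg) to the hardware, use va from the CPU ... */
+	free_dma_mem_map(dev, &sg, PCI_DMA_BIDIRECTIONAL);
+	return 0;
+}
+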
+typedef struct _mt_iobuf { + u64 va; /* virtual address of the buffer */ + u64 size; /* size in bytes of the buffer */ + LIST_ENTRY seg_que; + u32 nr_pages; + int is_user; + int seg_num; + int is_cashed; +} mt_iobuf_t; + +/* iterator for getting segments of tpt */ +typedef struct _mt_iobuf_iter { + void * seg_p; /* the item from where to take the next translations */ + unsigned int pfn_ix; /* index from where to take the next translation */ +} mt_iobuf_iter_t; + +void iobuf_deregister_with_cash(IN mt_iobuf_t *iobuf_p); + +void iobuf_deregister(IN mt_iobuf_t *iobuf_p); + +void iobuf_init( + IN u64 va, + IN u64 size, + IN int is_user, + IN OUT mt_iobuf_t *iobuf_p); + +int iobuf_register_with_cash( + IN u64 vaddr, + IN u64 size, + IN int is_user, + IN OUT ib_access_t *acc_p, + IN OUT mt_iobuf_t *iobuf_p); + +int iobuf_register( + IN u64 va, + IN u64 size, + IN int is_user, + IN ib_access_t acc, + IN OUT mt_iobuf_t *iobuf_p); + +void iobuf_iter_init( + IN mt_iobuf_t *iobuf_p, + IN OUT mt_iobuf_iter_t *iterator_p); + +uint32_t iobuf_get_tpt_seg( + IN mt_iobuf_t *iobuf_p, + IN OUT mt_iobuf_iter_t *iterator_p, + IN uint32_t n_pages_in, + IN OUT uint64_t *page_tbl_p ); + +unsigned long copy_from_user(void *to, const void *from, unsigned long n); +unsigned long copy_to_user(void *to, const void *from, unsigned long n); + + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/mt_pa_cash.c b/branches/IBFD/hw/mthca/kernel/mt_pa_cash.c new file mode 100644 index 00000000..3b0f27bc --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_pa_cash.c @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: mlnx_uvp_cq.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "mt_pa_cash.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_pa_cash.tmh" +#endif + +/////////////////////////////////////////////////////////////////////////// +// +// RESTRICTIONS +// +/////////////////////////////////////////////////////////////////////////// + +#ifdef _WIN64 +#define MAX_PAGES_SUPPORTED (64 * 1024 * 1024) // 256 GB +#else +#define MAX_PAGES_SUPPORTED (16 * 1024 * 1024) // 64 GB +#endif + +#define FREE_LIST_TRESHOLD 256 // max number of pages in free list + +/////////////////////////////////////////////////////////////////////////// +// +// CONSTANTS +// +/////////////////////////////////////////////////////////////////////////// + +#define PA_TABLE_ENTRY_SIZE sizeof(pa_table_entry_t) +#define PA_TABLE_ENTRY_NUM (PAGE_SIZE / PA_TABLE_ENTRY_SIZE) +#define PA_TABLE_SIZE (PA_TABLE_ENTRY_SIZE * PA_TABLE_ENTRY_NUM) + +#define PA_DIR_ENTRY_SIZE sizeof(pa_dir_entry_t) +#define PA_DIR_ENTRY_NUM (MAX_PAGES_SUPPORTED /PA_TABLE_ENTRY_NUM) +#define PA_DIR_SIZE (PA_DIR_ENTRY_SIZE * PA_DIR_ENTRY_NUM) + + +/////////////////////////////////////////////////////////////////////////// +// +// STRUCTURES +// +/////////////////////////////////////////////////////////////////////////// + +typedef struct { + int ref_cnt; +} pa_table_entry_t; + +typedef struct { + pa_table_entry_t *pa_te; /* pointer to one page of pa_table_entry_t elements */ + int used; /* number of pa_table_entry_t elements, used now. When 0 - the page may be freed */ +} pa_dir_entry_t; + +typedef struct pa_cash_s { + pa_dir_entry_t *pa_dir; + SINGLE_LIST_ENTRY free_list_hdr; + uint32_t free_nr_pages; + uint32_t free_list_threshold; + uint32_t max_nr_pages; + uint32_t cur_nr_pages; +} pa_cash_t; + + + +/////////////////////////////////////////////////////////////////////////// +// +// GLOBALS +// +/////////////////////////////////////////////////////////////////////////// + +KMUTEX g_pa_mutex; +u64 g_pa[1024]; +pa_cash_t g_cash; + + +/////////////////////////////////////////////////////////////////////////// +// +// STATIC FUNCTIONS +// +/////////////////////////////////////////////////////////////////////////// + +static uint32_t __calc_threshold() +{ + // threshold expresses the max length of free pages list, which gets released only at driver unload time + // so it can be calculated to be proportional to the system memory size + return FREE_LIST_TRESHOLD; +} + +static pa_table_entry_t *__alloc_page() +{ + pa_table_entry_t *pa_te; + + /* take from the list of reserved if it is not empty */ + if (g_cash.free_nr_pages) { + pa_te = (pa_table_entry_t *)PopEntryList( &g_cash.free_list_hdr ); + ((SINGLE_LIST_ENTRY*)pa_te)->Next = NULL; + g_cash.free_nr_pages--; + } + else /* allocate new page */ + pa_te = (pa_table_entry_t *)kzalloc( PA_TABLE_SIZE, GFP_KERNEL ); + + return pa_te; +} + +static void __free_page(pa_table_entry_t *pa_te) +{ + if (g_cash.free_nr_pages < g_cash.free_list_threshold) { + PushEntryList( &g_cash.free_list_hdr, (SINGLE_LIST_ENTRY*)pa_te ); + g_cash.free_nr_pages++; + } + else + kfree(pa_te); +} + +static pa_table_entry_t * __get_page(uint32_t ix) +{ + pa_table_entry_t *pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + + /* no this page_table - add a new one */ + if (!pa_te) { + pa_te = __alloc_page(); + if (!pa_te) + return NULL; + g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = pa_te; + g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used = 0; + g_cash.cur_nr_pages++; + } + + return pa_te; +} + 
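+/*
+ * Illustrative sketch of the two-level lookup used by __add_pa()/__rmv_pa()
+ * below: the physical address is reduced to a page index, whose high bits
+ * select a directory slot and whose low bits select the reference counter
+ * inside that slot's page of entries.  The helper name is hypothetical and
+ * the function is only meant to document the index arithmetic.
+ */
+static inline pa_table_entry_t *__example_pa_lookup(uint64_t pa)
+{
+	uint32_t ix = (uint32_t)(pa >> PAGE_SHIFT);	/* page index */
+	pa_table_entry_t *pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te;
+	return pa_te ? &pa_te[ix % PA_TABLE_ENTRY_NUM] : NULL;
+}
+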
+static void __put_page(uint32_t ix) +{ + __free_page(g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te); + g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te = NULL; + g_cash.cur_nr_pages--; +} + +static int __add_pa(uint64_t pa) +{ + uint32_t ix = (uint32_t)(pa >> PAGE_SHIFT); + pa_table_entry_t *pa_te; + + /* or pa is incorrect or memory that big is not supported */ + if (ix > g_cash.max_nr_pages) { + ASSERT(FALSE); + return -EFAULT; + } + + /* get page address */ + pa_te = __get_page(ix); + if (!pa_te) + return -ENOMEM; + + /* register page address */ + if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) + ++g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; + ++pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; + + return 0; +} + + +static int __rmv_pa(uint64_t pa) +{ + uint32_t ix = (uint32_t)(pa >> PAGE_SHIFT); + pa_table_entry_t *pa_te; + + /* or pa is incorrect or memory that big is not supported */ + if (ix > g_cash.max_nr_pages) { + ASSERT(FALSE); + return -EFAULT; + } + + pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + + /* no this page_table - error*/ + if (!pa_te) { + ASSERT(FALSE); + return -EFAULT; + } + + /* deregister page address */ + --pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; + ASSERT(pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt >= 0); + + /* release the page on need */ + if (!pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt) + --g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used; + if (!g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].used) + __put_page(ix); + + return 0; +} + + + +/////////////////////////////////////////////////////////////////////////// +// +// PUBLIC FUNCTIONS +// +/////////////////////////////////////////////////////////////////////////// + + +int pa_register(mt_iobuf_t *iobuf_p) +{ + int i,j,n; + mt_iobuf_iter_t iobuf_iter; + + iobuf_iter_init( iobuf_p, &iobuf_iter ); + n = 0; + for (;;) { + i = iobuf_get_tpt_seg( iobuf_p, &iobuf_iter, + sizeof(g_pa) / sizeof (u64), g_pa ); + if (!i) + break; + for (j=0; j> PAGE_SHIFT); + pa_table_entry_t *pa_te; + + /* or pa is incorrect or memory that big is not supported */ + if (ix > g_cash.max_nr_pages) { + ASSERT(FALSE); + return -EFAULT; + } + + pa_te = g_cash.pa_dir[ix / PA_TABLE_ENTRY_NUM].pa_te; + + /* no this page_table */ + if (!pa_te) + return 0; + + return pa_te[ix % PA_TABLE_ENTRY_NUM].ref_cnt; +} + +int pa_cash_init() +{ + void *pa_dir; + pa_dir = kzalloc(PA_DIR_SIZE, GFP_KERNEL); + + if (!pa_dir) + return -ENOMEM; + g_cash.pa_dir = pa_dir; + g_cash.max_nr_pages = PA_TABLE_ENTRY_NUM * PA_DIR_ENTRY_NUM; + g_cash.free_list_hdr.Next = NULL; + g_cash.cur_nr_pages = 0; + g_cash.free_nr_pages = 0; + g_cash.free_list_threshold = __calc_threshold(); + KeInitializeMutex(&g_pa_mutex, 0); + return 0; +} + diff --git a/branches/IBFD/hw/mthca/kernel/mt_pa_cash.h b/branches/IBFD/hw/mthca/kernel/mt_pa_cash.h new file mode 100644 index 00000000..4ca6eb57 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_pa_cash.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mlnx_uvp_cq.c 1611 2006-08-20 14:48:55Z sleybo $ + */ + +#include "mthca_dev.h" + +extern KMUTEX g_pa_mutex; + +int pa_cash_init(); + +void pa_cash_release(); + +int pa_is_registered(uint64_t pa); + +int pa_register(mt_iobuf_t *iobuf_p); + +void pa_deregister(mt_iobuf_t *iobuf_p); + +void pa_cash_print(); + diff --git a/branches/IBFD/hw/mthca/kernel/mt_packer.c b/branches/IBFD/hw/mthca/kernel/mt_packer.c new file mode 100644 index 00000000..a61cece8 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_packer.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_packer.tmh" +#endif + +static u64 value_read(int offset, int size, u8 *structure) +{ + switch (size) { + case 1: return *(u8 *) (structure + offset); + case 2: return cl_ntoh16(*(__be16 *) (structure + offset)); + case 4: return cl_ntoh32(*(__be32 *) (structure + offset)); + case 8: return cl_ntoh64(*(__be64 *) (structure + offset)); + default: + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Field size %d bits not handled\n", size * 8)); + return 0; + } +} + +/** + * ib_pack - Pack a structure into a buffer + * @desc:Array of structure field descriptions + * @desc_len:Number of entries in @desc + * @structure:Structure to pack from + * @buf:Buffer to pack into + * + * ib_pack() packs a list of structure fields into a buffer, + * controlled by the array of fields in @desc. + */ +void ib_pack(const struct ib_field *desc, + int desc_len, + u8 *structure, + u8 *buf) +{ + int i; + CPU_2_BE64_PREP; + + for (i = 0; i < desc_len; ++i) { + if (desc[i].size_bits <= 32) { + int shift; + u32 val; + __be32 mask; + __be32 *addr; + + shift = 32 - desc[i].offset_bits - desc[i].size_bits; + if (desc[i].struct_size_bytes) + val = (u32)value_read(desc[i].struct_offset_bytes, + desc[i].struct_size_bytes, + structure) << shift; + else + val = 0; + + mask = cl_hton32(((1Ui64 << desc[i].size_bits) - 1) << shift); + addr = (__be32 *) buf + desc[i].offset_words; + *addr = (*addr & ~mask) | (cl_hton32(val) & mask); + } else if (desc[i].size_bits <= 64) { + int shift; + u64 val; + __be64 mask; + __be64 *addr; + + shift = 64 - desc[i].offset_bits - desc[i].size_bits; + if (desc[i].struct_size_bytes) + val = value_read(desc[i].struct_offset_bytes, + desc[i].struct_size_bytes, + structure) << shift; + else + val = 0; + + mask = CPU_2_BE64((~0Ui64 >> (64 - desc[i].size_bits)) << shift); + addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words); + *addr = (*addr & ~mask) | (cl_hton64(val) & mask); + } else { + if (desc[i].offset_bits % 8 || + desc[i].size_bits % 8) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Structure field %s of size %d " + "bits is not byte-aligned\n", + desc[i].field_name, desc[i].size_bits)); + } + + if (desc[i].struct_size_bytes) + memcpy(buf + desc[i].offset_words * 4 + + desc[i].offset_bits / 8, + structure + desc[i].struct_offset_bytes, + desc[i].size_bits / 8); + else + RtlZeroMemory(buf + desc[i].offset_words * 4 + desc[i].offset_bits / 8, + desc[i].size_bits / 8); + } + } +} + +static void value_write(int offset, int size, u64 val, u8 *structure) +{ + switch (size * 8) { + case 8: *( u8 *) (structure + offset) = (u8)val; break; + case 16: *(__be16 *) (structure + offset) = cl_hton16((u16)val); break; + case 32: *(__be32 *) (structure + offset) = cl_hton32((u32)val); break; + case 64: *(__be64 *) (structure + offset) = cl_hton64(val); break; + default: + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Field size %d bits not handled\n", size * 8)); + } +} + +/** + * ib_unpack - Unpack a buffer into a structure + * @desc:Array of structure field descriptions + * @desc_len:Number of entries in @desc + * @buf:Buffer to unpack from + * @structure:Structure to unpack into + * + * ib_pack() unpacks a list of structure fields from a buffer, + * controlled by the array of fields in @desc. 
+ */ +void ib_unpack(const struct ib_field *desc, + int desc_len, + u8 *buf, + u8 *structure) +{ + int i; + + for (i = 0; i < desc_len; ++i) { + if (!desc[i].struct_size_bytes) + continue; + + if (desc[i].size_bits <= 32) { + int shift; + u32 val; + u32 mask; + __be32 *addr; + + shift = 32 - desc[i].offset_bits - desc[i].size_bits; + mask = ((1Ui64 << desc[i].size_bits) - 1) << shift; + addr = (__be32 *) buf + desc[i].offset_words; + val = (cl_ntoh32(*addr) & mask) >> shift; + value_write(desc[i].struct_offset_bytes, + desc[i].struct_size_bytes, + val, + structure); + } else if (desc[i].size_bits <= 64) { + int shift; + u64 val; + u64 mask; + __be64 *addr; + + shift = 64 - desc[i].offset_bits - desc[i].size_bits; + mask = (~0Ui64 >> (64 - desc[i].size_bits)) << shift; + addr = (__be64 *) buf + desc[i].offset_words; + val = (cl_ntoh64(*addr) & mask) >> shift; + value_write(desc[i].struct_offset_bytes, + desc[i].struct_size_bytes, + val, + structure); + } else { + if (desc[i].offset_bits % 8 || + desc[i].size_bits % 8) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Structure field %s of size %d " + "bits is not byte-aligned\n", + desc[i].field_name, desc[i].size_bits)); + } + + memcpy(structure + desc[i].struct_offset_bytes, + buf + desc[i].offset_words * 4 + + desc[i].offset_bits / 8, + desc[i].size_bits / 8); + } + } +} diff --git a/branches/IBFD/hw/mthca/kernel/mt_pci.h b/branches/IBFD/hw/mthca/kernel/mt_pci.h new file mode 100644 index 00000000..3f389ca9 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_pci.h @@ -0,0 +1,131 @@ +#ifndef MT_PCI_H +#define MT_PCI_H + +// =========================================== +// LITERALS +// =========================================== + +#ifndef PCI_VENDOR_ID_MELLANOX +#define PCI_VENDOR_ID_MELLANOX 0x15b3 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_TAVOR +#define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT +#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_ARBEL +#define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI_OLD +#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI +#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#endif + +#ifndef PCI_VENDOR_ID_TOPSPIN +#define PCI_VENDOR_ID_TOPSPIN 0x1867 +#endif + +/* live fishes */ +#ifndef PCI_DEVICE_ID_MELLANOX_TAVOR_BD +#define PCI_DEVICE_ID_MELLANOX_TAVOR_BD 0x5a45 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_ARBEL_BD +#define PCI_DEVICE_ID_MELLANOX_ARBEL_BD 0x6279 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI_OLD_BD +#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD_BD 0x5e8d +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI_BD +#define PCI_DEVICE_ID_MELLANOX_SINAI_BD 0x6275 +#endif + +// =========================================== +// TYPES +// =========================================== + + +// =========================================== +// MACROS/FUNCTIONS +// =========================================== + +// get bar boundaries +#if 1 +#define pci_resource_start(dev,bar_num) ((dev)->ext->bar[bar_num].phys) +#define pci_resource_len(dev,bar_num) ((dev)->ext->bar[bar_num].size) +#else +static inline uint64_t pci_resource_start(struct mthca_dev *dev, int bar_num) +{ + return dev->ext->bar[bar_num].phys; +} +#endif + + +// i/o to registers + +static inline u64 readq(const volatile void __iomem *addr) +{ + //TODO: write atomic implementation of _IO_READ_QWORD and change mthca_doorbell.h + u64 val; + READ_REGISTER_BUFFER_ULONG((PULONG)(addr), 
(PULONG)&val, 2 ); + return val; +} + +static inline u32 readl(const volatile void __iomem *addr) +{ + return READ_REGISTER_ULONG((PULONG)(addr)); +} + +static inline u16 reads(const volatile void __iomem *addr) +{ + return READ_REGISTER_USHORT((PUSHORT)(addr)); +} + +static inline u8 readb(const volatile void __iomem *addr) +{ + return READ_REGISTER_UCHAR((PUCHAR)(addr)); +} + +#define __raw_readq readq +#define __raw_readl readl +#define __raw_reads reads +#define __raw_readb readb + +static inline void writeq(unsigned __int64 val, volatile void __iomem *addr) +{ + //TODO: write atomic implementation of _IO_WRITE_QWORD and change mthca_doorbell.h + WRITE_REGISTER_BUFFER_ULONG( (PULONG)(addr), (PULONG)&val, 2 ); +} + +static inline void writel(unsigned int val, volatile void __iomem *addr) +{ + WRITE_REGISTER_ULONG((PULONG)(addr),val); +} + +static inline void writes(unsigned short val, volatile void __iomem *addr) +{ + WRITE_REGISTER_USHORT((PUSHORT)(addr),val); +} + +static inline void writeb(unsigned char val, volatile void __iomem *addr) +{ + WRITE_REGISTER_UCHAR((PUCHAR)(addr),val); +} + +#define __raw_writeq writeq +#define __raw_writel writel +#define __raw_writes writes +#define __raw_writeb writeb + +#endif + diff --git a/branches/IBFD/hw/mthca/kernel/mt_pcipool.h b/branches/IBFD/hw/mthca/kernel/mt_pcipool.h new file mode 100644 index 00000000..996cb11d --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_pcipool.h @@ -0,0 +1,103 @@ +#ifndef MT_PCIPOOL_H +#define MT_PCIPOOL_H + +typedef struct pci_pool { + size_t size; + struct mthca_dev *mdev; + char name [32]; + NPAGED_LOOKASIDE_LIST pool_hdr; +} pci_pool_t; + +// taken from dmapool.c + +/** +* pci_pool_create - Creates a pool of consistent memory blocks, for dma. +* @name: name of pool, for diagnostics +* @mdev: device that will be doing the DMA +* @size: size of the blocks in this pool. +* @align: alignment requirement for blocks; must be a power of two +* @allocation: returned blocks won't cross this boundary (or zero) +* Context: !in_interrupt() +* +* Returns a dma allocation pool with the requested characteristics, or +* null if one can't be created. Given one of these pools, dma_pool_alloc() +* may be used to allocate memory. Such memory will all have "consistent" +* DMA mappings, accessible by the device and its driver without using +* cache flushing primitives. The actual size of blocks allocated may be +* larger than requested because of alignment. +* +* If allocation is nonzero, objects returned from dma_pool_alloc() won't + * cross that size boundary. This is useful for devices which have + * addressing restrictions on individual DMA transfers, such as not crossing + * boundaries of 4KBytes. + */ + +pci_pool_t * +pci_pool_create (const char *name, struct mthca_dev *mdev, + size_t size, size_t align, size_t allocation); + +/** + * dma_pool_alloc - get a block of consistent memory + * @pool: dma pool that will produce the block + * @mem_flags: GFP_* bitmask + * @handle: pointer to dma address of block + * + * This returns the kernel virtual address of a currently unused block, + * and reports its dma address through the handle. + * If such a memory block can't be allocated, null is returned. 
+ */ +static inline void * +pci_pool_alloc (pci_pool_t *pool, int mem_flags, dma_addr_t *handle) +{ + PHYSICAL_ADDRESS pa; + void * ptr; + UNREFERENCED_PARAMETER(mem_flags); + + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + + ptr = ExAllocateFromNPagedLookasideList( &pool->pool_hdr ); + if (ptr != NULL) { + pa = MmGetPhysicalAddress( ptr ); + *handle = pa.QuadPart; + } + return ptr; +} + + +/** +* dma_pool_free - put block back into dma pool +* @pool: the dma pool holding the block +* @vaddr: virtual address of block +* @dma: dma address of block +* +* Caller promises neither device nor driver will again touch this block +* unless it is first re-allocated. +*/ +static inline void +pci_pool_free (pci_pool_t *pool, void *vaddr, dma_addr_t dma) +{ + UNREFERENCED_PARAMETER(dma); + MT_ASSERT( KeGetCurrentIrql() <= DISPATCH_LEVEL ); + ExFreeToNPagedLookasideList( &pool->pool_hdr, vaddr ); +} + + + +/** + * pci_pool_destroy - destroys a pool of dma memory blocks. + * @pool: dma pool that will be destroyed + * Context: !in_interrupt() + * + * Caller guarantees that no more memory from the pool is in use, + * and that nothing will try to use the pool after this call. + */ +static inline void +pci_pool_destroy (pci_pool_t *pool) +{ + ExDeleteNPagedLookasideList( &pool->pool_hdr ); + ExFreePool( pool); +} + + + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/mt_reset_tavor.c b/branches/IBFD/hw/mthca/kernel/mt_reset_tavor.c new file mode 100644 index 00000000..399c2f16 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_reset_tavor.c @@ -0,0 +1,485 @@ +#include +#include +#include "hca_driver.h" +#include "mthca.h" +#include "hca_debug.h" +#include "Mt_l2w.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_reset_tavor.tmh" +#endif + + +#pragma warning(disable : 4996) + +/* limitations */ +#define N_BUSES 16 /* max number of PCI buses */ +#define N_DEVICES 32 /* max number of devices on one bus */ +#define N_FUNCTIONS 8 /* max number of functions on one device */ +#define N_CARDS 8 /* max number of HCA cards */ + +/*----------------------------------------------------------------*/ + +PWCHAR +WcharFindChar( + IN PWCHAR pi_BufStart, + IN PWCHAR pi_BufEnd, + IN WCHAR pi_FromPattern, + IN WCHAR pi_ToPattern + ) +/*++ + +Routine Description: + Converts wide-character string into ASCII + +Arguments: + + pi_BufStart.......... start of the source string + pi_BufEnd............ end of the source string + pi_FromPattern....... start of pattern range to find + pi_ToPattern......... 
end of pattern range to find + +Return Value: + + pointer to the first pattern found or NULL (when reached the end) + +--*/ +{ /* WcharFindChar */ + + PWCHAR l_pResult = pi_BufStart; + + while (l_pResult < pi_BufEnd ) + { + if (*l_pResult >= pi_FromPattern && *l_pResult <= pi_ToPattern) + return l_pResult; + l_pResult++; + } + + return NULL; + +} /* WcharFindChar */ + + +/*----------------------------------------------------------------*/ + +/* + * Function: MdGetDevLocation + * + * Parameters: + * IN pi_pPdo - PDO of a device in question + * OUT po_pBus - pointer to the bus number of the device in question + * OUT po_pDevFunc - pointer to dev/func of the device, if found + * + * Returns: + * not STATUS_SUCCESS - the device location was not found + * STATUS_SUCCESS - the device location was found and returned in OUT parameters + * + * Description: + * The function uses IoGetDeviceProperty to get the location of a device with given PDO + * + */ +static NTSTATUS +MdGetDevLocation( + IN PDEVICE_OBJECT pi_pPdo, + OUT ULONG * po_pBus, + OUT ULONG * po_pDevFunc + ) +{ + ULONG l_BusNumber, l_DevNumber, l_Function, l_ResultLength = 0; + WCHAR l_Buffer[40], *l_pEnd, *l_pBuf = l_Buffer, *l_pBufEnd = l_Buffer + sizeof(l_Buffer); + NTSTATUS l_Status; + UNICODE_STRING l_UnicodeNumber; + + /* prepare */ + l_ResultLength = 0; + RtlZeroMemory( l_Buffer, sizeof(l_Buffer) ); + + /* Get the device number */ + l_Status = IoGetDeviceProperty(pi_pPdo, + DevicePropertyLocationInformation, sizeof(l_Buffer), l_Buffer, &l_ResultLength); + + /* Verify if the function was successful */ + if ( !NT_SUCCESS(l_Status) || !l_ResultLength ) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("(MdGetDevLocation) Unable to get device number: Status 0x%x, ResultSize %d \n", + l_Status, l_ResultLength )); + goto exit; + } + + // ALL THE BELOW CRAP WE DO INSTEAD OF + // sscanf(l_Buffer, "PCI bus %d, device %d, function %d", &l_BusNumber, &l_DevNumber, &l_Function ); + + /* take bus number */ + l_pBuf = WcharFindChar( l_pBuf, l_pBufEnd, L'0', L'9' ); + if (l_pBuf == NULL) goto err; + l_pEnd = WcharFindChar( l_pBuf, l_pBufEnd, L',', L',' ); + if (l_pEnd == NULL) goto err; + l_UnicodeNumber.Length = l_UnicodeNumber.MaximumLength = (USHORT)((PCHAR)l_pEnd - (PCHAR)l_pBuf); + l_UnicodeNumber.Buffer = l_pBuf; l_pBuf = l_pEnd; + RtlUnicodeStringToInteger( &l_UnicodeNumber, 10, &l_BusNumber); + + /* take slot number */ + l_pBuf = WcharFindChar( l_pBuf, l_pBufEnd, L'0', L'9' ); + if (l_pBuf == NULL) goto err; + l_pEnd = WcharFindChar( l_pBuf, l_pBufEnd, L',', L',' ); + if (l_pEnd == NULL) goto err; + l_UnicodeNumber.Length = l_UnicodeNumber.MaximumLength = (USHORT)((PCHAR)l_pEnd - (PCHAR)l_pBuf); + l_UnicodeNumber.Buffer = l_pBuf; l_pBuf = l_pEnd; + RtlUnicodeStringToInteger( &l_UnicodeNumber, 10, &l_DevNumber); + + /* take function number */ + *(l_Buffer + (l_ResultLength>>1)) = 0; /* set end of string */ + l_pBuf = WcharFindChar( l_pBuf, l_pBufEnd, L'0', L'9' ); + if (l_pBuf == NULL) goto err; + l_pEnd = WcharFindChar( l_pBuf, l_pBufEnd, 0, 0 ); + if (l_pEnd == NULL) goto err; + l_UnicodeNumber.Length = l_UnicodeNumber.MaximumLength = (USHORT)((PCHAR)l_pEnd - (PCHAR)l_pBuf); + l_UnicodeNumber.Buffer = l_pBuf; l_pBuf = l_pEnd; + RtlUnicodeStringToInteger( &l_UnicodeNumber, 10, &l_Function); + + /* return the results */ + *po_pBus = l_BusNumber; + *po_pDevFunc = (l_DevNumber & 0x01f) | ((l_Function & 7) << 5); + + goto exit; + +err: + l_Status = STATUS_UNSUCCESSFUL; +exit: + return l_Status; +} + 
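+/*
+ * Illustrative sketch of the dev/func packing performed by MdGetDevLocation()
+ * above: for a location string such as "PCI bus 2, device 4, function 1"
+ * (values chosen arbitrarily) the routine reports bus 2 and DevFunc 0x24,
+ * i.e. the 5-bit device number in the low bits with the 3-bit function
+ * number above it.  The helper name is hypothetical.
+ */
+static inline ULONG ExampleMakeDevFunc(ULONG dev, ULONG func)
+{
+	return (dev & 0x01f) | ((func & 7) << 5);	/* same packing as above */
+}
+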
+/*----------------------------------------------------------------*/ + +/* Function: SendAwaitIrpCompletion + * + * Parameters: + * DeviceObject, Irp...... unused + * Context................ event to signal when the lower driver completes the IRP + * + * Description: + * IRP completion routine + * + * Returns: + * STATUS_MORE_PROCESSING_REQUIRED, so that the IRP is not completed and the caller keeps ownership of it + * +*/ +static +NTSTATUS +SendAwaitIrpCompletion ( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context + ) +{ + UNREFERENCED_PARAMETER (DeviceObject); + UNREFERENCED_PARAMETER (Irp); + KeSetEvent ((PKEVENT) Context, IO_NO_INCREMENT, FALSE); + return STATUS_MORE_PROCESSING_REQUIRED; // Keep this IRP +} + +/*------------------------------------------------------------------------------------------------------*/ + +/* + * Function: SendAwaitIrp + * + * Description: + * Create an IRP, send it down the stack and wait for the response (blocking mode) + * + * Parameters: + * pi_pFdo................ our device object + * pi_pLdo................ lower device object + * pi_MajorCode........... IRP major code + * pi_MinorCode........... IRP minor code + * pi_pBuffer............. parameter buffer + * pi_nSize............... size of the buffer + * po_pInfo............... returned Information field from the IoStatus block + * + * Returns: + * standard NTSTATUS return codes + * +*/ +static +NTSTATUS +SendAwaitIrp( + IN PDEVICE_OBJECT pi_pFdo, + IN PDEVICE_OBJECT pi_pLdo, + IN ULONG pi_MajorCode, + IN ULONG pi_MinorCode, + IN PVOID pi_pBuffer, + IN int pi_nSize, + OUT PVOID * po_pInfo + ) +/*++ + + Routine Description: + + Create an IRP, send it down the stack and wait for the response (blocking mode) + + Arguments: + + pi_pFdo................ our device + pi_pLdo................ lower device + pi_MajorCode........... IRP major code + pi_MinorCode........... IRP minor code + pi_pBuffer............. parameter buffer + pi_nSize............... size of the buffer + po_pInfo............... returned Information field from the IoStatus block + + Returns: + + standard NTSTATUS return codes. 
+ + Notes: + +--*/ +{ /* SendAwaitIrp */ + // Event + KEVENT l_hEvent; + // Pointer to IRP + PIRP l_pIrp; + // Stack location + PIO_STACK_LOCATION l_pStackLocation; + // Returned status + NTSTATUS l_Status; + // when to invoke + BOOLEAN InvokeAlways = TRUE; + + // call validation + if(KeGetCurrentIrql() != PASSIVE_LEVEL) + return STATUS_SUCCESS; + + // create event + KeInitializeEvent(&l_hEvent, NotificationEvent, FALSE); + + // build IRP request to USBD driver + l_pIrp = IoAllocateIrp( pi_pFdo->StackSize, FALSE ); + + // validate request + if (!l_pIrp) + { + //MdKdPrint( DBGLVL_MAXIMUM, ("(SendAwaitIrp) Unable to allocate IRP !\n")); + return STATUS_INSUFFICIENT_RESOURCES; + } + + // fill IRP + l_pIrp->IoStatus.Status = STATUS_NOT_SUPPORTED; + + // set completion routine + IoSetCompletionRoutine(l_pIrp,SendAwaitIrpCompletion, &l_hEvent, InvokeAlways, InvokeAlways, InvokeAlways); + + // fill stack location + l_pStackLocation = IoGetNextIrpStackLocation(l_pIrp); + l_pStackLocation->MajorFunction= (UCHAR)pi_MajorCode; + l_pStackLocation->MinorFunction= (UCHAR)pi_MinorCode; + RtlCopyMemory( &l_pStackLocation->Parameters, pi_pBuffer, pi_nSize ); + + // Call lower driver perform request + l_Status = IoCallDriver( pi_pLdo, l_pIrp ); + + // if the request not performed --> wait + if (l_Status == STATUS_PENDING) + { + // Wait until the IRP will be complete + KeWaitForSingleObject( + &l_hEvent, // event to wait for + Executive, // thread type (to wait into its context) + KernelMode, // mode of work + FALSE, // alertable + NULL // timeout + ); + l_Status = l_pIrp->IoStatus.Status; + } + + if (po_pInfo) + *po_pInfo = (PVOID)l_pIrp->IoStatus.Information; + + IoFreeIrp(l_pIrp); + return l_Status; + +} /* SendAwaitIrp */ + + +/*------------------------------------------------------------------------------------------------------*/ + +/* + * Function: FindBridgeIf_new + * + * Parameters: + * IN pi_pPdo - PDO of HCA's bus device + * IN pi_Bus, pi_DevFunc - bridge location + * OUT po_pPdo - pointer to PDO of the bridge, when found + * + * Returns: + * FALSE - the bridge was not found + * TRUE - a device was found; *po_pPdo contains its PDO + * + * Description: + * The function finds and opens the bus interface for Tavor HCA + * + * Algorithm: + * 1. find all PDOs of PCI.SYS driver and save it into an array; + * 2. For each PDO open its bus i/f and check whether it is our bridge; + * + * Note: + * 1. It is a "hack" algorithm. It uses some fields of system structures and some + * optimistic assumptions - see more below + * 2. We dangerously assume, that during part to of the algoritm no PDO will removed or added ! + * 3. PCI.SYS gives to its child devices names like \Device\NTPNP_PCI00nn. I tried to get Bridge's + * PDO by calling IoGetDeviceObjectPointer with all such names, but it returns STATUS_NO_SUCH_DEVICE + * for the *right* name of Bridge device !(IoGetDeviceObjectPointer really opens the device. 
Maybe Bridge is in exclusive use) + */ +int +FindBridgeIf( + IN hca_dev_ext_t *pi_ext, + OUT PBUS_INTERFACE_STANDARD pi_pInterface + ) +{ + NTSTATUS l_Status; + int rc = FALSE; /* result - "not found" by default */ + int n_pdos = 0; /* number of PCI.SYS's PDOs */ + PDEVICE_OBJECT *pdo; /* an array of PCI.SYS's PDOs */ + PDEVICE_OBJECT l_pHcaPdo; + + { // get HCA's bus PDO + IO_STACK_LOCATION l_Iosl; + PDEVICE_RELATIONS l_pDr; + + // find PDO of our bus driver (bypassing possible low filter drivers) + RtlZeroMemory( &l_Iosl, sizeof(l_Iosl) ); + l_Iosl.Parameters.QueryDeviceRelations.Type = TargetDeviceRelation; + l_Status = SendAwaitIrp( + pi_ext->cl_ext.p_self_do, + pi_ext->cl_ext.p_next_do, + IRP_MJ_PNP, + IRP_MN_QUERY_DEVICE_RELATIONS, + &l_Iosl.Parameters, + sizeof(l_Iosl.Parameters.QueryDeviceRelations), + &l_pDr + ); + + if (!NT_SUCCESS (l_Status)) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("IRP_MN_QUERY_DEVICE_RELATIONS for TargetDeviceRelation failed (%#x);: Fdo %p, Ldo %p \n", + l_Status, pi_ext->cl_ext.p_self_do, pi_ext->cl_ext.p_next_do )); + goto exit; + } + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("IRP_MN_QUERY_DEVICE_RELATIONS for TargetDeviceRelation for Fdo %p, Ldo %p: num_of_PDOs %d, PDO %p \n", + pi_ext->cl_ext.p_self_do, pi_ext->cl_ext.p_next_do, l_pDr->Count, l_pDr->Objects[0] )); + l_pHcaPdo = l_pDr->Objects[0]; + } + + { // allocate and fill an array with all PCI.SYS PDO devices + // suppose that there is no more than N_PCI_DEVICES, belonging to PCI.SYS + #define N_PCI_DEVICES 256 + KIRQL irql; + PDRIVER_OBJECT l_pDrv; + PDEVICE_OBJECT l_pPdo; + int l_all_pdos = 0; + + pdo = (PDEVICE_OBJECT *)ExAllocatePoolWithTag( + NonPagedPool, + N_PCI_DEVICES * sizeof(PDEVICE_OBJECT), + MT_TAG_KERNEL ); + if (!pdo) + goto exit; + + // suppose, that PDOs are added only at PASSIVE_LEVEL + irql = KeRaiseIrqlToDpcLevel(); + + // get to the PCI.SYS driver + l_pDrv = l_pHcaPdo->DriverObject; + + // find and store all bus PDO s (because the bridge is a bus enumerated device) + for ( l_pPdo = l_pDrv->DeviceObject; l_pPdo; l_pPdo = l_pPdo->NextDevice ) { + l_all_pdos++; + if ( l_pPdo->Flags & DO_BUS_ENUMERATED_DEVICE ) { + pdo[n_pdos] = l_pPdo; + if (++n_pdos >= N_PCI_DEVICES) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_SHIM , + ("There are more than %d children of PCI.SYS. 
Skipping the rest \n", N_PCI_DEVICES )); + break; + } + } + } + + // return to previous level + KeLowerIrql(irql); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SHIM ,("Found %d PCI.SYS's PDOs (from %d) \n", n_pdos, l_all_pdos )); + } + + { // Find PDO of the Bridge of our HCA and return open bus interface to it + int i; + ULONG data, l_SecBus; + IO_STACK_LOCATION l_Stack; // parameter buffer for the request + ULONG l_DevId = ((int)(23110) << 16) | PCI_VENDOR_ID_MELLANOX; + + // loop over all the PCI driver devices + for ( i = 0; i < n_pdos; ++i ) { + + // clean interface data + RtlZeroMemory( (PCHAR)pi_pInterface, sizeof(BUS_INTERFACE_STANDARD) ); + + // get Bus Interface for the current PDO + l_Stack.Parameters.QueryInterface.InterfaceType = (LPGUID) &GUID_BUS_INTERFACE_STANDARD; + l_Stack.Parameters.QueryInterface.Size = sizeof(BUS_INTERFACE_STANDARD); + l_Stack.Parameters.QueryInterface.Version = 1; + l_Stack.Parameters.QueryInterface.Interface = (PINTERFACE)pi_pInterface; + l_Stack.Parameters.QueryInterface.InterfaceSpecificData = NULL; + + l_Status =SendAwaitIrp( pi_ext->cl_ext.p_self_do, pdo[i], IRP_MJ_PNP, + IRP_MN_QUERY_INTERFACE, &l_Stack.Parameters, sizeof(l_Stack.Parameters), NULL); + if (!NT_SUCCESS (l_Status)) { + HCA_PRINT( TRACE_LEVEL_WARNING ,HCA_DBG_SHIM , + ("Failed to get bus interface for pdo[%d] %p, error %#x \n", i, pdo[i], l_Status )); + continue; + } + + // Read DevID + data = 0; + if (4 != pi_pInterface->GetBusData( pi_pInterface->Context, + PCI_WHICHSPACE_CONFIG, &data, 0, 4)) { + HCA_PRINT( TRACE_LEVEL_WARNING, HCA_DBG_PNP, + ("Failed to read DevID for pdo[%d] %p, data %#x \n", i, pdo[i], data )); + goto next_loop; + } + + if (data != l_DevId) { + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("Not Tavor bridge: pdo[%d] %p, data %#x \n", i, pdo[i], data )); + goto next_loop; + } + + // Found Tavor Bridge - read its SecondaryBus + data = 0; + if (4 != pi_pInterface->GetBusData( pi_pInterface->Context, + PCI_WHICHSPACE_CONFIG, &data, 24, 4)) { /* 24 - PrimaryBus, 25 - SecondaryBus, 26 - SubordinateBus */ + HCA_PRINT( TRACE_LEVEL_WARNING, HCA_DBG_PNP, + ("Failed to read SecondaryBus for pdo[%d] %p, data %#x \n", i, pdo[i], data )); + goto next_loop; + } + + l_SecBus = (data >> 16) & 255; + if (l_SecBus != pi_ext->bus_number) { + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("Wrong bridge for our HCA: pdo[%d] %p, SecBus %d, HcaBus %d \n", i, pdo[i], l_SecBus, pi_ext->bus_number )); + goto next_loop; + } + else { + ULONG l_DevFunc, l_Bus; + l_Status = MdGetDevLocation( pdo[i], &l_Bus, &l_DevFunc ); + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_PNP, + ("Found bridge for our HCA: pdo[%d] %p (bus %d, dev/func %d, HcaPdo %p), SecBus %d, HcaBus %d \n", + i, pdo[i], l_Bus, l_DevFunc, l_pHcaPdo, l_SecBus, pi_ext->bus_number )); + rc = TRUE; + break; + } + next_loop: + pi_pInterface->InterfaceDereference( pi_pInterface->Context ); + } + } + + ExFreePool(pdo); +exit: + return rc; +} diff --git a/branches/IBFD/hw/mthca/kernel/mt_spinlock.h b/branches/IBFD/hw/mthca/kernel/mt_spinlock.h new file mode 100644 index 00000000..57f3ea5a --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_spinlock.h @@ -0,0 +1,143 @@ +#ifndef MT_SPINLOCK_H +#define MT_SPINLOCK_H + +typedef struct spinlock { + KSPIN_LOCK lock; + +#ifdef SUPPORT_SPINLOCK_ISR + PKINTERRUPT p_int_obj; + KIRQL irql; +#endif +} spinlock_t; + +typedef struct { + KLOCK_QUEUE_HANDLE lockh; + KIRQL irql; +} spinlockh_t; + +#ifdef SUPPORT_SPINLOCK_ISR + +static inline void +spin_lock_setint( + IN spinlock_t* const l, 
+ IN PKINTERRUPT p_int_obj ) +{ + MT_ASSERT( l ); + l->p_int_obj = p_int_obj; +} + +static inline void spin_lock_irq_init( + IN spinlock_t* const l, + IN PKINTERRUPT int_obj + ) +{ + KeInitializeSpinLock( &l->lock ); + l->p_int_obj = int_obj; +} + +static inline unsigned long +spin_lock_irq( + IN spinlock_t* const l) +{ + MT_ASSERT( l ); + MT_ASSERT( l->p_int_obj ); + return (unsigned long)(l->irql = KeAcquireInterruptSpinLock ( l->p_int_obj )); +} + +static inline void +spin_unlock_irq( + IN spinlock_t* const p_spinlock ) +{ + MT_ASSERT( p_spinlock ); + MT_ASSERT( p_spinlock->p_int_obj ); + KeReleaseInterruptSpinLock ( p_spinlock->p_int_obj, p_spinlock->irql ); +} + +#endif + +#define SPIN_LOCK_PREP(lh) spinlockh_t lh + +static inline void spin_lock_init( + IN spinlock_t* const p_spinlock ) +{ + KeInitializeSpinLock( &p_spinlock->lock ); +} + +static inline void +spin_lock( + IN spinlock_t* const l, + IN spinlockh_t * const lh) +{ + KIRQL irql = KeGetCurrentIrql(); + + MT_ASSERT( l || lh ); + ASSERT(irql <= DISPATCH_LEVEL); + + if (irql == DISPATCH_LEVEL) + KeAcquireInStackQueuedSpinLockAtDpcLevel( &l->lock, &lh->lockh ); + else + KeAcquireInStackQueuedSpinLock( &l->lock, &lh->lockh ); + lh->irql = irql; +} + +static inline void +spin_unlock( + IN spinlockh_t * const lh) +{ + MT_ASSERT( lh ); + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + if (lh->irql == DISPATCH_LEVEL) + KeReleaseInStackQueuedSpinLockFromDpcLevel( &lh->lockh ); + else + KeReleaseInStackQueuedSpinLock( &lh->lockh ); +} + +static inline void +spin_lock_sync( + IN spinlock_t* const l ) +{ + KLOCK_QUEUE_HANDLE lockh; + MT_ASSERT( l ); + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeAcquireInStackQueuedSpinLock ( &l->lock, &lockh ); + KeReleaseInStackQueuedSpinLock( &lockh ); +} + +/* to be used only at DPC level */ +static inline void +spin_lock_dpc( + IN spinlock_t* const l, + IN spinlockh_t * const lh) +{ + MT_ASSERT( l || lh ); + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + KeAcquireInStackQueuedSpinLockAtDpcLevel( &l->lock, &lh->lockh ); +} + +/* to be used only at DPC level */ +static inline void +spin_unlock_dpc( + IN spinlockh_t * const lh) +{ + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + KeReleaseInStackQueuedSpinLockFromDpcLevel( &lh->lockh ); +} + + +/* we are working from DPC level, so we can use usual spinlocks */ +#define spin_lock_irq spin_lock +#define spin_unlock_irq spin_unlock + +/* no diff in Windows */ +#define spin_lock_irqsave spin_lock_irq +#define spin_unlock_irqrestore spin_unlock_irq + +/* Windows doesn't support such kind of spinlocks so far, but may be tomorrow ... 
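so the read/write lock macros below are simply aliases for the exclusive spinlock wrappers above; concurrent readers therefore serialize against each other as well.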
*/ +#define rwlock_init spin_lock_init +#define read_lock_irqsave spin_lock_irqsave +#define read_unlock_irqrestore spin_unlock_irqrestore +#define write_lock_irq spin_lock_irq +#define write_unlock_irq spin_unlock_irq + +#endif + diff --git a/branches/IBFD/hw/mthca/kernel/mt_sync.h b/branches/IBFD/hw/mthca/kernel/mt_sync.h new file mode 100644 index 00000000..90d3f38c --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_sync.h @@ -0,0 +1,109 @@ +#ifndef MT_SYNC_H +#define MT_SYNC_H + +// literals +#ifndef LONG_MAX +#define LONG_MAX 2147483647L /* maximum (signed) long value */ +#endif + + +// mutex wrapper + +// suitable both for mutexes and semaphores +static inline void down(PRKMUTEX p_mutex) +{ + NTSTATUS status; + int need_to_wait = 1; + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + while (need_to_wait) { + status = KeWaitForSingleObject( p_mutex, Executive, KernelMode, FALSE, NULL ); + if (status == STATUS_SUCCESS) + break; + } +} + +// suitable both for mutexes and semaphores +static inline int down_interruptible(PRKMUTEX p_mutex) +{ + NTSTATUS status; + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + status = KeWaitForSingleObject( p_mutex, Executive, KernelMode, TRUE, NULL ); + if (status == STATUS_SUCCESS) + return 0; + return -EINTR; +} + +#define sem_down(ptr) down((PRKMUTEX)(ptr)) +#define sem_down_interruptible(ptr) down_interruptible((PRKMUTEX)(ptr)) + +static inline void up(PRKMUTEX p_mutex) +{ + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeReleaseMutex( p_mutex, FALSE ); +} + +static inline void sem_up(PRKSEMAPHORE p_sem) +{ + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeReleaseSemaphore( p_sem, 0, 1, FALSE ); +} + +static inline void sem_init( + IN PRKSEMAPHORE p_sem, + IN LONG cnt, + IN LONG limit) +{ + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + KeInitializeSemaphore( p_sem, cnt, limit ); +} + + +typedef struct wait_queue_head { + KEVENT event; +} wait_queue_head_t; + +static inline void wait_event(wait_queue_head_t *obj_p, int condition) +{ + NTSTATUS status; + int need_to_wait = 1; + MT_ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + if (condition) + return; + while (need_to_wait) { + status = KeWaitForSingleObject( &obj_p->event, Executive, KernelMode, FALSE, NULL ); + if (status == STATUS_SUCCESS) + break; + } +} + +static inline void wake_up(wait_queue_head_t *obj_p) +{ + MT_ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeSetEvent( &obj_p->event, 0, FALSE ); +} + +static inline void init_waitqueue_head(wait_queue_head_t *obj_p) +{ + //TODO: ASSERT is temporary outcommented, because using of fast mutexes in CompLib + // cause working on APC_LEVEL + //ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + KeInitializeEvent( &obj_p->event, NotificationEvent , FALSE ); +} + +static inline void free_irq(PKINTERRUPT int_obj) +{ + IoDisconnectInterrupt( int_obj ); +} + +int request_irq( + IN CM_PARTIAL_RESOURCE_DESCRIPTOR *int_info, /* interrupt resources */ + IN KSPIN_LOCK *isr_lock, /* spin lcok for ISR */ + IN PKSERVICE_ROUTINE isr, /* ISR */ + IN void *isr_ctx, /* ISR context */ + OUT PKINTERRUPT *int_obj /* interrupt object */ + ); + + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/mt_types.h b/branches/IBFD/hw/mthca/kernel/mt_types.h new file mode 100644 index 00000000..efe9a857 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_types.h @@ -0,0 +1,60 @@ +#ifndef MT_TYPES_H +#define MT_TYPES_H + +//#include +#pragma warning( push ) +#include + #include +#pragma warning( pop ) + +// =========================================== +// SUBSTITUTES +// 
=========================================== + +// gcc compiler attributes +#define __iomem +#define likely(x) (x) +#define unlikely(x) (x) + +// container_of +#define container_of CONTAINING_RECORD + +// inline +#define inline __inline + +// =========================================== +// TYPES +// =========================================== + +// basic types +typedef unsigned char u8, __u8; +typedef unsigned short int u16, __u16; +typedef unsigned int u32, __u32; +typedef unsigned __int64 u64, __u64; +typedef char s8, __s8; +typedef short int s16, __s16; +typedef int s32, __s32; +typedef __int64 s64, __s64; + +// inherited +typedef u16 __le16; +typedef u16 __be16; +typedef u32 __le32; +typedef u32 __be32; +typedef u64 __le64; +typedef u64 __be64; +typedef u64 dma_addr_t; +typedef u64 io_addr_t; + +// =========================================== +// MACROS +// =========================================== + +// assert +#ifdef _DEBUG_ +#define MT_ASSERT( exp ) (void)(!(exp)?DbgPrint("Assertion Failed:" #exp "\n"),DbgBreakPoint(),FALSE:TRUE) +#else +#define MT_ASSERT( exp ) +#endif /* _DEBUG_ */ + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/mt_ud_header.c b/branches/IBFD/hw/mthca/kernel/mt_ud_header.c new file mode 100644 index 00000000..e649c53a --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_ud_header.c @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_ud_header.tmh" +#endif +#include + +#define STRUCT_FIELD_INIT(header, field,ow,ob,sb) \ + offsetof(struct ib_unpacked_ ## header, field), \ + sizeof ((struct ib_unpacked_ ## header *) 0)->field, \ + ow,ob,sb, \ + #header ":" #field + +#define STRUCT_FIELD_INITR(ow,ob,sb) \ + 0, 0, ow, ob, sb, "reserved" + +static const struct ib_field lrh_table[] = { + { STRUCT_FIELD_INIT(lrh, virtual_lane, 0, 0, 4) }, + { STRUCT_FIELD_INIT(lrh, link_version, 0, 4, 4) }, + { STRUCT_FIELD_INIT(lrh, service_level, 0, 8, 4) }, + { STRUCT_FIELD_INITR(0,12,2) }, + { STRUCT_FIELD_INIT(lrh, link_next_header, 0, 14, 2) }, + { STRUCT_FIELD_INIT(lrh, destination_lid, 0, 16, 16) }, + { STRUCT_FIELD_INITR(1,0,5) }, + { STRUCT_FIELD_INIT(lrh, packet_length, 1, 5, 11) }, + { STRUCT_FIELD_INIT(lrh, source_lid, 1, 16, 16) } +}; + +static const struct ib_field grh_table[] = { + { STRUCT_FIELD_INIT(grh, ip_version, 0, 0, 4) }, + { STRUCT_FIELD_INIT(grh, traffic_class, 0, 4, 8) }, + { STRUCT_FIELD_INIT(grh, flow_label, 0, 12, 20) }, + { STRUCT_FIELD_INIT(grh, payload_length, 1, 0, 16) }, + { STRUCT_FIELD_INIT(grh, next_header, 1, 16, 8) }, + { STRUCT_FIELD_INIT(grh, hop_limit, 1, 24, 8) }, + { STRUCT_FIELD_INIT(grh, source_gid, 2, 0, 128) }, + { STRUCT_FIELD_INIT(grh, destination_gid, 6, 0, 128) } +}; + +static const struct ib_field bth_table[] = { + { STRUCT_FIELD_INIT(bth, opcode, 0, 0, 8) }, + { STRUCT_FIELD_INIT(bth, solicited_event, 0, 8, 1) }, + { STRUCT_FIELD_INIT(bth, mig_req, 0, 9, 1) }, + { STRUCT_FIELD_INIT(bth, pad_count, 0, 10, 2) }, + { STRUCT_FIELD_INIT(bth, transport_header_version, 0, 12, 4) }, + { STRUCT_FIELD_INIT(bth, pkey, 0, 16, 16) }, + { STRUCT_FIELD_INITR(1,0,8) }, + { STRUCT_FIELD_INIT(bth, destination_qpn, 1, 8, 24) }, + { STRUCT_FIELD_INIT(bth, ack_req, 2, 0, 1) }, + { STRUCT_FIELD_INITR(2,1,7) }, + { STRUCT_FIELD_INIT(bth, psn, 2, 8, 24) } +}; + +static const struct ib_field deth_table[] = { + { STRUCT_FIELD_INIT(deth, qkey, 0, 0, 32) }, + { STRUCT_FIELD_INITR(1,0,8) }, + { STRUCT_FIELD_INIT(deth, source_qpn, 1, 8, 24) } +}; + + +/** + * ib_ud_header_init - Initialize UD header structure + * @payload_bytes:Length of packet payload + * @grh_present:GRH flag (if non-zero, GRH will be included) + * @header:Structure to initialize + * + * ib_ud_header_init() initializes the lrh.link_version, lrh.link_next_header, + * lrh.packet_length, grh.ip_version, grh.payload_length, + * grh.next_header, bth.opcode, bth.pad_count and + * bth.transport_header_version fields of a &struct ib_ud_header given + * the payload length and whether a GRH will be included. + */ +void ib_ud_header_init(int payload_bytes, + int grh_present, + struct ib_ud_header *header) +{ + int header_len; + u16 packet_length; + + RtlZeroMemory(header, sizeof *header); + + header_len = + IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES; + if (grh_present) { + header_len += IB_GRH_BYTES; + } + + header->lrh.link_version = 0; + header->lrh.link_next_header = + (u8)(grh_present ? 
IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL); + packet_length = (u16)((IB_LRH_BYTES + + IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4 + /* ICRC */ + 3) / 4); /* round up */ + + header->grh_present = grh_present; + if (grh_present) { + packet_length += IB_GRH_BYTES / 4; + header->grh.ip_version = 6; + header->grh.payload_length = + cl_hton16((u16)((IB_BTH_BYTES + + IB_DETH_BYTES + + payload_bytes + + 4 + /* ICRC */ + 3) & ~3)); /* round up */ + header->grh.next_header = 0x1b; + } + + header->lrh.packet_length = cl_hton16(packet_length); + + if (header->immediate_present) + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + else + header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; + header->bth.pad_count = (u8)((4 - payload_bytes) & 3); + header->bth.transport_header_version = 0; +} + +/** + * ib_ud_header_pack - Pack UD header struct into wire format + * @header:UD header struct + * @buf:Buffer to pack into + * + * ib_ud_header_pack() packs the UD header structure @header into wire + * format in the buffer @buf. + */ +int ib_ud_header_pack(struct ib_ud_header *header, + u8 *buf) +{ + int len = 0; + + ib_pack(lrh_table, ARRAY_SIZE(lrh_table), + &header->lrh, buf); + len += IB_LRH_BYTES; + + if (header->grh_present) { + ib_pack(grh_table, ARRAY_SIZE(grh_table), + &header->grh, buf + len); + len += IB_GRH_BYTES; + } + + ib_pack(bth_table, ARRAY_SIZE(bth_table), + &header->bth, buf + len); + len += IB_BTH_BYTES; + + ib_pack(deth_table, ARRAY_SIZE(deth_table), + &header->deth, buf + len); + len += IB_DETH_BYTES; + + if (header->immediate_present) { + memcpy(buf + len, &header->immediate_data, sizeof header->immediate_data); + len += sizeof header->immediate_data; + } + + return len; +} + +/** + * ib_ud_header_unpack - Unpack UD header struct from wire format + * @header:UD header struct + * @buf:Buffer to pack into + * + * ib_ud_header_pack() unpacks the UD header structure @header from wire + * format in the buffer @buf. 
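+ * + * Assuming the usual InfiniBand header sizes (LRH = 8, GRH = 40, BTH = 12, DETH = 8 bytes), @buf must hold at least 28 bytes for a GRH-less packet or 68 bytes when a GRH is present, plus 4 more bytes of immediate data when the BTH opcode indicates it.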
+ */ +int ib_ud_header_unpack(u8 *buf, + struct ib_ud_header *header) +{ + ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), + buf, &header->lrh); + buf += IB_LRH_BYTES; + + if (header->lrh.link_version != 0) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid LRH.link_version %d\n", + header->lrh.link_version)); + return -EINVAL; + } + + switch (header->lrh.link_next_header) { + case IB_LNH_IBA_LOCAL: + header->grh_present = 0; + break; + + case IB_LNH_IBA_GLOBAL: + header->grh_present = 1; + ib_unpack(grh_table, ARRAY_SIZE(grh_table), + buf, &header->grh); + buf += IB_GRH_BYTES; + + if (header->grh.ip_version != 6) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid GRH.ip_version %d\n", + header->grh.ip_version)); + return -EINVAL; + } + if (header->grh.next_header != 0x1b) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid GRH.next_header 0x%02x\n", + header->grh.next_header)); + return -EINVAL; + } + break; + + default: + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid LRH.link_next_header %d\n", + header->lrh.link_next_header)); + return -EINVAL; + } + + ib_unpack(bth_table, ARRAY_SIZE(bth_table), + buf, &header->bth); + buf += IB_BTH_BYTES; + + switch (header->bth.opcode) { + case IB_OPCODE_UD_SEND_ONLY: + header->immediate_present = 0; + break; + case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE: + header->immediate_present = 1; + break; + default: + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid BTH.opcode 0x%02x\n", + header->bth.opcode)); + return -EINVAL; + } + + if (header->bth.transport_header_version != 0) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Invalid BTH.transport_header_version %d\n", + header->bth.transport_header_version)); + return -EINVAL; + } + + ib_unpack(deth_table, ARRAY_SIZE(deth_table), + buf, &header->deth); + buf += IB_DETH_BYTES; + + if (header->immediate_present) + memcpy(&header->immediate_data, buf, sizeof header->immediate_data); + + return 0; +} diff --git a/branches/IBFD/hw/mthca/kernel/mt_uverbs.c b/branches/IBFD/hw/mthca/kernel/mt_uverbs.c new file mode 100644 index 00000000..0e4e5674 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_uverbs.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include +#include + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_uverbs.tmh" +#endif + + +//TODO: all this module is to be eliminated !! + + +static void ib_uverbs_add_one(struct ib_device *device); +static void ib_uverbs_remove_one(struct ib_device *device); + +static struct ib_client uverbs_client = { + "uverbs", + ib_uverbs_add_one, + ib_uverbs_remove_one +}; + +struct ib_uverbs_device { + struct ib_device *ib_dev; +}; + +static void ib_uverbs_add_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev; + + if (!device->alloc_ucontext) + return; + + uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); + if (!uverbs_dev) + return; + + ib_set_client_data(device, &uverbs_client, uverbs_dev); +} + +static void ib_uverbs_remove_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); + + if (uverbs_dev) + kfree(uverbs_dev); +} + +int ib_uverbs_init(void) +{ + int ret; + + ret = ib_register_client(&uverbs_client); + if (ret) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("user_verbs: couldn't register client\n")); + + return ret; +} + +void ib_uverbs_cleanup(void) +{ + ib_unregister_client(&uverbs_client); +} + diff --git a/branches/IBFD/hw/mthca/kernel/mt_verbs.c b/branches/IBFD/hw/mthca/kernel/mt_verbs.c new file mode 100644 index 00000000..29955e3c --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mt_verbs.c @@ -0,0 +1,935 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include +#include +#include "mthca_dev.h" +#include "mx_abi.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mt_verbs.tmh" +#endif + + +void ibv_um_close( struct ib_ucontext * h_um_ca ) +{ + int err; + ib_api_status_t status; + struct ib_ucontext *context_p = (struct ib_ucontext *)h_um_ca; + PREP_IBDEV_FOR_PRINT(context_p->device); + + HCA_ENTER(HCA_DBG_SHIM); + + context_p->is_removing = TRUE; + + if (atomic_read(&context_p->usecnt)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("resources are not released (cnt %d)\n", context_p->usecnt)); + status = IB_RESOURCE_BUSY; + goto err_usage; + } + + err = ibv_dealloc_pd( context_p->pd ); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("ibv_dealloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + } + + err = mthca_dealloc_ucontext(context_p); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("mthca_dealloc_ucontext failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_dealloc_ucontext; + } + + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_SHIM, + ("pcs %p\n", PsGetCurrentProcess()) ); + status = IB_SUCCESS; + goto end; + +err_dealloc_ucontext: +err_usage: +end: + if (status != IB_SUCCESS) + { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_SHIM, + ("completes with ERROR status %s\n", IB_GET_ERR_STR(status))); + } + HCA_EXIT(HCA_DBG_SHIM); + return; +} + +/* Protection domains */ + +struct ib_pd *ibv_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) +{ + struct ib_pd *pd; + + // direct call is a must, because "lifefish" devices doesn't fill driver i/f table + pd = mthca_alloc_pd(device, context, p_umv_buf); + + if (!IS_ERR(pd)) { + pd->device = device; + pd->ucontext = context; + atomic_set(&pd->usecnt, 0); + KeInitializeMutex( &pd->mutex, 0 ); + INIT_LIST_HEAD( &pd->list ); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_CQ ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + } + + return pd; +} + +int ibv_dealloc_pd(struct ib_pd *pd) +{ + if (mthca_is_livefish(to_mdev(pd->device))) + goto done; + + // we need first to release list of AV MRs to decrease pd->usecnt + if (pd->ucontext) { + struct ib_mr *ib_mr, *tmp; + down(&pd->mutex ); + list_for_each_entry_safe(ib_mr, tmp, &pd->list, list,struct ib_mr,struct ib_mr) { + ibv_dereg_mr( ib_mr ); + } + up(&pd->mutex ); + } + + if (atomic_read(&pd->usecnt)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ,("resources are not released (cnt %d)\n", pd->usecnt)); + return -EBUSY; + } + +done: + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_CQ ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + // direct call is a must, because "lifefish" devices doesn't fill driver i/f table + return mthca_dealloc_pd(pd); +} + +/* Address handles */ + +struct ib_ah *ibv_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) +{ + int err; + struct ib_ah *ah; + struct ib_mr *ib_mr = NULL; + u64 start = 0; + u64 user_handle = 0; + struct ibv_create_ah_resp *create_ah_resp = 0; + + // for user call we need also allocate MR + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_ah *create_ah = (struct ibv_create_ah *)(void*)p_umv_buf->p_inout_buf; + + // create region; destroy will be done on dealloc_pd + ib_mr = ibv_reg_mr( + pd, + create_ah->mr.access_flags, + 
(void*)(ULONG_PTR)create_ah->mr.start, + create_ah->mr.length, create_ah->mr.hca_va, TRUE ); + if (IS_ERR(ib_mr)) { + err = PTR_ERR(ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("ibv_reg_mr failed (%d)\n", err)); + goto err_alloc_mr; + } + + start = create_ah->mr.start; + user_handle = create_ah->user_handle; + + // chain this MR to PD list + down(&pd->mutex ); + list_add_tail(&ib_mr->list, &pd->list); + up(&pd->mutex ); + } + + ah = pd->device->create_ah(pd, ah_attr); + + /* fill obligatory fields */ + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + create_ah_resp = (struct ibv_create_ah_resp *)(void*)p_umv_buf->p_inout_buf; + create_ah_resp->user_handle = user_handle; + } + + if (IS_ERR(ah)) { + err = PTR_ERR(ah); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_AV ,("create_ah failed (%d)\n", err)); + goto err_create_ah; + } + + // fill results + ah->device = pd->device; + ah->pd = pd; + ah->ucontext = context; + atomic_inc(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_AV ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + + // fill results for user + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_ah_resp *create_ah_resp = (struct ibv_create_ah_resp *)(void*)p_umv_buf->p_inout_buf; + create_ah_resp->start = start; + create_ah_resp->mr.lkey = ib_mr->lkey; + create_ah_resp->mr.rkey = ib_mr->rkey; + create_ah_resp->mr.mr_handle = (u64)(ULONG_PTR)ib_mr; + p_umv_buf->output_size = sizeof(struct ibv_create_ah_resp); + } + + return ah; + +err_create_ah: + if (ib_mr) + ibv_dereg_mr(ib_mr); +err_alloc_mr: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + return ERR_PTR(ib_mr); +} + +struct ib_ah *ibv_create_ah_from_wc(struct ib_pd *pd, struct _ib_wc *wc, + struct ib_grh *grh, u8 port_num) +{ + struct ib_ah_attr ah_attr; + u32 flow_class; + u16 gid_index; + int ret; + + memset(&ah_attr, 0, sizeof ah_attr); + ah_attr.dlid = wc->recv.ud.remote_lid; + ah_attr.sl = wc->recv.ud.remote_sl; + ah_attr.src_path_bits = wc->recv.ud.path_bits; + ah_attr.port_num = port_num; + + if (wc->recv.ud.recv_opt & IB_RECV_OPT_GRH_VALID) { + ah_attr.ah_flags = IB_AH_GRH; + ah_attr.grh.dgid = grh->dgid; + + ret = ib_find_cached_gid(pd->device, &grh->sgid, &port_num, + &gid_index); + if (ret) + return ERR_PTR(ret); + + ah_attr.grh.sgid_index = (u8) gid_index; + flow_class = cl_ntoh32(grh->version_tclass_flow); + ah_attr.grh.flow_label = flow_class & 0xFFFFF; + ah_attr.grh.traffic_class = (u8)((flow_class >> 20) & 0xFF); + ah_attr.grh.hop_limit = grh->hop_limit; + } + + return ibv_create_ah(pd, &ah_attr, NULL, NULL); +} + +int ibv_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + return ah->device->modify_ah ? + ah->device->modify_ah(ah, ah_attr) : + -ENOSYS; +} + +int ibv_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + return ah->device->query_ah ? + ah->device->query_ah(ah, ah_attr) : + -ENOSYS; +} + + +static void release_user_cq_qp_resources( + struct ib_ucontext *ucontext, + struct ib_mr * ib_mr) +{ + if (ucontext) { + ibv_dereg_mr( ib_mr ); + atomic_dec(&ucontext->usecnt); + if (!atomic_read(&ucontext->usecnt) && ucontext->is_removing) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SHIM ,("User resources are released. 
Removing context\n")); + ibv_um_close(ucontext); + } + } +} + +int ibv_destroy_ah(struct ib_ah *ah) +{ + struct ib_pd *pd; + int ret; + + HCA_ENTER(HCA_DBG_AV); + pd = ah->pd; + + ret = ah->device->destroy_ah(ah); + if (!ret) { + atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_AV ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + } + HCA_EXIT(HCA_DBG_AV); + return ret; +} + +/* Shared receive queues */ + +struct ib_srq *ibv_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) +{ + int err; + struct ib_srq *ib_srq; + struct ib_mr *ib_mr = NULL; + u64 user_handle = 0; + struct ibv_create_srq_resp *create_srq_resp = 0; + + // for user call we need also allocate MR + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_srq *create_srp = (struct ibv_create_srq *)(void*)p_umv_buf->p_inout_buf; + + // create region + ib_mr = ibv_reg_mr( + (struct ib_pd *)(ULONG_PTR)create_srp->mr.pd_handle, + create_srp->mr.access_flags, + (void*)(ULONG_PTR)create_srp->mr.start, + create_srp->mr.length, create_srp->mr.hca_va, TRUE ); + if (IS_ERR(ib_mr)) { + err = PTR_ERR(ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("ibv_reg_mr failed (%d)\n", err)); + goto err_alloc_mr; + } + create_srp->lkey = ib_mr->lkey; + user_handle = create_srp->user_handle; + } + + ib_srq = pd->device->create_srq(pd, srq_init_attr, p_umv_buf); + + /* fill obligatory fields */ + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + create_srq_resp = (struct ibv_create_srq_resp *)(void*)p_umv_buf->p_inout_buf; + create_srq_resp->user_handle = user_handle; + } + + if (IS_ERR(ib_srq)) { + err = PTR_ERR(ib_srq); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP ,("create_srq failed (%d)\n", err)); + goto err_create_srq; + } + + // fill results + ib_srq->device = pd->device; + ib_srq->pd = pd; + ib_srq->ucontext = context; + ib_srq->event_handler = srq_init_attr->event_handler; + ib_srq->srq_context = srq_init_attr->srq_context; + atomic_inc(&pd->usecnt); + atomic_set(&ib_srq->usecnt, 0); + if (context) + atomic_inc(&context->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_QP ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_SRQ , + ("uctx %p, qhndl %p, qnum %#x \n", + pd->ucontext, ib_srq, ((struct mthca_srq*)ib_srq)->srqn ) ); + + // fill results for user + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct mthca_srq *srq = (struct mthca_srq *)ib_srq; + ib_srq->ib_mr = ib_mr; + create_srq_resp->mr.lkey = ib_mr->lkey; + create_srq_resp->mr.rkey = ib_mr->rkey; + create_srq_resp->mr.mr_handle = (u64)(ULONG_PTR)ib_mr; + create_srq_resp->srq_handle = (__u64)(ULONG_PTR)srq; + create_srq_resp->max_wr = (mthca_is_memfree(to_mdev(pd->device))) ? 
srq->max - 1 : srq->max; + create_srq_resp->max_sge = srq->max_gs; + create_srq_resp->srqn= srq->srqn; + p_umv_buf->output_size = sizeof(struct ibv_create_srq_resp); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_QP ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } + + return ib_srq; + +err_create_srq: + if (ib_mr) + ibv_dereg_mr(ib_mr); +err_alloc_mr: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + HCA_EXIT(HCA_DBG_QP); + return ERR_PTR(err); +} + +int ibv_modify_srq(struct ib_srq *srq, + ib_srq_attr_t *srq_attr, + ib_srq_attr_mask_t srq_attr_mask) +{ + return srq->device->modify_srq(srq, srq_attr, srq_attr_mask); +} + +int ibv_query_srq(struct ib_srq *srq, + ib_srq_attr_t *srq_attr) +{ + return srq->device->query_srq(srq, srq_attr); +} + +int ibv_destroy_srq(struct ib_srq *srq) +{ + int ret; + struct ib_pd *pd = srq->pd; + struct ib_ucontext *ucontext = pd->ucontext; + struct ib_mr * ib_mr = srq->ib_mr; + + ret = srq->device->destroy_srq(srq); + if (!ret) { + atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_SRQ ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + release_user_cq_qp_resources(ucontext, ib_mr); + } + + return ret; +} + +/* Queue pairs */ + +struct ib_qp *ibv_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) +{ + int err; + struct ib_qp *ib_qp; + struct ib_mr *ib_mr = NULL; + u64 user_handle = 0; + + HCA_ENTER(HCA_DBG_QP); + + // for user call we need also allocate MR + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_qp *create_qp = (struct ibv_create_qp *)(void*)p_umv_buf->p_inout_buf; + + // create region + ib_mr = ibv_reg_mr( + (struct ib_pd *)(ULONG_PTR)create_qp->mr.pd_handle, + create_qp->mr.access_flags, + (void*)(ULONG_PTR)create_qp->mr.start, + create_qp->mr.length, create_qp->mr.hca_va, TRUE ); + if (IS_ERR(ib_mr)) { + err = PTR_ERR(ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("ibv_reg_mr failed (%d)\n", err)); + goto err_alloc_mr; + } + create_qp->lkey = ib_mr->lkey; + user_handle = create_qp->user_handle; + } + + ib_qp = pd->device->create_qp(pd, qp_init_attr, p_umv_buf); + + if (IS_ERR(ib_qp)) { + err = PTR_ERR(ib_qp); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP ,("create_qp failed (%d)\n", err)); + goto err_create_qp; + } + + // fill results + ib_qp->device = pd->device; + ib_qp->pd = pd; + ib_qp->send_cq = qp_init_attr->send_cq; + ib_qp->recv_cq = qp_init_attr->recv_cq; + ib_qp->srq = qp_init_attr->srq; + ib_qp->ucontext = context; + ib_qp->event_handler = qp_init_attr->event_handler; + ib_qp->qp_context = qp_init_attr->qp_context; + ib_qp->qp_type = qp_init_attr->qp_type; + atomic_inc(&pd->usecnt); + atomic_inc(&qp_init_attr->send_cq->usecnt); + atomic_inc(&qp_init_attr->recv_cq->usecnt); + if (qp_init_attr->srq) + atomic_inc(&qp_init_attr->srq->usecnt); + if (context) + atomic_inc(&context->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_QP ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_QP , + ("uctx %p, qhndl %p, qnum %#x, q_num %#x, scq %#x:%#x, rcq %#x:%#x \n", + pd->ucontext, ib_qp, ((struct mthca_qp*)ib_qp)->qpn, ib_qp->qp_num, + ((struct mthca_cq*)ib_qp->send_cq)->cqn, ib_qp->send_cq->cqe, + ((struct mthca_cq*)ib_qp->recv_cq)->cqn, ib_qp->recv_cq->cqe ) ); + + // fill results for user + if (context 
&& p_umv_buf && p_umv_buf->p_inout_buf) { + struct mthca_qp *qp = (struct mthca_qp *)ib_qp; + struct ibv_create_qp_resp *create_qp_resp = (struct ibv_create_qp_resp *)(void*)p_umv_buf->p_inout_buf; + ib_qp->ib_mr = ib_mr; + create_qp_resp->qpn = ib_qp->qp_num; + create_qp_resp->user_handle = user_handle; + create_qp_resp->mr.lkey = ib_mr->lkey; + create_qp_resp->mr.rkey = ib_mr->rkey; + create_qp_resp->mr.mr_handle = (u64)(ULONG_PTR)ib_mr; + create_qp_resp->qp_handle = (__u64)(ULONG_PTR)qp; + create_qp_resp->max_send_wr = qp->sq.max; + create_qp_resp->max_recv_wr = qp->rq.max; + create_qp_resp->max_send_sge = qp->sq.max_gs; + create_qp_resp->max_recv_sge = qp->rq.max_gs; + create_qp_resp->max_inline_data = qp->max_inline_data; + p_umv_buf->output_size = sizeof(struct ibv_create_qp_resp); + } + + return ib_qp; + +err_create_qp: + if (ib_mr) + ibv_dereg_mr(ib_mr); +err_alloc_mr: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + HCA_EXIT(HCA_DBG_QP); + return ERR_PTR(err); +} + +int ibv_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask) +{ + return qp->device->modify_qp(qp, qp_attr, qp_attr_mask); +} + +int ibv_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + return qp->device->query_qp ? + qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) : + -ENOSYS; +} + +int ibv_destroy_qp(struct ib_qp *qp) +{ + struct ib_pd *pd; + struct ib_cq *scq, *rcq; + struct ib_srq *srq; + int ret; + struct ib_ucontext *ucontext; + struct ib_mr * ib_mr; + + pd = qp->pd; + scq = qp->send_cq; + rcq = qp->recv_cq; + srq = qp->srq; + ucontext = pd->ucontext; + ib_mr = qp->ib_mr; + + ret = qp->device->destroy_qp(qp); + if (!ret) { + atomic_dec(&pd->usecnt); + atomic_dec(&scq->usecnt); + atomic_dec(&rcq->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_QP ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + if (srq) + atomic_dec(&srq->usecnt); + release_user_cq_qp_resources(ucontext, ib_mr); + } + + return ret; +} + +/* Completion queues */ + +struct ib_cq *ibv_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe, + struct ib_ucontext *context, ci_umv_buf_t* const p_umv_buf) +{ + int err; + struct ib_cq *cq; + struct ib_mr *ib_mr = NULL; + u64 user_handle = 0; + + // for user call we need also allocate MR + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_cq *create_cq = (struct ibv_create_cq *)(void*)p_umv_buf->p_inout_buf; + + // create region + ib_mr = ibv_reg_mr( + (struct ib_pd *)(ULONG_PTR)create_cq->mr.pd_handle, + create_cq->mr.access_flags, + (void*)(ULONG_PTR)create_cq->mr.start, + create_cq->mr.length, create_cq->mr.hca_va, TRUE ); + if (IS_ERR(ib_mr)) { + err = PTR_ERR(ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("ibv_reg_mr failed (%d)\n", err)); + goto err_alloc_mr; + } + user_handle = create_cq->user_handle; + create_cq->lkey = ib_mr->lkey; + cqe = create_cq->cqe; + } + + // create cq + cq = device->create_cq(device, cqe, context, p_umv_buf); + if (IS_ERR(cq)) { + err = PTR_ERR(cq); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_CQ ,("create_cq failed (%d)\n", err)); + goto err_create_cq; + } + + cq->device = device; + cq->ucontext = context; + cq->comp_handler = comp_handler; + cq->event_handler = event_handler; + cq->cq_context = cq_context; + atomic_set(&cq->usecnt, 0); + if (context) + 
atomic_inc(&context->usecnt); + + // fill results + if (context && p_umv_buf && p_umv_buf->p_inout_buf) { + struct ibv_create_cq_resp *create_cq_resp = (struct ibv_create_cq_resp *)(void*)p_umv_buf->p_inout_buf; + cq->ib_mr = ib_mr; + create_cq_resp->user_handle = user_handle; + create_cq_resp->mr.lkey = ib_mr->lkey; + create_cq_resp->mr.rkey = ib_mr->rkey; + create_cq_resp->mr.mr_handle = (u64)(ULONG_PTR)ib_mr; + create_cq_resp->cq_handle = (u64)(ULONG_PTR)cq; + create_cq_resp->cqe = cq->cqe; + p_umv_buf->output_size = sizeof(struct ibv_create_cq_resp); + } + + return cq; + +err_create_cq: + if (ib_mr) + ibv_dereg_mr(ib_mr); +err_alloc_mr: + if( p_umv_buf && p_umv_buf->command ) + p_umv_buf->status = IB_ERROR; + return ERR_PTR(err); +} + +int ibv_destroy_cq(struct ib_cq *cq) +{ + int ret; + struct ib_ucontext *ucontext = cq->ucontext; + struct ib_mr * ib_mr = cq->ib_mr; + + if (atomic_read(&cq->usecnt)) + return -EBUSY; + + ret = cq->device->destroy_cq(cq); + + release_user_cq_qp_resources(ucontext, ib_mr); + + return ret; +} + +int ibv_resize_cq(struct ib_cq *cq, + int cqe) +{ + int ret; + + if (!cq->device->resize_cq) + return -ENOSYS; + + ret = cq->device->resize_cq(cq, &cqe); + if (!ret) + cq->cqe = cqe; + + return ret; +} + +/* Memory regions */ + +struct ib_mr *ibv_reg_mr(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, + void* __ptr64 vaddr, + uint64_t length, + uint64_t hca_va, + boolean_t um_call + ) +{ + struct ib_mr *ib_mr; + int err; + HCA_ENTER(HCA_DBG_MEMORY); + + ib_mr = pd->device->reg_virt_mr(pd, vaddr, length, hca_va, mr_access_flags, um_call); + if (IS_ERR(ib_mr)) { + err = PTR_ERR(ib_mr); + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_MEMORY ,("mthca_reg_user_mr failed (%d)\n", err)); + goto err_reg_user_mr; + } + + ib_mr->device = pd->device; + ib_mr->pd = pd; + atomic_inc(&pd->usecnt); + atomic_set(&ib_mr->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + HCA_EXIT(HCA_DBG_MEMORY); + return ib_mr; + +err_reg_user_mr: + HCA_EXIT(HCA_DBG_MEMORY); + return ERR_PTR(err); +} + +struct ib_mr *ibv_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t mr_access_flags) +{ + struct ib_mr *mr; + + // direct call is a must, because "lifefish" devices doesn't fill driver i/f table + mr = mthca_get_dma_mr(pd, mr_access_flags); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } + + return mr; +} + +struct ib_mr *ibv_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start) +{ + struct ib_mr *mr; + + mr = pd->device->reg_phys_mr(pd, phys_buf_array, num_phys_buf, + mr_access_flags, iova_start); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } + + return mr; +} + +int ibv_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + mthca_qp_access_t mr_access_flags, + u64 *iova_start) +{ + struct ib_pd *old_pd; + int ret; + + if (!mr->device->rereg_phys_mr) + return -ENOSYS; + + if (atomic_read(&mr->usecnt)) + return 
-EBUSY; + + old_pd = mr->pd; + + ret = mr->device->rereg_phys_mr(mr, mr_rereg_mask, pd, + phys_buf_array, num_phys_buf, + mr_access_flags, iova_start); + + if (!ret && (mr_rereg_mask & IB_MR_REREG_PD)) { + atomic_dec(&old_pd->usecnt); + atomic_inc(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } + + return ret; +} + +int ibv_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + return mr->device->query_mr ? + mr->device->query_mr(mr, mr_attr) : -ENOSYS; +} + +int ibv_dereg_mr(struct ib_mr *mr) +{ + int ret; + struct ib_pd *pd; + + if (atomic_read(&mr->usecnt)) + return -EBUSY; + + pd = mr->pd; + // direct call is a must, because "lifefish" devices doesn't fill driver i/f table + ret = mthca_dereg_mr(mr); + if (!ret) { + atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d, pd_handle %p, ctx %p \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt, pd, pd->ucontext)); + } + + return ret; +} + +/* Memory windows */ + +struct ib_mw *ibv_alloc_mw(struct ib_pd *pd) +{ + struct ib_mw *mw; + + if (!pd->device->alloc_mw) + return ERR_PTR(-ENOSYS); + + mw = pd->device->alloc_mw(pd); + if (!IS_ERR(mw)) { + mw->device = pd->device; + mw->pd = pd; + atomic_inc(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } + + return mw; +} + +int ibv_dealloc_mw(struct ib_mw *mw) +{ + struct ib_pd *pd; + int ret; + + pd = mw->pd; + ret = mw->device->dealloc_mw(mw); + if (!ret) { + atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } + + return ret; +} + +/* "Fast" memory regions */ + +struct ib_fmr *ibv_alloc_fmr(struct ib_pd *pd, + mthca_qp_access_t mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ib_fmr *fmr; + + if (!pd->device->alloc_fmr) + return ERR_PTR(-ENOSYS); + + fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr); + if (!IS_ERR(fmr)) { + fmr->device = pd->device; + fmr->pd = pd; + atomic_inc(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } + + return fmr; +} + +int ibv_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova) +{ + return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); +} + +int ibv_unmap_fmr(struct list_head *fmr_list) +{ + struct ib_fmr *fmr; + + if (list_empty(fmr_list)) + return 0; + + fmr = list_entry(fmr_list->next, struct ib_fmr, list); + return fmr->device->unmap_fmr(fmr_list); +} + +int ibv_dealloc_fmr(struct ib_fmr *fmr) +{ + struct ib_pd *pd; + int ret; + + pd = fmr->pd; + ret = fmr->device->dealloc_fmr(fmr); + if (!ret) { + atomic_dec(&pd->usecnt); + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("PD%d use cnt %d \n", + ((struct mthca_pd*)pd)->pd_num, pd->usecnt)); + } + + return ret; +} + +/* Multicast groups */ + +int ibv_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) +{ + if (!qp->device->attach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UNRELIABLE_DGRM) + return -EINVAL; + + return qp->device->attach_mcast(qp, gid, lid); +} + +int ibv_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) +{ + if (!qp->device->detach_mcast) + return -ENOSYS; + if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UNRELIABLE_DGRM) + return -EINVAL; + + return 
qp->device->detach_mcast(qp, gid, lid); +} diff --git a/branches/IBFD/hw/mthca/kernel/mthca.cdf b/branches/IBFD/hw/mthca/kernel/mthca.cdf new file mode 100644 index 00000000..af10df1c --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca.cdf @@ -0,0 +1,14 @@ +[CatalogHeader] +Name=mthca.cat +PublicVersion=0x0000001 +EncodingType=0x00010001 +CATATTR1=0x10010001:OSAttr:2:6.0 +[CatalogFiles] +mthca.inf=mthca.inf +mthca.sys=mthca.sys +mthcau.dll=mthcau.dll +mthcaud.dll=mthcaud.dll +mthca32.dll=mthca32.dll +mthca32d.dll=mthca32d.dll +IbInstaller.dll=IbInstaller.dll + diff --git a/branches/IBFD/hw/mthca/kernel/mthca.h b/branches/IBFD/hw/mthca/kernel/mthca.h new file mode 100644 index 00000000..9570421a --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca.h @@ -0,0 +1,9 @@ +#ifndef MTHCA_H +#define MTHCA_H + +NTSTATUS mthca_init_one(hca_dev_ext_t *ext); +void mthca_remove_one(hca_dev_ext_t *ext); +int mthca_get_dev_info(struct mthca_dev *mdev, __be64 *node_guid, u32 *hw_id); + +#endif + diff --git a/branches/IBFD/hw/mthca/kernel/mthca.inf b/branches/IBFD/hw/mthca/kernel/mthca.inf new file mode 100644 index 00000000..a553ba25 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca.inf @@ -0,0 +1,205 @@ +; Mellanox Technologies InfiniBand HCAs. +; Copyright 2005 Mellanox Technologies all Rights Reserved. + +[Version] +Signature="$Windows NT$" +Class=InfiniBandHca +ClassGUID={58517E00-D3CF-40c9-A679-CEE5752F4491} +Provider=%OPENIB% +; must be synchronized with MTHCA_DEV.H +DriverVer=03/08/2006,1.0.0000.614 +CatalogFile=mthca.cat + +; ================= Destination directory section ===================== + +[DestinationDirs] +DefaultDestDir=%DIRID_DRIVERS% +ClassCopyFiles=%DIRID_SYSTEM% +MTHCA.UMCopyFiles=%DIRID_SYSTEM% +MTHCA.WOW64CopyFiles=%DIRID_SYSTEM_X86% + +; ================= Class Install section ===================== + +[ClassInstall32] +CopyFiles=ClassCopyFiles +AddReg=ClassAddReg + +[ClassCopyFiles] +IbInstaller.dll + +[ClassAddReg] +HKR,,,,"InfiniBand Host Channel Adapters" +HKR,,Icon,,-5 +HKR,,SilentInstall,,1 +HKLM,"System\CurrentControlSet\Control\CoDeviceInstallers", \ + %HcaClassGuid%,%REG_MULTI_SZ_APPEND%, "IbInstaller.dll,IbCoInstaller" + +; ================= Device Install section ===================== + +[SourceDisksNames.x86] +1=%DiskId%,,,"" + +[SourceDisksNames.amd64] +1=%DiskId%,,,"" + +[SourceDisksNames.ia64] +1=%DiskId%,,,"" + +[SourceDisksFiles] +IbInstaller.dll=1 +mthca.sys=1 +mthcau.dll=1 +mthcaud.dll=1 + +[SourceDisksFiles.amd64] +IbInstaller.dll=1 +mthca.sys=1 +mthcau.dll=1 +mthcaud.dll=1 +mthca32.dll=1 +mthca32d.dll=1 + +[SourceDisksFiles.ia64] +IbInstaller.dll=1 +mthca.sys=1 +mthcau.dll=1 +mthcaud.dll=1 +mthca32.dll=1 +mthca32d.dll=1 + +[Manufacturer] +%MTL% = HCA.DeviceSection,ntx86,ntamd64,ntia64 + +[HCA.DeviceSection] +; empty since we don't support W9x/Me + +[HCA.DeviceSection.ntx86] +%MT23108.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5A44 +%MT23109.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5A45 +%MT25208.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6278 +%MT25209.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6279 +%MT25218.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6282 +%MT24204.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5E8C +%MT24205.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5E8D +%MT25204.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6274 +%MT25205.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6275 + +[HCA.DeviceSection.ntamd64] +%MT23108.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5A44 +%MT23109.DeviceDesc%=MTHCA.DDInstall, 
PCI\VEN_15B3&DEV_5A45 +%MT25208.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6278 +%MT25209.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6279 +%MT25218.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6282 +%MT24204.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5E8C +%MT24205.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5E8D +%MT25204.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6274 +%MT25205.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6275 + +[HCA.DeviceSection.ntia64] +%MT23108.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5A44 +%MT23109.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5A45 +%MT25208.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6278 +%MT25209.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6279 +%MT25218.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6282 +%MT24204.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5E8C +%MT24205.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_5E8D +%MT25204.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6274 +%MT25205.DeviceDesc%=MTHCA.DDInstall, PCI\VEN_15B3&DEV_6275 + +[MTHCA.DDInstall.ntx86] +CopyFiles = MTHCA.CopyFiles +CopyFiles = MTHCA.UMCopyFiles +CopyINF=ib_bus.inf + +[MTHCA.DDInstall.ntamd64] +CopyFiles = MTHCA.CopyFiles +CopyFiles = MTHCA.UMCopyFiles +CopyFiles = MTHCA.WOW64CopyFiles +CopyINF=ib_bus.inf + +[MTHCA.DDInstall.ntia64] +CopyFiles = MTHCA.CopyFiles +CopyFiles = MTHCA.UMCopyFiles +CopyFiles = MTHCA.WOW64CopyFiles +CopyINF=ib_bus.inf + +[MTHCA.DDInstall.ntx86.Services] +AddService = mthca,%SPSVCINST_ASSOCSERVICE%,MTHCA.ServiceInstall,MTHCA.EventLog + +[MTHCA.DDInstall.ntamd64.Services] +AddService = mthca,%SPSVCINST_ASSOCSERVICE%,MTHCA.ServiceInstall,MTHCA.EventLog + +[MTHCA.DDInstall.ntia64.Services] +AddService = mthca,%SPSVCINST_ASSOCSERVICE%,MTHCA.ServiceInstall,MTHCA.EventLog + +[MTHCA.CopyFiles] +mthca.sys + +[MTHCA.UMCopyFiles] +mthcau.dll,,,2 +mthcaud.dll,,,2 + +[MTHCA.WOW64CopyFiles] +mthcau.dll,mthca32.dll,,2 +mthcaud.dll,mthca32d.dll,,2 + +; +; ============= Service Install section ============== +; + +[MTHCA.ServiceInstall] +DisplayName = %MTHCA.ServiceDesc% +ServiceType = %SERVICE_KERNEL_DRIVER% +StartType = %SERVICE_DEMAND_START% +ErrorControl = %SERVICE_ERROR_NORMAL% +ServiceBinary = %12%\mthca.sys +LoadOrderGroup = extended base +AddReg = MTHCA.ParamsReg + + +[MTHCA.EventLog] +AddReg = MTHCA.AddEventLogReg + +[MTHCA.AddEventLogReg] +HKR, , EventMessageFile, 0x00020000, "%%SystemRoot%%\System32\IoLogMsg.dll;%%SystemRoot%%\System32\drivers\mthca.sys" +HKR, , TypesSupported, 0x00010001, 7 + +[MTHCA.ParamsReg] +HKR,"Parameters","DebugLevel",%REG_DWORD%,0x00000003 +HKR,"Parameters","DebugFlags",%REG_DWORD%,0x0000ffff +HKR,"Parameters","SkipTavorReset",%REG_DWORD%,0 +HKR,"Parameters","DisableTavorResetOnFailure",%REG_DWORD%,1 +HKR,"Parameters","TunePci",%REG_DWORD%,0 +HKR,"Parameters","ProcessorAffinity",%REG_DWORD%,0 +HKR,"Parameters","MaxDpcTimeUs",%REG_DWORD%,10000 +HKR,"Parameters","ProfileQpNum",%REG_DWORD%,0 +HKR,"Parameters","ProfileRdOut",%REG_DWORD%,0xffffffff +HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\8bf1f640-63fe-4743-b9ef-fa38c695bfde","Flags",%REG_DWORD%,0xffff +HKLM,"System\CurrentControlSet\Control\WMI\GlobalLogger\8bf1f640-63fe-4743-b9ef-fa38c695bfde","Level",%REG_DWORD%,0x3 + +[Strings] +HcaClassGuid = "{58517E00-D3CF-40c9-A679-CEE5752F4491}" +OPENIB = "OpenIB Alliance" +MTL="Mellanox Technologies Ltd." 
+MTHCA.ServiceDesc = "Driver for Mellanox InfiniHost Devices" +MT23108.DeviceDesc="InfiniHost (MT23108) - Mellanox InfiniBand HCA" +MT23109.DeviceDesc="InfiniHost (MT23109) - Mellanox InfiniBand HCA (burner device)" +MT25208.DeviceDesc="InfiniHost (MT25208) - Mellanox InfiniBand HCA for PCI Express" +MT25209.DeviceDesc="InfiniHost (MT25209) - Mellanox InfiniBand HCA for PCI Express (burner device)" +MT25218.DeviceDesc="InfiniHost III Ex (MT25218) - Mellanox InfiniBand HCA for PCI Express" +MT24204.DeviceDesc="InfiniHost III Lx (MT24204) - Mellanox InfiniBand HCA for PCI Express" +MT24205.DeviceDesc="InfiniHost III Lx (MT24205) - Mellanox InfiniBand HCA for PCI Express (burner device)" +MT25204.DeviceDesc="InfiniHost III Lx (MT25204) - Mellanox InfiniBand HCA for PCI Express" +MT25205.DeviceDesc="InfiniHost III Lx (MT25205) - Mellanox InfiniBand HCA for PCI Express (burner device)" +DiskId = "Mellanox InfiniBand HCA installation disk" +SPSVCINST_NULL = 0x0 +SPSVCINST_ASSOCSERVICE = 0x00000002 +SERVICE_KERNEL_DRIVER = 1 +SERVICE_DEMAND_START = 3 +SERVICE_ERROR_NORMAL = 1 +REG_DWORD = 0x00010001 +REG_MULTI_SZ_APPEND = 0x00010008 +DIRID_SYSTEM = 11 +DIRID_DRIVERS = 12 +DIRID_SYSTEM_X86 = 16425 + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_allocator.c b/branches/IBFD/hw/mthca/kernel/mthca_allocator.c new file mode 100644 index 00000000..28dd974f --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_allocator.c @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_allocator.tmh" +#endif + +/* Trivial bitmap-based allocator */ +u32 mthca_alloc(struct mthca_alloc *alloc) +{ + u32 obj; + SPIN_LOCK_PREP(lh); + + spin_lock(&alloc->lock, &lh); + obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); + if (obj >= alloc->max) { + alloc->top = (alloc->top + alloc->max) & alloc->mask; + obj = find_first_zero_bit(alloc->table, alloc->max); + } + + if (obj < alloc->max) { + set_bit(obj, (long*)alloc->table); + obj |= alloc->top; + } else + obj = (u32)-1; + + spin_unlock(&lh); + + return obj; +} + +void mthca_free(struct mthca_alloc *alloc, u32 obj) +{ + SPIN_LOCK_PREP(lh); + + obj &= alloc->max - 1; + spin_lock(&alloc->lock, &lh); + clear_bit(obj, (long *)alloc->table); + alloc->last = MIN(alloc->last, obj); + alloc->top = (alloc->top + alloc->max) & alloc->mask; + spin_unlock(&lh); +} + +int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, + u32 reserved) +{ + int i; + HCA_ENTER(HCA_DBG_INIT); + /* num must be a power of 2 */ + if ((int)num != 1 << (ffs(num) - 1)) + return -EINVAL; + + alloc->last = 0; + alloc->top = 0; + alloc->max = num; + alloc->mask = mask; + spin_lock_init(&alloc->lock); + alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long), + GFP_KERNEL); + if (!alloc->table) + return -ENOMEM; + + bitmap_zero(alloc->table, num); + for (i = 0; i < (int)reserved; ++i) + set_bit(i, (long *)alloc->table); + + return 0; +} + +void mthca_alloc_cleanup(struct mthca_alloc *alloc) +{ + kfree(alloc->table); +} + +/* + * Array of pointers with lazy allocation of leaf pages. Callers of + * _get, _set and _clear methods must use a lock or otherwise + * serialize access to the array. + */ + +#define MTHCA_ARRAY_MASK (PAGE_SIZE / sizeof (void *) - 1) + +void *mthca_array_get(struct mthca_array *array, int index) +{ + int p = (index * sizeof (void *)) >> PAGE_SHIFT; + + if (array->page_list[p].page) + return array->page_list[p].page[index & MTHCA_ARRAY_MASK]; + else + return NULL; +} + +int mthca_array_set(struct mthca_array *array, int index, void *value) +{ + int p = (index * sizeof (void *)) >> PAGE_SHIFT; + + /* Allocate with GFP_ATOMIC because we'll be called with locks held. 
*/ + if (!array->page_list[p].page) + array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC); + + if (!array->page_list[p].page) + return -ENOMEM; + + array->page_list[p].page[index & MTHCA_ARRAY_MASK] = value; + ++array->page_list[p].used; + + return 0; +} + +void mthca_array_clear(struct mthca_array *array, int index) +{ + int p = (index * sizeof (void *)) >> PAGE_SHIFT; + + if (array->page_list[p].used <= 0) { + HCA_PRINT(TRACE_LEVEL_INFORMATION, HCA_DBG_LOW,("Array %p index %d page %d with ref count %d < 0\n", + array, index, p, array->page_list[p].used)); + return; + } + + if (--array->page_list[p].used == 0) { + free_page((void*) array->page_list[p].page); + array->page_list[p].page = NULL; + } + else + array->page_list[p].page[index & MTHCA_ARRAY_MASK] = NULL; +} + +int mthca_array_init(struct mthca_array *array, int nent) +{ + int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; + int i; + + array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL); + if (!array->page_list) + return -ENOMEM; + + for (i = 0; i < npage; ++i) { + array->page_list[i].page = NULL; + array->page_list[i].used = 0; + } + + return 0; +} + +void mthca_array_cleanup(struct mthca_array *array, int nent) +{ + int i; + + for (i = 0; i < (int)((nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE); ++i) + free_page((void*) array->page_list[i].page); + + kfree(array->page_list); +} + +/* + * Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. + */ + +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr) +{ + int err = -ENOMEM; + int npages, shift; + u64 *dma_list = NULL; + dma_addr_t t; + int i; + + HCA_ENTER(HCA_DBG_MEMORY); + if (size <= max_direct) { + *is_direct = 1; + npages = 1; + shift = get_order(size) + PAGE_SHIFT; + + alloc_dma_zmem_map(dev, size, PCI_DMA_BIDIRECTIONAL, &buf->direct); + if (!buf->direct.page) + return -ENOMEM; + t = buf->direct.dma_address; /* shorten the code below */ + + while (t & ((1 << shift) - 1)) { + --shift; + npages *= 2; + } + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_free; + + for (i = 0; i < npages; ++i) + dma_list[i] = t + i * (1 << shift); + } else { + *is_direct = 0; + npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + shift = PAGE_SHIFT; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + return -ENOMEM; + + buf->page_list = kmalloc(npages * sizeof *buf->page_list, + GFP_KERNEL); + if (!buf->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + buf->page_list[i].page = NULL; + + for (i = 0; i < npages; ++i) { + alloc_dma_zmem_map(dev, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &buf->page_list[i]); + if (!buf->page_list[i].page) + goto err_free; + dma_list[i] = buf->page_list[i].dma_address; + } + } + + err = mthca_mr_alloc_phys(dev, pd->pd_num, + dma_list, shift, npages, + 0, size, + MTHCA_MPT_FLAG_LOCAL_READ | + (hca_write ? 
MTHCA_MPT_FLAG_LOCAL_WRITE : 0), + mr); + if (err) + goto err_free; + + kfree(dma_list); + + HCA_EXIT(HCA_DBG_MEMORY); + return 0; + +err_free: + mthca_buf_free(dev, size, buf, *is_direct, NULL); + +err_out: + kfree(dma_list); + + return err; +} + +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr) +{ + int i; + + if (mr) + mthca_free_mr(dev, mr); + + if (is_direct) { + free_dma_mem_map(dev, &buf->direct, PCI_DMA_BIDIRECTIONAL); + } + else { + for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) { + free_dma_mem_map(dev, &buf->page_list[i], PCI_DMA_BIDIRECTIONAL); + } + kfree(buf->page_list); + } +} diff --git a/branches/IBFD/hw/mthca/kernel/mthca_av.c b/branches/IBFD/hw/mthca/kernel/mthca_av.c new file mode 100644 index 00000000..ba029d05 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_av.c @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include +#include +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_av.tmh" +#endif + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_av_table) +#pragma alloc_text (PAGE, mthca_cleanup_av_table) +#endif + + +struct mthca_av { + __be32 port_pd; + u8 reserved1; + u8 g_slid; + __be16 dlid; + u8 reserved2; + u8 gid_index; + u8 msg_sr; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + __be32 dgid[4]; +}; + +int mthca_create_ah(struct mthca_dev *dev, + struct mthca_pd *pd, + struct ib_ah_attr *ah_attr, + struct mthca_ah *ah) +{ + u32 index = (u32)-1; + struct mthca_av *av = NULL; + + ah->type = MTHCA_AH_PCI_POOL; + + if (mthca_is_memfree(dev)) { + ah->av = kmalloc(sizeof *ah->av, GFP_ATOMIC); + if (!ah->av) + return -ENOMEM; + + ah->type = MTHCA_AH_KMALLOC; + av = ah->av; + } else if (!atomic_read(&pd->sqp_count) && + !(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + index = mthca_alloc(&dev->av_table.alloc); + + /* fall back to allocate in host memory */ + if (index == -1) + goto on_hca_fail; + + av = kmalloc(sizeof *av, GFP_ATOMIC); + if (!av) + goto on_hca_fail; + + ah->type = MTHCA_AH_ON_HCA; + ah->avdma = dev->av_table.ddr_av_base + + index * MTHCA_AV_SIZE; + } + +on_hca_fail: + if (ah->type == MTHCA_AH_PCI_POOL) { + ah->av = pci_pool_alloc(dev->av_table.pool, + SLAB_ATOMIC, &ah->avdma); + if (!ah->av) + return -ENOMEM; + + av = ah->av; + } + + ah->key = pd->ntmr.ibmr.lkey; + + RtlZeroMemory(av, MTHCA_AV_SIZE); + + av->port_pd = cl_hton32(pd->pd_num | (ah_attr->port_num << 24)); + av->g_slid = ah_attr->src_path_bits; + av->dlid = cl_hton16(ah_attr->dlid); + av->msg_sr = (3 << 4) | /* 2K message */ + ah_attr->static_rate; + av->sl_tclass_flowlabel = cl_hton32(ah_attr->sl << 28); + if (ah_attr->ah_flags & IB_AH_GRH) { + av->g_slid |= 0x80; + av->gid_index = (u8)((ah_attr->port_num - 1) * dev->limits.gid_table_len + + ah_attr->grh.sgid_index); + av->hop_limit = ah_attr->grh.hop_limit; + av->sl_tclass_flowlabel |= + cl_hton32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); + memcpy(av->dgid, ah_attr->grh.dgid.raw, 16); + } else { + /* Arbel workaround -- low byte of GID must be 2 */ + av->dgid[3] = cl_hton32(2); + } + + { // debug print + int j; + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Created UDAV at %p/%08lx:\n", + av, (unsigned long) ah->avdma)); + for (j = 0; j < 8; ++j) + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_AV ,(" [%2x] %08x\n", + j * 4, cl_ntoh32(((__be32 *) av)[j]))); + } + + if (ah->type == MTHCA_AH_ON_HCA) { + memcpy_toio((u8*)dev->av_table.av_map + index * MTHCA_AV_SIZE, + av, MTHCA_AV_SIZE); + ah->av = (struct mthca_av *)( (u8*)( dev->av_table.av_map) + index *MTHCA_AV_SIZE ); + kfree(av); + } + return 0; +} + +int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah) +{ + HCA_ENTER(HCA_DBG_AV); + + switch (ah->type) { + case MTHCA_AH_ON_HCA: + mthca_free(&dev->av_table.alloc, + (u32)( (ah->avdma - dev->av_table.ddr_av_base) /MTHCA_AV_SIZE)); + break; + + case MTHCA_AH_PCI_POOL: + pci_pool_free(dev->av_table.pool, ah->av, ah->avdma); + break; + + case MTHCA_AH_KMALLOC: + kfree(ah->av); + break; + } + + HCA_EXIT(HCA_DBG_AV); + return 0; +} + +int mthca_ah_grh_present(struct mthca_ah *ah) +{ + return !!(ah->av->g_slid & 0x80); +} + +int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, + struct ib_ud_header *header) +{ + if (ah->type == MTHCA_AH_ON_HCA) + return -ENOSYS; + + header->lrh.service_level = (u8)(cl_ntoh32(ah->av->sl_tclass_flowlabel) >> 28); + 
header->lrh.destination_lid = ah->av->dlid; + header->lrh.source_lid = cl_hton16(ah->av->g_slid & 0x7f); + header->grh_present = mthca_ah_grh_present(ah); + if (header->grh_present) { + header->grh.traffic_class = + (u8)((cl_ntoh32(ah->av->sl_tclass_flowlabel) >> 20) & 0xff); + header->grh.flow_label = + (u8)(ah->av->sl_tclass_flowlabel & cl_hton32(0xfffff)); + ib_get_cached_gid(&dev->ib_dev, + (u8) (cl_ntoh32(ah->av->port_pd) >> 24), + ah->av->gid_index % dev->limits.gid_table_len, + &header->grh.source_gid); + memcpy(header->grh.destination_gid.raw, + ah->av->dgid, 16); + } + + return 0; +} + +int mthca_init_av_table(struct mthca_dev *dev) +{ + int err; + + if (mthca_is_memfree(dev)) + return 0; + + err = mthca_alloc_init(&dev->av_table.alloc, + dev->av_table.num_ddr_avs, + dev->av_table.num_ddr_avs - 1, + 0); + if (err) + return err; + + dev->av_table.pool = pci_pool_create("mthca_av", dev, + MTHCA_AV_SIZE, + MTHCA_AV_SIZE, 0); + if (!dev->av_table.pool) + goto out_free_alloc; + + if (!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + dev->av_table.av_map = ioremap(pci_resource_start(dev, HCA_BAR_TYPE_DDR) + + dev->av_table.ddr_av_base - + dev->ddr_start, + dev->av_table.num_ddr_avs * + MTHCA_AV_SIZE, + &dev->av_table.av_map_size); + if (!dev->av_table.av_map) + goto out_free_pool; + } else + dev->av_table.av_map = NULL; + + return 0; + + out_free_pool: + pci_pool_destroy(dev->av_table.pool); + + out_free_alloc: + mthca_alloc_cleanup(&dev->av_table.alloc); + return -ENOMEM; +} + +void mthca_cleanup_av_table(struct mthca_dev *dev) +{ + if (mthca_is_memfree(dev)) + return; + + if (dev->av_table.av_map) + iounmap(dev->av_table.av_map, dev->av_table.av_map_size); + pci_pool_destroy(dev->av_table.pool); + mthca_alloc_cleanup(&dev->av_table.alloc); +} + +//NB: temporary, for support of query_qp +void mthca_get_av_params( struct mthca_ah *ah_p, u8 *port_num, __be16 *dlid, u8 *sr, u8 *path_bits ) +{ + struct mthca_av *av_p = ah_p->av; + *port_num = (u8) (cl_ntoh32(av_p->port_pd) >> 24); + *dlid = av_p->dlid; + *sr = av_p->msg_sr & 0x0f; + *path_bits = av_p->g_slid & 0x7f; +} + +//NB: temporary, for support of modify_qp +void mthca_set_av_params( struct mthca_dev *dev, struct mthca_ah *ah_p, struct ib_ah_attr *ah_attr ) +{ + struct mthca_av *av = ah_p->av; + struct ib_ah *ib_ah_p = (struct ib_ah *)ah_p; + struct mthca_pd *pd = (struct mthca_pd *)ib_ah_p->pd; + + // taken from mthca_create_av + av->port_pd = cl_hton32(pd->pd_num | (ah_attr->port_num << 24)); + av->g_slid = ah_attr->src_path_bits; + av->dlid = cl_hton16(ah_attr->dlid); + av->msg_sr = (3 << 4) | /* 2K message */ + ah_attr->static_rate; + av->sl_tclass_flowlabel = cl_hton32(ah_attr->sl << 28); + if (ah_attr->ah_flags & IB_AH_GRH) { + av->g_slid |= 0x80; + av->gid_index = (u8)((ah_attr->port_num - 1) * dev->limits.gid_table_len + + ah_attr->grh.sgid_index); + av->hop_limit = ah_attr->grh.hop_limit; + av->sl_tclass_flowlabel |= + cl_hton32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); + memcpy(av->dgid, ah_attr->grh.dgid.raw, 16); + } else { + /* Arbel workaround -- low byte of GID must be 2 */ + av->dgid[3] = cl_hton32(2); + } +} + + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_catas.c b/branches/IBFD/hw/mthca/kernel/mthca_catas.c new file mode 100644 index 00000000..0c91518f --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_catas.c @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_catas.tmh" +#endif + +enum { + MTHCA_CATAS_POLL_INTERVAL = 5 * HZ, + + MTHCA_CATAS_TYPE_INTERNAL = 0, + MTHCA_CATAS_TYPE_UPLINK = 3, + MTHCA_CATAS_TYPE_DDR = 4, + MTHCA_CATAS_TYPE_PARITY = 5, +}; + +static spinlock_t catas_lock; + +static void handle_catas(struct mthca_dev *dev) +{ + struct ib_event event; + const char *type; + int i; + + event.device = &dev->ib_dev; + event.event = IB_EVENT_DEVICE_FATAL; + event.element.port_num = 0; + + ib_dispatch_event(&event); + + switch (_byteswap_ulong(readl(dev->catas_err.map)) >> 24) { + case MTHCA_CATAS_TYPE_INTERNAL: + type = "internal error"; + break; + case MTHCA_CATAS_TYPE_UPLINK: + type = "uplink bus error"; + break; + case MTHCA_CATAS_TYPE_DDR: + type = "DDR data error"; + break; + case MTHCA_CATAS_TYPE_PARITY: + type = "internal parity error"; + break; + default: + type = "unknown error"; + break; + } + + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Catastrophic error detected: %s\n", type)); + for (i = 0; i < (int)dev->catas_err.size; ++i) + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,(" buf[%02x]: %08x\n", + i, _byteswap_ulong(readl(dev->catas_err.map + i)))); +} + +static void poll_catas(struct mthca_dev *dev) +{ + int i; + SPIN_LOCK_PREP(lh); + + for (i = 0; i < (int)dev->catas_err.size; ++i) + if (readl(dev->catas_err.map + i)) { + handle_catas(dev); + return; + } + + spin_lock_dpc(&catas_lock, &lh); + if (!dev->catas_err.stop) { + KeSetTimerEx( &dev->catas_err.timer, dev->catas_err.interval, + 0, &dev->catas_err.timer_dpc ); + } + spin_unlock_dpc(&lh); + + return; +} + +static void timer_dpc( + IN struct _KDPC *Dpc, + IN PVOID DeferredContext, + IN PVOID SystemArgument1, + IN PVOID SystemArgument2 + ) +{ + struct mthca_dev *dev = (struct mthca_dev *)DeferredContext; + UNREFERENCED_PARAMETER(Dpc); + UNREFERENCED_PARAMETER(SystemArgument1); + UNREFERENCED_PARAMETER(SystemArgument2); + poll_catas( dev ); +} + + +void mthca_start_catas_poll(struct mthca_dev *dev) +{ + u64 addr; + + dev->catas_err.stop = 0; + dev->catas_err.map = NULL; + + addr = pci_resource_start(dev, HCA_BAR_TYPE_HCR) + + ((pci_resource_len(dev, HCA_BAR_TYPE_HCR) - 1) & + dev->catas_err.addr); + + dev->catas_err.map = 
ioremap(addr, dev->catas_err.size * 4, &dev->catas_err.map_size ); + if (!dev->catas_err.map) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("couldn't map catastrophic error region " + "at 0x%I64x/0x%x\n", addr, dev->catas_err.size * 4)); + return; + } + + spin_lock_init( &catas_lock ); + KeInitializeDpc( &dev->catas_err.timer_dpc, timer_dpc, dev ); + KeInitializeTimer( &dev->catas_err.timer ); + dev->catas_err.interval.QuadPart = (-10)* (__int64)MTHCA_CATAS_POLL_INTERVAL; + KeSetTimerEx( &dev->catas_err.timer, dev->catas_err.interval, + 0, &dev->catas_err.timer_dpc ); +} + +void mthca_stop_catas_poll(struct mthca_dev *dev) +{ + SPIN_LOCK_PREP(lh); + + spin_lock_irq(&catas_lock, &lh); + dev->catas_err.stop = 1; + spin_unlock_irq(&lh); + + KeCancelTimer(&dev->catas_err.timer); + KeFlushQueuedDpcs(); + + if (dev->catas_err.map) { + iounmap(dev->catas_err.map, dev->catas_err.map_size); + } +} diff --git a/branches/IBFD/hw/mthca/kernel/mthca_cmd.c b/branches/IBFD/hw/mthca/kernel/mthca_cmd.c new file mode 100644 index 00000000..2ea169d4 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_cmd.c @@ -0,0 +1,1830 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_cmd.tmh" +#endif +#include "mthca_config_reg.h" +#include "mthca_cmd.h" +#include "mthca_memfree.h" + +#define CMD_POLL_TOKEN 0xffff + +enum { + HCR_IN_PARAM_OFFSET = 0x00, + HCR_IN_MODIFIER_OFFSET = 0x08, + HCR_OUT_PARAM_OFFSET = 0x0c, + HCR_TOKEN_OFFSET = 0x14, + HCR_STATUS_OFFSET = 0x18, + + HCR_OPMOD_SHIFT = 12, + HCA_E_BIT = 22, + HCR_GO_BIT = 23 +}; + +enum { + /* initialization and general commands */ + CMD_SYS_EN = 0x1, + CMD_SYS_DIS = 0x2, + CMD_MAP_FA = 0xfff, + CMD_UNMAP_FA = 0xffe, + CMD_RUN_FW = 0xff6, + CMD_MOD_STAT_CFG = 0x34, + CMD_QUERY_DEV_LIM = 0x3, + CMD_QUERY_FW = 0x4, + CMD_ENABLE_LAM = 0xff8, + CMD_DISABLE_LAM = 0xff7, + CMD_QUERY_DDR = 0x5, + CMD_QUERY_ADAPTER = 0x6, + CMD_INIT_HCA = 0x7, + CMD_CLOSE_HCA = 0x8, + CMD_INIT_IB = 0x9, + CMD_CLOSE_IB = 0xa, + CMD_QUERY_HCA = 0xb, + CMD_SET_IB = 0xc, + CMD_ACCESS_DDR = 0x2e, + CMD_MAP_ICM = 0xffa, + CMD_UNMAP_ICM = 0xff9, + CMD_MAP_ICM_AUX = 0xffc, + CMD_UNMAP_ICM_AUX = 0xffb, + CMD_SET_ICM_SIZE = 0xffd, + + /* TPT commands */ + CMD_SW2HW_MPT = 0xd, + CMD_QUERY_MPT = 0xe, + CMD_HW2SW_MPT = 0xf, + CMD_READ_MTT = 0x10, + CMD_WRITE_MTT = 0x11, + CMD_SYNC_TPT = 0x2f, + + /* EQ commands */ + CMD_MAP_EQ = 0x12, + CMD_SW2HW_EQ = 0x13, + CMD_HW2SW_EQ = 0x14, + CMD_QUERY_EQ = 0x15, + + /* CQ commands */ + CMD_SW2HW_CQ = 0x16, + CMD_HW2SW_CQ = 0x17, + CMD_QUERY_CQ = 0x18, + CMD_RESIZE_CQ = 0x2c, + + /* SRQ commands */ + CMD_SW2HW_SRQ = 0x35, + CMD_HW2SW_SRQ = 0x36, + CMD_QUERY_SRQ = 0x37, + CMD_ARM_SRQ = 0x40, + + /* QP/EE commands */ + CMD_RST2INIT_QPEE = 0x19, + CMD_INIT2RTR_QPEE = 0x1a, + CMD_RTR2RTS_QPEE = 0x1b, + CMD_RTS2RTS_QPEE = 0x1c, + CMD_SQERR2RTS_QPEE = 0x1d, + CMD_2ERR_QPEE = 0x1e, + CMD_RTS2SQD_QPEE = 0x1f, + CMD_SQD2SQD_QPEE = 0x38, + CMD_SQD2RTS_QPEE = 0x20, + CMD_ERR2RST_QPEE = 0x21, + CMD_QUERY_QPEE = 0x22, + CMD_INIT2INIT_QPEE = 0x2d, + CMD_SUSPEND_QPEE = 0x32, + CMD_UNSUSPEND_QPEE = 0x33, + /* special QPs and management commands */ + CMD_CONF_SPECIAL_QP = 0x23, + CMD_MAD_IFC = 0x24, + + /* multicast commands */ + CMD_READ_MGM = 0x25, + CMD_WRITE_MGM = 0x26, + CMD_MGID_HASH = 0x27, + + /* miscellaneous commands */ + CMD_DIAG_RPRT = 0x30, + CMD_NOP = 0x31, + + /* debug commands */ + CMD_QUERY_DEBUG_MSG = 0x2a, + CMD_SET_DEBUG_MSG = 0x2b, +}; + +/* + * According to Mellanox code, FW may be starved and never complete + * commands. So we can't use strict timeouts described in PRM -- we + * just arbitrarily select 60 seconds for now. 
+ */ +#define CMD_POLL_N_TRIES 60 + +enum { + CMD_TIME_CLASS_A = 60 * HZ, + CMD_TIME_CLASS_B = 60 * HZ, + CMD_TIME_CLASS_C = 60 * HZ +}; + +enum { + GO_BIT_TIMEOUT = 10 * HZ +}; + +#define GO_BIT_N_TRIES 5 +#define GO_BIT_STALL_TIMEOUT ((GO_BIT_TIMEOUT/HZ)/GO_BIT_N_TRIES) /* usecs */ + +struct mthca_cmd_context { + KEVENT event; + int result; + int next; + u64 out_param; + u16 token; + u8 status; +}; + +static inline int go_bit(struct mthca_dev *dev) +{ + return readl(dev->hcr + HCR_STATUS_OFFSET) & + _byteswap_ulong(1 << HCR_GO_BIT); +} + +/* +* Function: performs busy-wait loop, while polling GO bit +* Return: 0 when GO bit was extinguished in time +*/ +static int poll_go_bit(struct mthca_dev *dev) +{ + int i=0; /* init must be here !*/ + + if (!go_bit(dev)) + return 0; + + for (; i= N_POLL_TRIES) { + if ( (__int64)interval.QuadPart > (__int64)MAX_POLL_INTERVAL) + interval.QuadPart += POLL_INTERVAL_DELTA; + i = 0; + } +#endif + } + + if (!go_bit(dev)) return 0; + return 1; +} + + +static int mthca_cmd_post(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + u16 token, + int event) +{ + int err = 0; + + down(&dev->cmd.hcr_mutex); + + if (event && wait_go_bit(dev,GO_BIT_TIMEOUT)) { + err = -EAGAIN; + goto out; + } + + /* + * We use writel (instead of something like memcpy_toio) + * because writes of less than 32 bits to the HCR don't work + * (and some architectures such as ia64 implement memcpy_toio + * in terms of writeb). + */ + __raw_writel((u32) cl_hton32((u32)(in_param >> 32)), (u8 *)dev->hcr + 0 * 4); + __raw_writel((u32) cl_hton32((u32)(in_param & 0xfffffffful)), (u8 *) dev->hcr + 1 * 4); + __raw_writel((u32) cl_hton32(in_modifier), (u8 *)dev->hcr + 2 * 4); + __raw_writel((u32) cl_hton32((u32)(out_param >> 32)), (u8 *)dev->hcr + 3 * 4); + __raw_writel((u32) cl_hton32((u32)(out_param & 0xfffffffful)), (u8 *)dev->hcr + 4 * 4); + __raw_writel((u32) cl_hton32(token << 16), (u8 *)dev->hcr + 5 * 4); + + /* __raw_writel may not order writes. */ + wmb(); + + __raw_writel((u32) cl_hton32((1 << HCR_GO_BIT) | + (event ? (1 << HCA_E_BIT) : 0) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), (u8 *)dev->hcr + 6 * 4); + +out: + up(&dev->cmd.hcr_mutex); + return err; +} + + +static int mthca_cmd_poll(struct mthca_dev *dev, + u64 in_param, + u64 *out_param, + int out_is_imm, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + int err = 0; + + sem_down(&dev->cmd.poll_sem); + + err = mthca_cmd_post(dev, in_param, + out_param ? 
*out_param : 0, + in_modifier, op_modifier, + op, CMD_POLL_TOKEN, 0); + if (err) + goto out; + + if (wait_go_bit(dev,timeout)) { + err = -EBUSY; + goto out; + } + + if (out_is_imm) + *out_param = + (u64) cl_ntoh32((__be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 | + (u64) cl_ntoh32((__be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4)); + + *status = (u8)(cl_ntoh32((__be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24); + if (*status) + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("mthca_cmd_wait: Command %02x completed with status %02x\n", + op, *status)); + +out: + sem_up(&dev->cmd.poll_sem); + return err; +} + +void mthca_cmd_event(struct mthca_dev *dev, + u16 token, + u8 status, + u64 out_param) +{ + struct mthca_cmd_context *context = + &dev->cmd.context[token & dev->cmd.token_mask]; + + /* previously timed out command completing at long last */ + if (token != context->token) + return; + + context->result = 0; + context->status = status; + context->out_param = out_param; + + context->token += dev->cmd.token_mask + 1; + + ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL); + KeSetEvent( &context->event, 0, FALSE ); +} + +static int mthca_cmd_wait(struct mthca_dev *dev, + u64 in_param, + u64 *out_param, + int out_is_imm, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + int err = 0; + struct mthca_cmd_context *context; + SPIN_LOCK_PREP(lh); + + sem_down(&dev->cmd.event_sem); + + spin_lock( &dev->cmd.context_lock, &lh ); + BUG_ON(dev->cmd.free_head < 0); + context = &dev->cmd.context[dev->cmd.free_head]; + dev->cmd.free_head = context->next; + spin_unlock( &lh ); + + KeClearEvent( &context->event ); + err = mthca_cmd_post(dev, in_param, + out_param ? *out_param : 0, + in_modifier, op_modifier, + op, context->token, 1); + if (err) { + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_LOW, + ("mthca_cmd_wait: Command %02x completed with err %02x\n", op, err)); + goto out; + } + + { + NTSTATUS res; + LARGE_INTEGER interval; + interval.QuadPart = (-10)* (__int64)timeout; + res = KeWaitForSingleObject( &context->event, Executive, KernelMode, FALSE, &interval ); + if (res != STATUS_SUCCESS) { + err = -EBUSY; + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_LOW, + ("mthca_cmd_wait: Command %02x completed with err %02x\n", op, err)); + goto out; + } + } + + *status = context->status; + if (*status) + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_LOW,("mthca_cmd_wait: Command %02x completed with status %02x\n", + op, *status)); + + if (out_is_imm) + *out_param = context->out_param; + +out: + spin_lock(&dev->cmd.context_lock, &lh); + context->next = dev->cmd.free_head; + dev->cmd.free_head = (int)(context - dev->cmd.context); + spin_unlock(&lh); + + sem_up( &dev->cmd.event_sem ); + + return err; +} + +/* Invoke a command with an output mailbox */ +static int mthca_cmd_box(struct mthca_dev *dev, + u64 in_param, + u64 out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + if (dev->cmd.use_events) + return mthca_cmd_wait(dev, in_param, &out_param, 0, + in_modifier, op_modifier, op, + timeout, status); + else + return mthca_cmd_poll(dev, in_param, &out_param, 0, + in_modifier, op_modifier, op, + timeout, status); +} + +/* Invoke a command with no output parameter */ +static int mthca_cmd(struct mthca_dev *dev, + u64 in_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + return mthca_cmd_box(dev, in_param, 0, in_modifier, + op_modifier, op, timeout, status); +} + +/* + * 
Invoke a command with an immediate output parameter (and copy the + * output into the caller's out_param pointer after the command + * executes). + */ +static int mthca_cmd_imm(struct mthca_dev *dev, + u64 in_param, + u64 *out_param, + u32 in_modifier, + u8 op_modifier, + u16 op, + unsigned long timeout, + u8 *status) +{ + if (dev->cmd.use_events) + return mthca_cmd_wait(dev, in_param, out_param, 1, + in_modifier, op_modifier, op, + timeout, status); + else + return mthca_cmd_poll(dev, in_param, out_param, 1, + in_modifier, op_modifier, op, + timeout, status); +} + +int mthca_cmd_init(struct mthca_dev *dev) +{ + KeInitializeMutex(&dev->cmd.hcr_mutex, 0); + sem_init(&dev->cmd.poll_sem, 1, 1); + dev->cmd.use_events = 0; + + dev->hcr = ioremap(pci_resource_start(dev, HCA_BAR_TYPE_HCR) + MTHCA_HCR_BASE, + MTHCA_HCR_SIZE, &dev->hcr_size); + if (!dev->hcr) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map command register.")); + return -ENOMEM; + } + + dev->cmd.pool = pci_pool_create("mthca_cmd", dev, + MTHCA_MAILBOX_SIZE, + MTHCA_MAILBOX_SIZE, 0); + if (!dev->cmd.pool) { + iounmap(dev->hcr, dev->hcr_size); + return -ENOMEM; + } + + return 0; +} + +void mthca_cmd_cleanup(struct mthca_dev *dev) +{ + pci_pool_destroy(dev->cmd.pool); + iounmap(dev->hcr, dev->hcr_size); +} + +/* + * Switch to using events to issue FW commands (should be called after + * event queue to command events has been initialized). + */ +int mthca_cmd_use_events(struct mthca_dev *dev) +{ + int i; + + dev->cmd.context = kmalloc(dev->cmd.max_cmds * + sizeof (struct mthca_cmd_context), + GFP_KERNEL); + if (!dev->cmd.context) + return -ENOMEM; + + for (i = 0; i < dev->cmd.max_cmds; ++i) { + dev->cmd.context[i].token = (u16)i; + dev->cmd.context[i].next = i + 1; + KeInitializeEvent( &dev->cmd.context[i].event, NotificationEvent , FALSE ); + } + + dev->cmd.context[dev->cmd.max_cmds - 1].next = -1; + dev->cmd.free_head = 0; + + sem_init(&dev->cmd.event_sem, dev->cmd.max_cmds, LONG_MAX); + spin_lock_init(&dev->cmd.context_lock); + + for (dev->cmd.token_mask = 1; + dev->cmd.token_mask < dev->cmd.max_cmds; + dev->cmd.token_mask <<= 1) + ; /* nothing */ + --dev->cmd.token_mask; + + dev->cmd.use_events = 1; + sem_down(&dev->cmd.poll_sem); + + return 0; +} + +/* + * Switch back to polling (used when shutting down the device) + */ +void mthca_cmd_use_polling(struct mthca_dev *dev) +{ + int i; + + dev->cmd.use_events = 0; + + for (i = 0; i < dev->cmd.max_cmds; ++i) + sem_down(&dev->cmd.event_sem); + + kfree(dev->cmd.context); + + sem_up(&dev->cmd.poll_sem); +} + +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + unsigned int gfp_mask) +{ + struct mthca_mailbox *mailbox; + + mailbox = kmalloc(sizeof *mailbox, gfp_mask); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma); + if (!mailbox->buf) { + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + + return mailbox; +} + +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) +{ + if (!mailbox) + return; + + pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + +int mthca_SYS_EN(struct mthca_dev *dev, u8 *status) +{ + u64 out; + int ret; + + ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, HZ, status); + + if (*status == MTHCA_CMD_STAT_DDR_MEM_ERR) + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("SYS_EN DDR error: syn=%x, sock=%d, " + "sladdr=%d, SPD source=%s\n", + (int) (out >> 6) & 0xf, (int) (out >> 4) & 3, + (int) (out >> 1) & 7, (int) out & 1 ? 
"NVMEM" : "DIMM")); + + return ret; +} + +int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, HZ, status); +} + +static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, + u64 virt, u8 *status) +{ + struct mthca_mailbox *mailbox; + struct mthca_icm_iter iter; + __be64 *pages; + int lg; + int nent = 0; + unsigned long i; + int err = 0; + int ts = 0, tc = 0; + CPU_2_BE64_PREP; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + RtlZeroMemory(mailbox->buf, MTHCA_MAILBOX_SIZE); + pages = mailbox->buf; + + for (mthca_icm_first(icm, &iter); + !mthca_icm_last(&iter); + mthca_icm_next(&iter)) { + /* + * We have to pass pages that are aligned to their + * size, so find the least significant 1 in the + * address or size and use that as our log2 size. + */ + i = (u32)mthca_icm_addr(&iter) | mthca_icm_size(&iter); + lg = ffs(i) - 1; + if (lg < 12) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Got FW area not aligned to 4K (%I64x/%lx).\n", + (u64) mthca_icm_addr(&iter), + mthca_icm_size(&iter))); + err = -EINVAL; + goto out; + } + for (i = 0; i < mthca_icm_size(&iter) >> lg; ++i) { + if (virt != -1) { + pages[nent * 2] = cl_hton64(virt); + virt += 1Ui64 << lg; + } + pages[nent * 2 + 1] = CPU_2_BE64((mthca_icm_addr(&iter) + + (i << lg)) | (lg - 12)); + ts += 1 << (lg - 10); + ++tc; + + if (++nent == MTHCA_MAILBOX_SIZE / 16) { + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, + CMD_TIME_CLASS_B, status); + if (err || *status) + goto out; + nent = 0; + } + } + } + + if (nent) + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, + CMD_TIME_CLASS_B, status); + + switch (op) { + case CMD_MAP_FA: + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Mapped %d chunks/%d KB for FW.\n", tc, ts)); + break; + case CMD_MAP_ICM_AUX: + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Mapped %d chunks/%d KB for ICM aux.\n", tc, ts)); + break; + case CMD_MAP_ICM: + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Mapped %d chunks/%d KB at %I64x for ICM.\n", + tc, ts, (u64) virt - (ts << 10))); + break; + } + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status) +{ + return mthca_map_cmd(dev, CMD_MAP_FA, icm, (u64)-1, status); +} + +int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B, status); +} + +int mthca_RUN_FW(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status); +} + +int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) +{ + struct mthca_mailbox *mailbox; + u32 *outbox; + int err = 0; + u8 lg; + +#define QUERY_FW_OUT_SIZE 0x100 +#define QUERY_FW_VER_OFFSET 0x00 +#define QUERY_FW_MAX_CMD_OFFSET 0x0f +#define QUERY_FW_ERR_START_OFFSET 0x30 +#define QUERY_FW_ERR_SIZE_OFFSET 0x38 + +#define QUERY_FW_START_OFFSET 0x20 +#define QUERY_FW_END_OFFSET 0x28 + +#define QUERY_FW_SIZE_OFFSET 0x00 +#define QUERY_FW_CLR_INT_BASE_OFFSET 0x20 +#define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40 +#define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW, + CMD_TIME_CLASS_A, status); + + if (err) + goto out; + + MTHCA_GET(dev->fw_ver, outbox, QUERY_FW_VER_OFFSET); + /* + * FW subSIZE_Tor version is at more signifant bits than minor + * version, so swap 
here. + */ + dev->fw_ver = (dev->fw_ver & 0xffff00000000Ui64) | + ((dev->fw_ver & 0xffff0000Ui64) >> 16) | + ((dev->fw_ver & 0x0000ffffUi64) << 16); + + MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); + dev->cmd.max_cmds = 1 << lg; + MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET); + MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET); + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("FW version %012I64x, max commands %d\n", + (u64) dev->fw_ver, dev->cmd.max_cmds)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Catastrophic error buffer at 0x%I64x, size 0x%x\n", + (u64) dev->catas_err.addr, dev->catas_err.size)); + + + if (mthca_is_memfree(dev)) { + MTHCA_GET(dev->fw.arbel.fw_pages, outbox, QUERY_FW_SIZE_OFFSET); + MTHCA_GET(dev->fw.arbel.clr_int_base, outbox, QUERY_FW_CLR_INT_BASE_OFFSET); + MTHCA_GET(dev->fw.arbel.eq_arm_base, outbox, QUERY_FW_EQ_ARM_BASE_OFFSET); + MTHCA_GET(dev->fw.arbel.eq_set_ci_base, outbox, QUERY_FW_EQ_SET_CI_BASE_OFFSET); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("FW size %d KB\n", dev->fw.arbel.fw_pages << 2)); + + /* + * Arbel page size is always 4 KB; round up number of + * system pages needed. + */ + dev->fw.arbel.fw_pages = + ALIGN(dev->fw.arbel.fw_pages, PAGE_SIZE >> 12) >> + (PAGE_SHIFT - 12); + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Clear int @ %I64x, EQ arm @ %I64x, EQ set CI @ %I64x\n", + (u64) dev->fw.arbel.clr_int_base, + (u64) dev->fw.arbel.eq_arm_base, + (u64) dev->fw.arbel.eq_set_ci_base)); + } else { + MTHCA_GET(dev->fw.tavor.fw_start, outbox, QUERY_FW_START_OFFSET); + MTHCA_GET(dev->fw.tavor.fw_end, outbox, QUERY_FW_END_OFFSET); + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("FW size %d KB (start %I64x, end %I64x)\n", + (int) ((dev->fw.tavor.fw_end - dev->fw.tavor.fw_start) >> 10), + (u64) dev->fw.tavor.fw_start, + (u64) dev->fw.tavor.fw_end)); + } + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) +{ + struct mthca_mailbox *mailbox; + u8 info; + u32 *outbox; + int err = 0; + +#define ENABLE_LAM_OUT_SIZE 0x100 +#define ENABLE_LAM_START_OFFSET 0x00 +#define ENABLE_LAM_END_OFFSET 0x08 +#define ENABLE_LAM_INFO_OFFSET 0x13 + +#define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4) +#define ENABLE_LAM_INFO_ECC_MASK 0x3 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM, + CMD_TIME_CLASS_C, status); + + if (err) + goto out; + + if (*status == MTHCA_CMD_STAT_LAM_NOT_PRE) + goto out; + + MTHCA_GET(dev->ddr_start, outbox, ENABLE_LAM_START_OFFSET); + MTHCA_GET(dev->ddr_end, outbox, ENABLE_LAM_END_OFFSET); + MTHCA_GET(info, outbox, ENABLE_LAM_INFO_OFFSET); + + if (!!(info & ENABLE_LAM_INFO_HIDDEN_FLAG) != + !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("FW reports that HCA-attached memory " + "is %s hidden; does not match PCI config\n", + (info & ENABLE_LAM_INFO_HIDDEN_FLAG)? 
+ "" : "not")); + } + if (info & ENABLE_LAM_INFO_HIDDEN_FLAG) + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("HCA-attached memory is hidden.\n")); + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("HCA memory size %d KB (start %I64x, end %I64x)\n", + (int) ((dev->ddr_end - dev->ddr_start) >> 10), + (u64) dev->ddr_start, + (u64) dev->ddr_end)); + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C, status); +} + +int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) +{ + struct mthca_mailbox *mailbox; + u8 info; + u32 *outbox; + int err = 0; + +#define QUERY_DDR_OUT_SIZE 0x100 +#define QUERY_DDR_START_OFFSET 0x00 +#define QUERY_DDR_END_OFFSET 0x08 +#define QUERY_DDR_INFO_OFFSET 0x13 + +#define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4) +#define QUERY_DDR_INFO_ECC_MASK 0x3 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR, + CMD_TIME_CLASS_A, status); + + if (err) + goto out; + + MTHCA_GET(dev->ddr_start, outbox, QUERY_DDR_START_OFFSET); + MTHCA_GET(dev->ddr_end, outbox, QUERY_DDR_END_OFFSET); + MTHCA_GET(info, outbox, QUERY_DDR_INFO_OFFSET); + + if (!!(info & QUERY_DDR_INFO_HIDDEN_FLAG) != + !!(dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) { + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("FW reports that HCA-attached memory " + "is %s hidden; does not match PCI config\n", + (info & QUERY_DDR_INFO_HIDDEN_FLAG) ? + "" : "not")); + } + if (info & QUERY_DDR_INFO_HIDDEN_FLAG) + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("HCA-attached memory is hidden.\n")); + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("HCA memory size %d KB (start %I64x, end %I64x)\n", + (int) ((dev->ddr_end - dev->ddr_start) >> 10), + (u64) dev->ddr_start, + (u64) dev->ddr_end)); + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, + struct mthca_dev_lim *dev_lim, u8 *status) +{ + struct mthca_mailbox *mailbox; + u32 *outbox; + u8 field; + u16 size; + int err; + +#define QUERY_DEV_LIM_OUT_SIZE 0x100 +#define QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET 0x10 +#define QUERY_DEV_LIM_MAX_QP_SZ_OFFSET 0x11 +#define QUERY_DEV_LIM_RSVD_QP_OFFSET 0x12 +#define QUERY_DEV_LIM_MAX_QP_OFFSET 0x13 +#define QUERY_DEV_LIM_RSVD_SRQ_OFFSET 0x14 +#define QUERY_DEV_LIM_MAX_SRQ_OFFSET 0x15 +#define QUERY_DEV_LIM_RSVD_EEC_OFFSET 0x16 +#define QUERY_DEV_LIM_MAX_EEC_OFFSET 0x17 +#define QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET 0x19 +#define QUERY_DEV_LIM_RSVD_CQ_OFFSET 0x1a +#define QUERY_DEV_LIM_MAX_CQ_OFFSET 0x1b +#define QUERY_DEV_LIM_MAX_MPT_OFFSET 0x1d +#define QUERY_DEV_LIM_RSVD_EQ_OFFSET 0x1e +#define QUERY_DEV_LIM_MAX_EQ_OFFSET 0x1f +#define QUERY_DEV_LIM_RSVD_MTT_OFFSET 0x20 +#define QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET 0x21 +#define QUERY_DEV_LIM_RSVD_MRW_OFFSET 0x22 +#define QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET 0x23 +#define QUERY_DEV_LIM_MAX_AV_OFFSET 0x27 +#define QUERY_DEV_LIM_MAX_REQ_QP_OFFSET 0x29 +#define QUERY_DEV_LIM_MAX_RES_QP_OFFSET 0x2b +#define QUERY_DEV_LIM_MAX_RDMA_OFFSET 0x2f +#define QUERY_DEV_LIM_RSZ_SRQ_OFFSET 0x33 +#define QUERY_DEV_LIM_ACK_DELAY_OFFSET 0x35 +#define QUERY_DEV_LIM_MTU_WIDTH_OFFSET 0x36 +#define QUERY_DEV_LIM_VL_PORT_OFFSET 0x37 +#define QUERY_DEV_LIM_MAX_GID_OFFSET 0x3b +#define QUERY_DEV_LIM_MAX_PKEY_OFFSET 0x3f +#define QUERY_DEV_LIM_FLAGS_OFFSET 0x44 +#define QUERY_DEV_LIM_RSVD_UAR_OFFSET 0x48 +#define 
QUERY_DEV_LIM_UAR_SZ_OFFSET 0x49 +#define QUERY_DEV_LIM_PAGE_SZ_OFFSET 0x4b +#define QUERY_DEV_LIM_MAX_SG_OFFSET 0x51 +#define QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET 0x52 +#define QUERY_DEV_LIM_MAX_SG_RQ_OFFSET 0x55 +#define QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_LIM_MAX_QP_MCG_OFFSET 0x61 +#define QUERY_DEV_LIM_RSVD_MCG_OFFSET 0x62 +#define QUERY_DEV_LIM_MAX_MCG_OFFSET 0x63 +#define QUERY_DEV_LIM_RSVD_PD_OFFSET 0x64 +#define QUERY_DEV_LIM_MAX_PD_OFFSET 0x65 +#define QUERY_DEV_LIM_RSVD_RDD_OFFSET 0x66 +#define QUERY_DEV_LIM_MAX_RDD_OFFSET 0x67 +#define QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET 0x80 +#define QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET 0x82 +#define QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET 0x84 +#define QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET 0x86 +#define QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET 0x88 +#define QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET 0x8a +#define QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET 0x8c +#define QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET 0x8e +#define QUERY_DEV_LIM_MTT_ENTRY_SZ_OFFSET 0x90 +#define QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET 0x92 +#define QUERY_DEV_LIM_PBL_SZ_OFFSET 0x96 +#define QUERY_DEV_LIM_BMME_FLAGS_OFFSET 0x97 +#define QUERY_DEV_LIM_RSVD_LKEY_OFFSET 0x98 +#define QUERY_DEV_LIM_LAMR_OFFSET 0x9f +#define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM, + CMD_TIME_CLASS_A, status); + + if (err) + goto out; + + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET); + dev_lim->reserved_qps = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET); + dev_lim->max_qps = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_SRQ_OFFSET); + dev_lim->reserved_srqs = 1 << (field >> 4); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_OFFSET); + dev_lim->max_srqs = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EEC_OFFSET); + dev_lim->reserved_eecs = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EEC_OFFSET); + dev_lim->max_eecs = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_SZ_OFFSET); + dev_lim->max_cq_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_CQ_OFFSET); + dev_lim->reserved_cqs = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_CQ_OFFSET); + dev_lim->max_cqs = 1 << (field & 0x1f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MPT_OFFSET); + dev_lim->max_mpts = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_EQ_OFFSET); + dev_lim->reserved_eqs = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET); + dev_lim->max_eqs = 1 << (field & 0x7); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); + dev_lim->reserved_mtts = 1 << (field >> 4); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); + dev_lim->max_mrw_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET); + dev_lim->reserved_mrws = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MTT_SEG_OFFSET); + dev_lim->max_mtt_seg = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_REQ_QP_OFFSET); + dev_lim->max_requester_per_qp = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RES_QP_OFFSET); + dev_lim->max_responder_per_qp = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDMA_OFFSET); + dev_lim->max_rdma_global = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_ACK_DELAY_OFFSET); 
+ dev_lim->local_ca_ack_delay = field & 0x1f; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MTU_WIDTH_OFFSET); + dev_lim->max_mtu = field >> 4; + dev_lim->max_port_width = field & 0xf; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_VL_PORT_OFFSET); + dev_lim->max_vl = field >> 4; + dev_lim->num_ports = field & 0xf; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET); + dev_lim->max_gids = 1 << (field & 0xf); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET); + dev_lim->max_pkeys = 1 << (field & 0xf); + MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_UAR_OFFSET); + dev_lim->reserved_uars = field >> 4; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_UAR_SZ_OFFSET); + dev_lim->uar_size = 1 << ((field & 0x3f) + 20); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_PAGE_SZ_OFFSET); + dev_lim->min_page_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_OFFSET); + dev_lim->max_sg = field; + + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_OFFSET); + dev_lim->max_desc_sz = size; + + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_MCG_OFFSET); + dev_lim->max_qp_per_mcg = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MCG_OFFSET); + dev_lim->reserved_mgms = field & 0xf; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MCG_OFFSET); + dev_lim->max_mcgs = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_PD_OFFSET); + dev_lim->reserved_pds = field >> 4; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PD_OFFSET); + dev_lim->max_pds = 1 << (field & 0x3f); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_RDD_OFFSET); + dev_lim->reserved_rdds = field >> 4; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_RDD_OFFSET); + dev_lim->max_rdds = 1 << (field & 0x3f); + + MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEC_ENTRY_SZ_OFFSET); + dev_lim->eec_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_QPC_ENTRY_SZ_OFFSET); + dev_lim->qpc_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_EEEC_ENTRY_SZ_OFFSET); + dev_lim->eeec_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQPC_ENTRY_SZ_OFFSET); + dev_lim->eqpc_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_EQC_ENTRY_SZ_OFFSET); + dev_lim->eqc_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_CQC_ENTRY_SZ_OFFSET); + dev_lim->cqc_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_SRQ_ENTRY_SZ_OFFSET); + dev_lim->srq_entry_sz = size; + MTHCA_GET(size, outbox, QUERY_DEV_LIM_UAR_ENTRY_SZ_OFFSET); + dev_lim->uar_scratch_entry_sz = size; + + if (mthca_is_memfree(dev)) { + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = 1 << field; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET); + dev_lim->hca.arbel.resize_srq = field & 1; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET); + dev_lim->max_sg = min(field, dev_lim->max_sg); + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MAX_DESC_SZ_RQ_OFFSET); + dev_lim->max_desc_sz = min((int)size, dev_lim->max_desc_sz); + MTHCA_GET(size, outbox, QUERY_DEV_LIM_MPT_ENTRY_SZ_OFFSET); + dev_lim->mpt_entry_sz = size; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_PBL_SZ_OFFSET); + dev_lim->hca.arbel.max_pbl_sz = 1 << (field & 0x3f); + MTHCA_GET(dev_lim->hca.arbel.bmme_flags, outbox, + QUERY_DEV_LIM_BMME_FLAGS_OFFSET); + MTHCA_GET(dev_lim->hca.arbel.reserved_lkey, outbox, + QUERY_DEV_LIM_RSVD_LKEY_OFFSET); + MTHCA_GET(field, outbox, QUERY_DEV_LIM_LAMR_OFFSET); + dev_lim->hca.arbel.lam_required = field & 1; + 
MTHCA_GET(dev_lim->hca.arbel.max_icm_sz, outbox, + QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET); + + if (dev_lim->hca.arbel.bmme_flags & 1){ + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Base MM extensions: yes " + "(flags %d, max PBL %d, rsvd L_Key %08x)\n", + dev_lim->hca.arbel.bmme_flags, + dev_lim->hca.arbel.max_pbl_sz, + dev_lim->hca.arbel.reserved_lkey)); + }else{ + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Base MM extensions: no\n")); + } + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max ICM size %I64d MB\n", + (u64) dev_lim->hca.arbel.max_icm_sz >> 20)); + } + else { + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET); + dev_lim->max_srq_sz = (1 << field) - 1; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET); + dev_lim->max_qp_sz = (1 << field) - 1; + MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET); + dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f); + dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE; + } + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max QPs: %d, reserved QPs: %d, entry size: %d\n", + dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", + dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max CQs: %d, reserved CQs: %d, entry size: %d\n", + dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max EQs: %d, reserved EQs: %d, entry size: %d\n", + dev_lim->max_eqs, dev_lim->reserved_eqs, dev_lim->eqc_entry_sz)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("reserved MPTs: %d, reserved MTTs: %d\n", + dev_lim->reserved_mrws, dev_lim->reserved_mtts)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", + dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max QP/MCG: %d, reserved MGMs: %d\n", + dev_lim->max_pds, dev_lim->reserved_mgms)); + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", + dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz)); + + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Flags: %08x\n", dev_lim->flags)); + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +static void get_board_id(u8 *vsd, char *board_id) +{ + int i; + +#define VSD_OFFSET_SIG1 0x00 +#define VSD_OFFSET_SIG2 0xde +#define VSD_OFFSET_MLX_BOARD_ID 0xd0 +#define VSD_OFFSET_TS_BOARD_ID 0x20 + +#define VSD_SIGNATURE_TOPSPIN 0x5ad + + RtlZeroMemory(board_id, MTHCA_BOARD_ID_LEN); + + if (cl_ntoh16(*(u16*)(vsd + VSD_OFFSET_SIG1)) == VSD_SIGNATURE_TOPSPIN && + cl_ntoh16(*(u16*)(vsd + VSD_OFFSET_SIG2)) == VSD_SIGNATURE_TOPSPIN) { + strlcpy(board_id, (const char *)(vsd + VSD_OFFSET_TS_BOARD_ID), MTHCA_BOARD_ID_LEN); + } else { + /* + * The board ID is a string but the firmware byte + * swaps each 4-byte word before passing it back to + * us. Therefore we need to swab it before printing. 
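+ * (each of the four 32-bit words is read and converted with _byteswap_ulong below)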
+ */ + for (i = 0; i < 4; ++i) + ((u32 *) board_id)[i] = + _byteswap_ulong(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); + } +} + +int mthca_QUERY_ADAPTER(struct mthca_dev *dev, + struct mthca_adapter *adapter, u8 *status) +{ + struct mthca_mailbox *mailbox; + u32 *outbox; + int err; + +#define QUERY_ADAPTER_OUT_SIZE 0x100 +#define QUERY_ADAPTER_VENDOR_ID_OFFSET 0x00 +#define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04 +#define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08 +#define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 +#define QUERY_ADAPTER_VSD_OFFSET 0x20 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER, + CMD_TIME_CLASS_A, status); + + if (err) + goto out; + + MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET); + MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET); + MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET); + MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + + get_board_id((u8*)outbox + QUERY_ADAPTER_VSD_OFFSET, + adapter->board_id); + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_INIT_HCA(struct mthca_dev *dev, + struct mthca_init_hca_param *param, + u8 *status) +{ + struct mthca_mailbox *mailbox; + __be32 *inbox; + int err; + +#define INIT_HCA_IN_SIZE 0x200 +#define INIT_HCA_FLAGS_OFFSET 0x014 +#define INIT_HCA_QPC_OFFSET 0x020 +#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) +#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) +#define INIT_HCA_EEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x20) +#define INIT_HCA_LOG_EEC_OFFSET (INIT_HCA_QPC_OFFSET + 0x27) +#define INIT_HCA_SRQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x28) +#define INIT_HCA_LOG_SRQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x2f) +#define INIT_HCA_CQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x30) +#define INIT_HCA_LOG_CQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x37) +#define INIT_HCA_EQPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x40) +#define INIT_HCA_EEEC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) +#define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) +#define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) +#define INIT_HCA_RDB_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70) +#define INIT_HCA_UDAV_OFFSET 0x0b0 +#define INIT_HCA_UDAV_LKEY_OFFSET (INIT_HCA_UDAV_OFFSET + 0x0) +#define INIT_HCA_UDAV_PD_OFFSET (INIT_HCA_UDAV_OFFSET + 0x4) +#define INIT_HCA_MCAST_OFFSET 0x0c0 +#define INIT_HCA_MC_BASE_OFFSET (INIT_HCA_MCAST_OFFSET + 0x00) +#define INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12) +#define INIT_HCA_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16) +#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b) +#define INIT_HCA_TPT_OFFSET 0x0f0 +#define INIT_HCA_MPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) +#define INIT_HCA_MTT_SEG_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x09) +#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) +#define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) +#define INIT_HCA_UAR_OFFSET 0x120 +#define INIT_HCA_UAR_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x00) +#define INIT_HCA_UARC_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x09) +#define INIT_HCA_LOG_UAR_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0a) +#define INIT_HCA_UAR_PAGE_SZ_OFFSET (INIT_HCA_UAR_OFFSET + 0x0b) +#define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10) +#define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18) + + mailbox = mthca_alloc_mailbox(dev, 
GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + RtlZeroMemory(inbox, INIT_HCA_IN_SIZE); + +#if defined(__LITTLE_ENDIAN) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cl_hton32(1 << 1); +#elif defined(__BIG_ENDIAN) + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cl_hton32(1 << 1); +#else +#error Host endianness not defined +#endif + /* Check port for UD address vector: */ + *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cl_hton32(1); + + /* We leave wqe_quota, responder_exu, etc as 0 (default) */ + + /* QPC/EEC/CQC/EQC/RDB attributes */ + + MTHCA_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_qps, INIT_HCA_LOG_QP_OFFSET); + MTHCA_PUT(inbox, param->eec_base, INIT_HCA_EEC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_eecs, INIT_HCA_LOG_EEC_OFFSET); + MTHCA_PUT(inbox, param->srqc_base, INIT_HCA_SRQC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_srqs, INIT_HCA_LOG_SRQ_OFFSET); + MTHCA_PUT(inbox, param->cqc_base, INIT_HCA_CQC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_cqs, INIT_HCA_LOG_CQ_OFFSET); + MTHCA_PUT(inbox, param->eqpc_base, INIT_HCA_EQPC_BASE_OFFSET); + MTHCA_PUT(inbox, param->eeec_base, INIT_HCA_EEEC_BASE_OFFSET); + MTHCA_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); + MTHCA_PUT(inbox, param->rdb_base, INIT_HCA_RDB_BASE_OFFSET); + + /* UD AV attributes */ + + /* multicast attributes */ + + MTHCA_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); + MTHCA_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET); + MTHCA_PUT(inbox, param->mc_hash_sz, INIT_HCA_MC_HASH_SZ_OFFSET); + MTHCA_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET); + + /* TPT attributes */ + + MTHCA_PUT(inbox, param->mpt_base, INIT_HCA_MPT_BASE_OFFSET); + if (!mthca_is_memfree(dev)) + MTHCA_PUT(inbox, param->mtt_seg_sz, INIT_HCA_MTT_SEG_SZ_OFFSET); + MTHCA_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); + MTHCA_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); + + /* UAR attributes */ + { + u8 uar_page_sz = PAGE_SHIFT - 12; + MTHCA_PUT(inbox, uar_page_sz, INIT_HCA_UAR_PAGE_SZ_OFFSET); + } + + MTHCA_PUT(inbox, param->uar_scratch_base, INIT_HCA_UAR_SCATCH_BASE_OFFSET); + + if (mthca_is_memfree(dev)) { + MTHCA_PUT(inbox, param->log_uarc_sz, INIT_HCA_UARC_SZ_OFFSET); + MTHCA_PUT(inbox, param->log_uar_sz, INIT_HCA_LOG_UAR_SZ_OFFSET); + MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET); + } + + err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status); + + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_INIT_IB(struct mthca_dev *dev, + struct mthca_init_ib_param *param, + int port, u8 *status) +{ + struct mthca_mailbox *mailbox; + u32 *inbox; + int err; + u32 flags; + +#define INIT_IB_IN_SIZE 56 +#define INIT_IB_FLAGS_OFFSET 0x00 +#define INIT_IB_FLAG_SIG (1 << 18) +#define INIT_IB_FLAG_NG (1 << 17) +#define INIT_IB_FLAG_G0 (1 << 16) +#define INIT_IB_VL_SHIFT 4 +#define INIT_IB_PORT_WIDTH_SHIFT 8 +#define INIT_IB_MTU_SHIFT 12 +#define INIT_IB_MAX_GID_OFFSET 0x06 +#define INIT_IB_MAX_PKEY_OFFSET 0x0a +#define INIT_IB_GUID0_OFFSET 0x10 +#define INIT_IB_NODE_GUID_OFFSET 0x18 +#define INIT_IB_SI_GUID_OFFSET 0x20 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + RtlZeroMemory(inbox, INIT_IB_IN_SIZE); + + flags = 0; + flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0; + flags |= param->set_node_guid ? 
INIT_IB_FLAG_NG : 0; + flags |= param->set_si_guid ? INIT_IB_FLAG_SIG : 0; + flags |= param->vl_cap << INIT_IB_VL_SHIFT; + flags |= param->port_width << INIT_IB_PORT_WIDTH_SHIFT; + flags |= param->mtu_cap << INIT_IB_MTU_SHIFT; + MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET); + + MTHCA_PUT(inbox, param->gid_cap, INIT_IB_MAX_GID_OFFSET); + MTHCA_PUT(inbox, param->pkey_cap, INIT_IB_MAX_PKEY_OFFSET); + MTHCA_PUT(inbox, param->guid0, INIT_IB_GUID0_OFFSET); + MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET); + MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET); + + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB, + CMD_TIME_CLASS_A, status); + + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status) +{ + return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, HZ, status); +} + +int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status) +{ + return mthca_cmd(dev, 0, 0, (u8)panic, CMD_CLOSE_HCA, HZ, status); +} + +int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, + int port, u8 *status) +{ + struct mthca_mailbox *mailbox; + u32 *inbox; + int err; + u32 flags = 0; + +#define SET_IB_IN_SIZE 0x40 +#define SET_IB_FLAGS_OFFSET 0x00 +#define SET_IB_FLAG_SIG (1 << 18) +#define SET_IB_FLAG_RQK (1 << 0) +#define SET_IB_CAP_MASK_OFFSET 0x04 +#define SET_IB_SI_GUID_OFFSET 0x08 + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + RtlZeroMemory(inbox, SET_IB_IN_SIZE); + + flags |= param->set_si_guid ? SET_IB_FLAG_SIG : 0; + flags |= param->reset_qkey_viol ? SET_IB_FLAG_RQK : 0; + MTHCA_PUT(inbox, flags, SET_IB_FLAGS_OFFSET); + + MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET); + MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET); + + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB, + CMD_TIME_CLASS_B, status); + + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status) +{ + return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt, status); +} + +int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status) +{ + struct mthca_mailbox *mailbox; + __be64 *inbox; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; + + inbox[0] = cl_hton64(virt); + inbox[1] = cl_hton64(dma_addr); + + err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM, + CMD_TIME_CLASS_B, status); + + mthca_free_mailbox(dev, mailbox); + + if (!err) + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Mapped page at %I64x to %I64x for ICM.\n", + (u64) dma_addr, (u64) virt)); + + return err; +} + +int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status) +{ + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Unmapping %d pages at %I64x from ICM.\n", + page_count, (u64) virt)); + + return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status); +} + +int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status) +{ + return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, (u64)-1, status); +} + +int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B, status); +} + +int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, + u8 *status) +{ + int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, 0, CMD_SET_ICM_SIZE, + CMD_TIME_CLASS_A, 
status); + + if (ret || status) + return ret; + + /* + * Arbel page size is always 4 KB; round up number of system + * pages needed. + */ + *aux_pages = (*aux_pages + (1 << (PAGE_SHIFT - 12)) - 1) >> (PAGE_SHIFT - 12); + *aux_pages = ALIGN(*aux_pages, PAGE_SIZE >> 12) >> (PAGE_SHIFT - 12); + + return 0; +} + +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int mpt_index, u8 *status) +{ + return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT, + CMD_TIME_CLASS_B, status); +} + +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int mpt_index, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index, + (u8)!mailbox, CMD_HW2SW_MPT, + CMD_TIME_CLASS_B, status); +} + +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int num_mtt, u8 *status) +{ + return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT, + CMD_TIME_CLASS_B, status); +} + +int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B, status); +} + +int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, + int eq_num, u8 *status) +{ + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("%s mask %016I64x for eqn %d\n", + unmap ? "Clearing" : "Setting", + (u64) event_mask, eq_num)); + return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num, + 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status); +} + +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int eq_num, u8 *status) +{ + return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ, + CMD_TIME_CLASS_A, status); +} + +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int eq_num, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0, + CMD_HW2SW_EQ, + CMD_TIME_CLASS_A, status); +} + +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int cq_num, u8 *status) +{ + return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ, + CMD_TIME_CLASS_A, status); +} + +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int cq_num, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0, + CMD_HW2SW_CQ, + CMD_TIME_CLASS_A, status); +} + +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status) +{ + return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ, + CMD_TIME_CLASS_A, status); +} + +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0, + CMD_HW2SW_SRQ, + CMD_TIME_CLASS_A, status); +} + +int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num, + struct mthca_mailbox *mailbox, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, num, 0, + CMD_QUERY_SRQ, CMD_TIME_CLASS_A, status); +} + +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status) +{ + return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ, + CMD_TIME_CLASS_B, status); +} + +int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, + int is_ee, struct mthca_mailbox *mailbox, u32 optmask, + u8 *status) +{ + enum { + MTHCA_TRANS_INVALID = 0, + MTHCA_TRANS_RST2INIT, + MTHCA_TRANS_INIT2INIT, + MTHCA_TRANS_INIT2RTR, + MTHCA_TRANS_RTR2RTS, + MTHCA_TRANS_RTS2RTS, + MTHCA_TRANS_SQERR2RTS, + MTHCA_TRANS_ANY2ERR, + MTHCA_TRANS_RTS2SQD, + MTHCA_TRANS_SQD2SQD, + MTHCA_TRANS_SQD2RTS, + MTHCA_TRANS_ANY2RST, + }; + static const u16 op[] = { + 0, /* MTHCA_TRANS_INVALID */ 
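+ /* firmware opcodes, indexed by the MTHCA_TRANS_* transition codes defined above */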
+ CMD_RST2INIT_QPEE, /* MTHCA_TRANS_RST2INIT */ + CMD_INIT2INIT_QPEE, /* MTHCA_TRANS_INIT2INIT */ + CMD_INIT2RTR_QPEE, /* MTHCA_TRANS_INIT2RTR */ + CMD_RTR2RTS_QPEE, /* MTHCA_TRANS_RTR2RTS */ + CMD_RTS2RTS_QPEE, /* MTHCA_TRANS_RTS2RTS */ + CMD_SQERR2RTS_QPEE, /* MTHCA_TRANS_SQERR2RTS */ + CMD_2ERR_QPEE, /* MTHCA_TRANS_ANY2ERR */ + CMD_RTS2SQD_QPEE, /* MTHCA_TRANS_RTS2SQD */ + CMD_SQD2SQD_QPEE, /* MTHCA_TRANS_SQD2SQD */ + CMD_SQD2RTS_QPEE, /* MTHCA_TRANS_SQD2RTS */ + CMD_ERR2RST_QPEE /* MTHCA_TRANS_ANY2RST */ + }; + u8 op_mod = 0; + int my_mailbox = 0; + int err; + + UNREFERENCED_PARAMETER(optmask); + + if (trans < 0 || trans >= ARRAY_SIZE(op)) + return -EINVAL; + + if (trans == MTHCA_TRANS_ANY2RST) { + op_mod = 3; /* don't write outbox, any->reset */ + + /* For debugging */ + if (!mailbox) { + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (!IS_ERR(mailbox)) { + my_mailbox = 1; + op_mod = 2; /* write outbox, any->reset */ + } else + mailbox = NULL; + } + } else { + { // debug print + int i; + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_QP ,("Dumping QP context:\n")); + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_QP ,(" opt param mask: %08x\n", cl_ntoh32(*(__be32 *)mailbox->buf))); + for (i = 2; i < 0x100 / 4; i=i+4) { + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_QP ,(" [%02x] %08x %08x %08x %08x\n",i-2, + cl_ntoh32(((__be32 *) mailbox->buf)[i ]), + cl_ntoh32(((__be32 *) mailbox->buf)[i + 1]), + cl_ntoh32(((__be32 *) mailbox->buf)[i + 2]), + cl_ntoh32(((__be32 *) mailbox->buf)[i + 3]))); + } + } + } + + if (trans == MTHCA_TRANS_ANY2RST) { + err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, + (!!is_ee << 24) | num, op_mod, + op[trans], CMD_TIME_CLASS_C, status); + + if (mailbox) { // debug print + int i; + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_QP ,("Dumping QP context:\n")); + for (i = 2; i < 0x100 / 4; i=i+4) { + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_QP ,(" [%02x] %08x %08x %08x %08x\n",i-2, + cl_ntoh32(((__be32 *) mailbox->buf)[i ]), + cl_ntoh32(((__be32 *) mailbox->buf)[i + 1]), + cl_ntoh32(((__be32 *) mailbox->buf)[i + 2]), + cl_ntoh32(((__be32 *) mailbox->buf)[i + 3]))); + } + } + } else + err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num, + op_mod, op[trans], CMD_TIME_CLASS_C, status); + + if (my_mailbox) + mthca_free_mailbox(dev, mailbox); + + return err; +} + +int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, + struct mthca_mailbox *mailbox, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0, + CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status); +} + +int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, + u8 *status) +{ + u8 op_mod; + + switch (type) { + case IB_QPT_QP0: + op_mod = 0; + break; + case IB_QPT_QP1: + op_mod = 1; + break; + case IB_QPT_RAW_IPV6: + op_mod = 2; + break; + case IB_QPT_RAW_ETHER: + op_mod = 3; + break; + default: + return -EINVAL; + } + + return mthca_cmd(dev, 0, qpn, op_mod, CMD_CONF_SPECIAL_QP, + CMD_TIME_CLASS_B, status); +} + +int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct _ib_wc *in_wc, struct _ib_grh *in_grh, + void *in_mad, void *response_mad, u8 *status) +{ + struct mthca_mailbox *inmailbox, *outmailbox; + u8 *inbox; + int err; + u32 in_modifier = port; + u8 op_modifier = 0; + + +#define MAD_IFC_BOX_SIZE 0x400 +#define MAD_IFC_MY_QPN_OFFSET 0x100 +#define MAD_IFC_RQPN_OFFSET 0x108 +#define MAD_IFC_SL_OFFSET 0x10c +#define MAD_IFC_G_PATH_OFFSET 0x10d +#define MAD_IFC_RLID_OFFSET 0x10e +#define MAD_IFC_PKEY_OFFSET 0x112 +#define 
MAD_IFC_GRH_OFFSET 0x140 + + inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + inbox = inmailbox->buf; + + outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(outmailbox)) { + mthca_free_mailbox(dev, inmailbox); + return PTR_ERR(outmailbox); + } + + memcpy(inbox, in_mad, 256); + + /* + * Key check traps can't be generated unless we have in_wc to + * tell us where to send the trap. + */ + if (ignore_mkey || !in_wc) + op_modifier |= 0x1; + if (ignore_bkey || !in_wc) + op_modifier |= 0x2; + + if (in_wc) { + u8 val; + + memset(inbox + 256, 0, 256); + + + MTHCA_PUT(inbox, 0, MAD_IFC_MY_QPN_OFFSET); + MTHCA_PUT(inbox, cl_ntoh32(in_wc->recv.ud.remote_qp), MAD_IFC_RQPN_OFFSET); + val = in_wc->recv.ud.remote_sl << 4; + MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET); + + val = in_wc->recv.ud.path_bits | + (in_wc->recv.ud.recv_opt & IB_RECV_OPT_GRH_VALID ? 0x80 : 0); + MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET) + + MTHCA_PUT(inbox, cl_ntoh16(in_wc->recv.ud.remote_lid), MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, in_wc->recv.ud.pkey_index, MAD_IFC_PKEY_OFFSET); + + if (in_grh) + memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40); + + op_modifier |= 0x4; + + in_modifier |= cl_ntoh16(in_wc->recv.ud.remote_lid) << 16; + + } + + err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, + in_modifier, op_modifier, + CMD_MAD_IFC, CMD_TIME_CLASS_C, status); + + if (!err && !*status) + memcpy(response_mad, outmailbox->buf, 256); + + mthca_free_mailbox(dev, inmailbox); + mthca_free_mailbox(dev, outmailbox); + return err; +} + +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, index, 0, + CMD_READ_MGM, CMD_TIME_CLASS_A, status); +} + +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status) +{ + return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM, + CMD_TIME_CLASS_A, status); +} + +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash, u8 *status) +{ + u64 imm; + int err; + + err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH, + CMD_TIME_CLASS_A, status); + + *hash = (u16)imm; + return err; +} + +int mthca_NOP(struct mthca_dev *dev, u8 *status) +{ + return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, 100000, status); /* 100 msecs */ +} diff --git a/branches/IBFD/hw/mthca/kernel/mthca_cmd.h b/branches/IBFD/hw/mthca/kernel/mthca_cmd.h new file mode 100644 index 00000000..fdeef839 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_cmd.h @@ -0,0 +1,326 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef MTHCA_CMD_H +#define MTHCA_CMD_H + +#include + +#define MTHCA_MAILBOX_SIZE 4096 + +enum { + /* command completed successfully: */ + MTHCA_CMD_STAT_OK = 0x00, + /* Internal error (such as a bus error) occurred while processing command: */ + MTHCA_CMD_STAT_INTERNAL_ERR = 0x01, + /* Operation/command not supported or opcode modifier not supported: */ + MTHCA_CMD_STAT_BAD_OP = 0x02, + /* Parameter not supported or parameter out of range: */ + MTHCA_CMD_STAT_BAD_PARAM = 0x03, + /* System not enabled or bad system state: */ + MTHCA_CMD_STAT_BAD_SYS_STATE = 0x04, + /* Attempt to access reserved or unallocaterd resource: */ + MTHCA_CMD_STAT_BAD_RESOURCE = 0x05, + /* Requested resource is currently executing a command, or is otherwise busy: */ + MTHCA_CMD_STAT_RESOURCE_BUSY = 0x06, + /* memory error: */ + MTHCA_CMD_STAT_DDR_MEM_ERR = 0x07, + /* Required capability exceeds device limits: */ + MTHCA_CMD_STAT_EXCEED_LIM = 0x08, + /* Resource is not in the appropriate state or ownership: */ + MTHCA_CMD_STAT_BAD_RES_STATE = 0x09, + /* Index out of range: */ + MTHCA_CMD_STAT_BAD_INDEX = 0x0a, + /* FW image corrupted: */ + MTHCA_CMD_STAT_BAD_NVMEM = 0x0b, + /* Attempt to modify a QP/EE which is not in the presumed state: */ + MTHCA_CMD_STAT_BAD_QPEE_STATE = 0x10, + /* Bad segment parameters (Address/Size): */ + MTHCA_CMD_STAT_BAD_SEG_PARAM = 0x20, + /* Memory Region has Memory Windows bound to: */ + MTHCA_CMD_STAT_REG_BOUND = 0x21, + /* HCA local attached memory not present: */ + MTHCA_CMD_STAT_LAM_NOT_PRE = 0x22, + /* Bad management packet (silently discarded): */ + MTHCA_CMD_STAT_BAD_PKT = 0x30, + /* More outstanding CQEs in CQ than new CQ size: */ + MTHCA_CMD_STAT_BAD_SIZE = 0x40 +}; + +enum { + MTHCA_TRANS_INVALID = 0, + MTHCA_TRANS_RST2INIT, + MTHCA_TRANS_INIT2INIT, + MTHCA_TRANS_INIT2RTR, + MTHCA_TRANS_RTR2RTS, + MTHCA_TRANS_RTS2RTS, + MTHCA_TRANS_SQERR2RTS, + MTHCA_TRANS_ANY2ERR, + MTHCA_TRANS_RTS2SQD, + MTHCA_TRANS_SQD2SQD, + MTHCA_TRANS_SQD2RTS, + MTHCA_TRANS_ANY2RST, +}; + +enum { + DEV_LIM_FLAG_RC = 1 << 0, + DEV_LIM_FLAG_UC = 1 << 1, + DEV_LIM_FLAG_UD = 1 << 2, + DEV_LIM_FLAG_RD = 1 << 3, + DEV_LIM_FLAG_RAW_IPV6 = 1 << 4, + DEV_LIM_FLAG_RAW_ETHER = 1 << 5, + DEV_LIM_FLAG_SRQ = 1 << 6, + DEV_LIM_FLAG_BAD_PKEY_CNTR = 1 << 8, + DEV_LIM_FLAG_BAD_QKEY_CNTR = 1 << 9, + DEV_LIM_FLAG_MW = 1 << 16, + DEV_LIM_FLAG_AUTO_PATH_MIG = 1 << 17, + DEV_LIM_FLAG_ATOMIC = 1 << 18, + DEV_LIM_FLAG_RAW_MULTI = 1 << 19, + DEV_LIM_FLAG_UD_AV_PORT_ENFORCE = 1 << 20, + DEV_LIM_FLAG_UD_MULTI = 1 << 21, +}; + +struct mthca_mailbox { + dma_addr_t dma; + void *buf; +}; + +struct mthca_dev_lim { + int max_srq_sz; + int max_qp_sz; + int reserved_qps; + int max_qps; + int reserved_srqs; + int max_srqs; + int reserved_eecs; + int max_eecs; + int max_cq_sz; + int reserved_cqs; + int max_cqs; 
+ int max_mpts; + int reserved_eqs; + int max_eqs; + int reserved_mtts; + int max_mrw_sz; + int reserved_mrws; + int max_mtt_seg; + int max_requester_per_qp; + int max_responder_per_qp; + int max_rdma_global; + int local_ca_ack_delay; + int max_mtu; + int max_port_width; + int max_vl; + int num_ports; + int max_gids; + int max_pkeys; + u32 flags; + int reserved_uars; + int uar_size; + int min_page_sz; + int max_sg; + int max_desc_sz; + int max_qp_per_mcg; + int reserved_mgms; + int max_mcgs; + int reserved_pds; + int max_pds; + int reserved_rdds; + int max_rdds; + int eec_entry_sz; + int qpc_entry_sz; + int eeec_entry_sz; + int eqpc_entry_sz; + int eqc_entry_sz; + int cqc_entry_sz; + int srq_entry_sz; + int uar_scratch_entry_sz; + int mpt_entry_sz; + union { + struct { + int max_avs; + } tavor; + struct { + int resize_srq; + int max_pbl_sz; + u8 bmme_flags; + u32 reserved_lkey; + int lam_required; + u64 max_icm_sz; + } arbel; + } hca; +}; + +struct mthca_adapter { + u32 vendor_id; + u32 device_id; + u32 revision_id; + char board_id[MTHCA_BOARD_ID_LEN]; + u8 inta_pin; +}; + +struct mthca_init_hca_param { + u64 qpc_base; + u64 eec_base; + u64 srqc_base; + u64 cqc_base; + u64 eqpc_base; + u64 eeec_base; + u64 eqc_base; + u64 rdb_base; + u64 mc_base; + u64 mpt_base; + u64 mtt_base; + u64 uar_scratch_base; + u64 uarc_base; + u16 log_mc_entry_sz; + u16 mc_hash_sz; + u8 log_num_qps; + u8 log_num_eecs; + u8 log_num_srqs; + u8 log_num_cqs; + u8 log_num_eqs; + u8 log_mc_table_sz; + u8 mtt_seg_sz; + u8 log_mpt_sz; + u8 log_uar_sz; + u8 log_uarc_sz; +}; + +struct mthca_init_ib_param { + int port_width; + int vl_cap; + int mtu_cap; + u16 gid_cap; + u16 pkey_cap; + int set_guid0; + u64 guid0; + int set_node_guid; + u64 node_guid; + int set_si_guid; + u64 si_guid; +}; + +struct mthca_set_ib_param { + int set_si_guid; + int reset_qkey_viol; + u64 si_guid; + u32 cap_mask; +}; + +int mthca_cmd_init(struct mthca_dev *dev); +void mthca_cmd_cleanup(struct mthca_dev *dev); +int mthca_cmd_use_events(struct mthca_dev *dev); +void mthca_cmd_use_polling(struct mthca_dev *dev); +void mthca_cmd_event(struct mthca_dev *dev, u16 token, + u8 status, u64 out_param); + +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + unsigned int gfp_mask); +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox); + +int mthca_SYS_EN(struct mthca_dev *dev, u8 *status); +int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status); +int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); +int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status); +int mthca_RUN_FW(struct mthca_dev *dev, u8 *status); +int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status); +int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status); +int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status); +int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status); +int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, + struct mthca_dev_lim *dev_lim, u8 *status); +int mthca_QUERY_ADAPTER(struct mthca_dev *dev, + struct mthca_adapter *adapter, u8 *status); +int mthca_INIT_HCA(struct mthca_dev *dev, + struct mthca_init_hca_param *param, + u8 *status); +int mthca_INIT_IB(struct mthca_dev *dev, + struct mthca_init_ib_param *param, + int port, u8 *status); +int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status); +int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status); +int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, + int port, u8 *status); +int mthca_MAP_ICM(struct mthca_dev *dev, struct 
mthca_icm *icm, u64 virt, u8 *status); +int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status); +int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status); +int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); +int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status); +int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, + u8 *status); +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int mpt_index, u8 *status); +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int mpt_index, u8 *status); +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int num_mtt, u8 *status); +int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status); +int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, + int eq_num, u8 *status); +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int eq_num, u8 *status); +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int eq_num, u8 *status); +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int cq_num, u8 *status); +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int cq_num, u8 *status); +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status); +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status); +int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status); +int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, + int is_ee, struct mthca_mailbox *mailbox, u32 optmask, + u8 *status); +int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, + u8 *status); +int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct _ib_wc *in_wc, struct _ib_grh *in_grh, + void *in_mad, void *response_mad, u8 *status); +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash, u8 *status); +int mthca_NOP(struct mthca_dev *dev, u8 *status); + +#endif /* MTHCA_CMD_H */ diff --git a/branches/IBFD/hw/mthca/kernel/mthca_config_reg.h b/branches/IBFD/hw/mthca/kernel/mthca_config_reg.h new file mode 100644 index 00000000..9ff4a97a --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_config_reg.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef MTHCA_CONFIG_REG_H +#define MTHCA_CONFIG_REG_H + +#define MTHCA_HCR_BASE 0x80680 +#define MTHCA_HCR_SIZE 0x0001c +#define MTHCA_ECR_BASE 0x80700 +#define MTHCA_ECR_SIZE 0x00008 +#define MTHCA_ECR_CLR_BASE 0x80708 +#define MTHCA_ECR_CLR_SIZE 0x00008 +#define MTHCA_MAP_ECR_SIZE (MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE) +#define MTHCA_CLR_INT_BASE 0xf00d8 +#define MTHCA_CLR_INT_SIZE 0x00008 +#define MTHCA_EQ_SET_CI_SIZE (8 * 32) + +#endif /* MTHCA_CONFIG_REG_H */ diff --git a/branches/IBFD/hw/mthca/kernel/mthca_cq.c b/branches/IBFD/hw/mthca/kernel/mthca_cq.c new file mode 100644 index 00000000..c64bc0b9 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_cq.c @@ -0,0 +1,963 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_cq.tmh" +#endif +#include "mthca_cmd.h" +#include "mthca_memfree.h" +#include "mthca_wqe.h" + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_cq_table) +#pragma alloc_text (PAGE, mthca_cleanup_cq_table) +#endif + +enum { + MTHCA_MAX_DIRECT_CQ_SIZE = 4 * PAGE_SIZE +}; + +/* + * Must be packed because start is 64 bits but only aligned to 32 bits. + */ +#pragma pack(push,1) +struct mthca_cq_context { + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 error_eqn; /* Tavor only */ + __be32 comp_eqn; + __be32 pd; + __be32 lkey; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_index; + __be32 producer_index; + __be32 cqn; + __be32 ci_db; /* Arbel only */ + __be32 state_db; /* Arbel only */ + u32 reserved; +}; +#pragma pack(pop) + +#define MTHCA_CQ_STATUS_OK ( 0 << 28) +#define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28) +#define MTHCA_CQ_STATUS_WRITE_FAIL (10 << 28) +#define MTHCA_CQ_FLAG_TR ( 1 << 18) +#define MTHCA_CQ_FLAG_OI ( 1 << 17) +#define MTHCA_CQ_STATE_DISARMED ( 0 << 8) +#define MTHCA_CQ_STATE_ARMED ( 1 << 8) +#define MTHCA_CQ_STATE_ARMED_SOL ( 4 << 8) +#define MTHCA_EQ_STATE_FIRED (10 << 8) + +enum { + MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe +}; + +enum { + SYNDROME_LOCAL_LENGTH_ERR = 0x01, + SYNDROME_LOCAL_QP_OP_ERR = 0x02, + SYNDROME_LOCAL_EEC_OP_ERR = 0x03, + SYNDROME_LOCAL_PROT_ERR = 0x04, + SYNDROME_WR_FLUSH_ERR = 0x05, + SYNDROME_MW_BIND_ERR = 0x06, + SYNDROME_BAD_RESP_ERR = 0x10, + SYNDROME_LOCAL_ACCESS_ERR = 0x11, + SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + SYNDROME_REMOTE_ACCESS_ERR = 0x13, + SYNDROME_REMOTE_OP_ERR = 0x14, + SYNDROME_RETRY_EXC_ERR = 0x15, + SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20, + SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21, + SYNDROME_REMOTE_ABORTED_ERR = 0x22, + SYNDROME_INVAL_EECN_ERR = 0x23, + SYNDROME_INVAL_EEC_STATE_ERR = 0x24 +}; + +struct mthca_cqe { + __be32 my_qpn; + __be32 my_ee; + __be32 rqpn; + __be16 sl_g_mlpath; + __be16 rlid; + __be32 imm_etype_pkey_eec; + __be32 byte_cnt; + __be32 wqe; + u8 opcode; + u8 is_send; + u8 reserved; + u8 owner; +}; + +struct mthca_err_cqe { + __be32 my_qpn; + u32 reserved1[3]; + u8 syndrome; + u8 vendor_err; + __be16 db_cnt; + u32 reserved2; + __be32 wqe; + u8 opcode; + u8 reserved3[2]; + u8 owner; +}; + +#define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7) +#define MTHCA_CQ_ENTRY_OWNER_HW (1 << 7) + +#define MTHCA_TAVOR_CQ_DB_INC_CI (1 << 24) +#define MTHCA_TAVOR_CQ_DB_REQ_NOT (2 << 24) +#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL (3 << 24) +#define MTHCA_TAVOR_CQ_DB_SET_CI (4 << 24) +#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24) + +#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL (1 << 24) +#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24) +#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24) + +static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) +{ + if (cq->is_direct) + return (struct mthca_cqe *)((u8*)cq->queue.direct.page + (entry * MTHCA_CQ_ENTRY_SIZE)); + else + return (struct mthca_cqe *)((u8*)cq->queue.page_list[entry * MTHCA_CQ_ENTRY_SIZE / PAGE_SIZE].page + + (entry * MTHCA_CQ_ENTRY_SIZE) % PAGE_SIZE); +} + +static inline struct 
mthca_cqe *cqe_sw(struct mthca_cq *cq, int i) +{ + struct mthca_cqe *cqe = get_cqe(cq, i); + return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe; +} + +static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq) +{ + return cqe_sw(cq, cq->cons_index & cq->ibcq.cqe); +} + +static inline void set_cqe_hw(struct mthca_cqe *cqe) +{ + cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW; +} + +static void dump_cqe(u32 print_lvl, struct mthca_dev *dev, void *cqe_ptr) +{ + __be32 *cqe = cqe_ptr; + UNREFERENCED_PARAMETER(dev); + UNUSED_PARAM_WOWPP(print_lvl); + + (void) cqe; /* avoid warning if mthca_dbg compiled away... */ + HCA_PRINT(print_lvl,HCA_DBG_CQ,("CQE contents \n")); + HCA_PRINT(print_lvl,HCA_DBG_CQ,("\t[%2x] %08x %08x %08x %08x\n",0, + cl_ntoh32(cqe[0]), cl_ntoh32(cqe[1]), cl_ntoh32(cqe[2]), cl_ntoh32(cqe[3]))); + HCA_PRINT(print_lvl,HCA_DBG_CQ,("\t[%2x] %08x %08x %08x %08x \n",16, + cl_ntoh32(cqe[4]), cl_ntoh32(cqe[5]), cl_ntoh32(cqe[6]), cl_ntoh32(cqe[7]))); +} + +/* + * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index + * should be correct before calling update_cons_index(). + */ +static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, + int incr) +{ + __be32 doorbell[2]; + + if (mthca_is_memfree(dev)) { + *cq->set_ci_db = cl_hton32(cq->cons_index); + wmb(); + } else { + doorbell[0] = cl_hton32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn); + doorbell[1] = cl_hton32(incr - 1); + + mthca_write64(doorbell, + dev->kar + MTHCA_CQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } +} + +void mthca_cq_completion(struct mthca_dev *dev, u32 cqn) +{ + struct mthca_cq *cq; + + cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); + + if (!cq) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Completion event for bogus CQ %08x\n", cqn)); + return; + } + + if (mthca_is_memfree(dev)) { + if (cq->ibcq.ucontext) + ++*cq->p_u_arm_sn; + else + ++cq->arm_sn; + } + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +void mthca_cq_event(struct mthca_dev *dev, u32 cqn, + enum ib_event_type event_type) +{ + struct mthca_cq *cq; + struct ib_event event; + SPIN_LOCK_PREP(lh); + + spin_lock(&dev->cq_table.lock, &lh); + + cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); + + if (cq) + atomic_inc(&cq->refcount); + spin_unlock(&lh); + + if (!cq) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Async event for bogus CQ %08x\n", cqn)); + return; + } + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.cq = &cq->ibcq; + if (cq->ibcq.event_handler) + cq->ibcq.event_handler(&event, cq->ibcq.cq_context); + + if (atomic_dec_and_test(&cq->refcount)) + wake_up(&cq->wait); +} + +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, + struct mthca_srq *srq) +{ + struct mthca_cq *cq; + struct mthca_cqe *cqe; + u32 prod_index; + int nfreed = 0; + SPIN_LOCK_PREP(lht); + SPIN_LOCK_PREP(lh); + + spin_lock_irq(&dev->cq_table.lock, &lht); + cq = mthca_array_get(&dev->cq_table.cq, cqn & (dev->limits.num_cqs - 1)); + if (cq) + atomic_inc(&cq->refcount); + spin_unlock_irq(&lht); + + if (!cq) + return; + + spin_lock_irq(&cq->lock, &lh); + + /* + * First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. 
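+ * (the scan stops at the first CQE still owned by hardware, or after walking the whole ring)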
+ */ + for (prod_index = cq->cons_index; + cqe_sw(cq, prod_index & cq->ibcq.cqe); + ++prod_index) { + if (prod_index == cq->cons_index + cq->ibcq.cqe) + break; + } + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Cleaning QPN %06x from CQN %06x; ci %d, pi %d\n", + qpn, cqn, cq->cons_index, prod_index)); + + /* + * Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + if (cqe->my_qpn == cl_hton32(qpn)) { + if (srq) + mthca_free_srq_wqe(srq, cl_ntoh32(cqe->wqe)); + ++nfreed; + } + else + if (nfreed) { + memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe), + cqe, MTHCA_CQ_ENTRY_SIZE); + } + } + + if (nfreed) { + wmb(); + cq->cons_index += nfreed; + update_cons_index(dev, cq, nfreed); + } + + spin_unlock_irq(&lh); + if (atomic_dec_and_test(&cq->refcount)) + wake_up(&cq->wait); +} + +static void handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, + struct mthca_qp *qp, int wqe_index, int is_send, + struct mthca_err_cqe *cqe, + struct _ib_wc *entry, int *free_cqe) +{ + int dbd; + __be32 new_wqe; + + UNREFERENCED_PARAMETER(cq); + + if (cqe->syndrome != SYNDROME_WR_FLUSH_ERR) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_CQ ,("Completion with errro " + "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n", + cl_ntoh32(cqe->my_qpn), cl_ntoh32(cqe->wqe), + cq->cqn, cq->cons_index)); + dump_cqe(TRACE_LEVEL_INFORMATION, dev, cqe); + } + + + /* + * For completions in error, only work request ID, status, vendor error + * (and freed resource count for RD) have to be set. + */ + switch (cqe->syndrome) { + case SYNDROME_LOCAL_LENGTH_ERR: + entry->status = IB_WCS_LOCAL_LEN_ERR; + break; + case SYNDROME_LOCAL_QP_OP_ERR: + entry->status = IB_WCS_LOCAL_OP_ERR; + break; + case SYNDROME_LOCAL_PROT_ERR: + entry->status = IB_WCS_LOCAL_PROTECTION_ERR; + break; + case SYNDROME_WR_FLUSH_ERR: + entry->status = IB_WCS_WR_FLUSHED_ERR; + break; + case SYNDROME_MW_BIND_ERR: + entry->status = IB_WCS_MEM_WINDOW_BIND_ERR; + break; + case SYNDROME_BAD_RESP_ERR: + entry->status = IB_WCS_BAD_RESP_ERR; + break; + case SYNDROME_LOCAL_ACCESS_ERR: + entry->status = IB_WCS_LOCAL_ACCESS_ERR; + break; + case SYNDROME_REMOTE_INVAL_REQ_ERR: + entry->status = IB_WCS_REM_INVALID_REQ_ERR; + break; + case SYNDROME_REMOTE_ACCESS_ERR: + entry->status = IB_WCS_REM_ACCESS_ERR; + break; + case SYNDROME_REMOTE_OP_ERR: + entry->status = IB_WCS_REM_OP_ERR; + break; + case SYNDROME_RETRY_EXC_ERR: + entry->status = IB_WCS_TIMEOUT_RETRY_ERR; + break; + case SYNDROME_RNR_RETRY_EXC_ERR: + entry->status = IB_WCS_RNR_RETRY_ERR; + break; + case SYNDROME_REMOTE_INVAL_RD_REQ_ERR: + entry->status = IB_WCS_REM_INVALID_REQ_ERR; + break; + case SYNDROME_REMOTE_ABORTED_ERR: + case SYNDROME_LOCAL_EEC_OP_ERR: + case SYNDROME_LOCAL_RDD_VIOL_ERR: + case SYNDROME_INVAL_EECN_ERR: + case SYNDROME_INVAL_EEC_STATE_ERR: + default: + entry->status = IB_WCS_GENERAL_ERR; + break; + } + + entry->vendor_specific = cqe->vendor_err; + + /* + * Mem-free HCAs always generate one CQE per WQE, even in the + * error case, so we don't have to check the doorbell count, etc. + */ + if (mthca_is_memfree(dev)) + return; + + mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); + + /* + * If we're at the end of the WQE chain, or we've used up our + * doorbell count, free the CQE. Otherwise just update it for + * the next poll operation. 
+ */ + if (!(new_wqe & cl_hton32(0x3f)) || (!cqe->db_cnt && dbd)) + return; + + cqe->db_cnt = cl_hton16(cl_ntoh16(cqe->db_cnt) - (u16)dbd); + cqe->wqe = new_wqe; + cqe->syndrome = SYNDROME_WR_FLUSH_ERR; + + *free_cqe = 0; +} + +static inline int mthca_poll_one(struct mthca_dev *dev, + struct mthca_cq *cq, + struct mthca_qp **cur_qp, + int *freed, + struct _ib_wc *entry) +{ + struct mthca_wq *wq; + struct mthca_cqe *cqe; + unsigned wqe_index; + int is_error; + int is_send; + int free_cqe = 1; + int err = 0; + + HCA_ENTER(HCA_DBG_CQ); + cqe = next_cqe_sw(cq); + if (!cqe) + return -EAGAIN; + + /* + * Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + { // debug print + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_CQ,("CQ: 0x%06x/%d: CQE -> QPN 0x%06x, WQE @ 0x%08x\n", + cq->cqn, cq->cons_index, cl_ntoh32(cqe->my_qpn), + cl_ntoh32(cqe->wqe))); + dump_cqe(TRACE_LEVEL_VERBOSE, dev, cqe); + } + + is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == + MTHCA_ERROR_CQE_OPCODE_MASK; + is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80; + + if (!*cur_qp || cl_ntoh32(cqe->my_qpn) != (*cur_qp)->qpn) { + /* + * We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. + */ + *cur_qp = mthca_array_get(&dev->qp_table.qp, + cl_ntoh32(cqe->my_qpn) & + (dev->limits.num_qps - 1)); + if (!*cur_qp) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_CQ, ("CQ entry for unknown QP %06x\n", + cl_ntoh32(cqe->my_qpn) & 0xffffff)); + err = -EINVAL; + goto out; + } + } + + if (is_send) { + wq = &(*cur_qp)->sq; + wqe_index = ((cl_ntoh32(cqe->wqe) - (*cur_qp)->send_wqe_offset) + >> wq->wqe_shift); + entry->wr_id = (*cur_qp)->wrid[wqe_index + + (*cur_qp)->rq.max]; + } else if ((*cur_qp)->ibqp.srq) { + struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq); + u32 wqe = cl_ntoh32(cqe->wqe); + wq = NULL; + wqe_index = wqe >> srq->wqe_shift; + entry->wr_id = srq->wrid[wqe_index]; + mthca_free_srq_wqe(srq, wqe); + } else { + wq = &(*cur_qp)->rq; + wqe_index = cl_ntoh32(cqe->wqe) >> wq->wqe_shift; + entry->wr_id = (*cur_qp)->wrid[wqe_index]; + } + + if (wq) { + if (wq->last_comp < wqe_index) + wq->tail += wqe_index - wq->last_comp; + else + wq->tail += wqe_index + wq->max - wq->last_comp; + + wq->last_comp = wqe_index; + } + + if (is_send) { + entry->recv.ud.recv_opt = 0; + switch (cqe->opcode) { + case MTHCA_OPCODE_RDMA_WRITE: + entry->wc_type = IB_WC_RDMA_WRITE; + break; + case MTHCA_OPCODE_RDMA_WRITE_IMM: + entry->wc_type = IB_WC_RDMA_WRITE; + entry->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; + break; + case MTHCA_OPCODE_SEND: + entry->wc_type = IB_WC_SEND; + break; + case MTHCA_OPCODE_SEND_IMM: + entry->wc_type = IB_WC_SEND; + entry->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; + break; + case MTHCA_OPCODE_RDMA_READ: + entry->wc_type = IB_WC_RDMA_READ; + entry->length = cl_ntoh32(cqe->byte_cnt); + break; + case MTHCA_OPCODE_ATOMIC_CS: + entry->wc_type = IB_WC_COMPARE_SWAP; + entry->length = MTHCA_BYTES_PER_ATOMIC_COMPL; + break; + case MTHCA_OPCODE_ATOMIC_FA: + entry->wc_type = IB_WC_FETCH_ADD; + entry->length = MTHCA_BYTES_PER_ATOMIC_COMPL; + break; + case MTHCA_OPCODE_BIND_MW: + entry->wc_type = IB_WC_MW_BIND; + break; + default: + entry->wc_type = IB_WC_SEND; + break; + } + } else { + entry->length = cl_ntoh32(cqe->byte_cnt); + switch (cqe->opcode & 0x1f) { + case IB_OPCODE_SEND_LAST_WITH_IMMEDIATE: + case IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE: + entry->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + 
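/* the immediate data is delivered in the CQE's imm_etype_pkey_eec field */ +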
entry->recv.ud.immediate_data = cqe->imm_etype_pkey_eec; + entry->wc_type = IB_WC_RECV; + break; + case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: + case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: + entry->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + entry->recv.ud.immediate_data = cqe->imm_etype_pkey_eec; + entry->wc_type = IB_WC_RECV_RDMA_WRITE; + break; + default: + entry->recv.ud.recv_opt = 0; + entry->wc_type = IB_WC_RECV; + break; + } + entry->recv.ud.remote_lid = cqe->rlid; + entry->recv.ud.remote_qp = cqe->rqpn & 0xffffff00; + entry->recv.ud.pkey_index = (u16)(cl_ntoh32(cqe->imm_etype_pkey_eec) >> 16); + entry->recv.ud.remote_sl = (uint8_t)(cl_ntoh16(cqe->sl_g_mlpath) >> 12); + entry->recv.ud.path_bits = (uint8_t)(cl_ntoh16(cqe->sl_g_mlpath) & 0x7f); + entry->recv.ud.recv_opt |= cl_ntoh16(cqe->sl_g_mlpath) & 0x80 ? + IB_RECV_OPT_GRH_VALID : 0; + } + if (!is_send && cqe->rlid == 0){ + HCA_PRINT(TRACE_LEVEL_INFORMATION,HCA_DBG_CQ,("found rlid == 0 \n ")); + entry->recv.ud.recv_opt |= IB_RECV_OPT_FORWARD; + + } + if (is_error) { + handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, + (struct mthca_err_cqe *) cqe, entry, &free_cqe); + } + else + entry->status = IB_WCS_SUCCESS; + + out: + if (likely(free_cqe)) { + set_cqe_hw(cqe); + ++(*freed); + ++cq->cons_index; + } + HCA_EXIT(HCA_DBG_CQ); + return err; +} + +int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, + struct _ib_wc *entry) +{ + struct mthca_dev *dev = to_mdev(ibcq->device); + struct mthca_cq *cq = to_mcq(ibcq); + struct mthca_qp *qp = NULL; + int err = 0; + int freed = 0; + int npolled; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&cq->lock, &lh); + + for (npolled = 0; npolled < num_entries; ++npolled) { + err = mthca_poll_one(dev, cq, &qp, + &freed, entry + npolled); + if (err) + break; + } + + if (freed) { + wmb(); + update_cons_index(dev, cq, freed); + } + + spin_unlock_irqrestore(&lh); + + return (err == 0 || err == -EAGAIN) ? npolled : err; +} + +int mthca_poll_cq_list( + IN struct ib_cq *ibcq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist ) +{ + struct mthca_dev *dev = to_mdev(ibcq->device); + struct mthca_cq *cq = to_mcq(ibcq); + struct mthca_qp *qp = NULL; + int err = 0; + int freed = 0; + ib_wc_t *wc_p, **next_pp; + SPIN_LOCK_PREP(lh); + + HCA_ENTER(HCA_DBG_CQ); + + spin_lock_irqsave(&cq->lock, &lh); + + // loop through CQ + next_pp = pp_done_wclist; + wc_p = *pp_free_wclist; + while( wc_p ) { + // poll one CQE + err = mthca_poll_one(dev, cq, &qp, &freed, wc_p); + if (err) + break; + + // prepare for the next loop + *next_pp = wc_p; + next_pp = &wc_p->p_next; + wc_p = wc_p->p_next; + } + + // prepare the results + *pp_free_wclist = wc_p; /* Set the head of the free list. */ + *next_pp = NULL; /* Clear the tail of the done list. */ + + // update consumer index + if (freed) { + wmb(); + update_cons_index(dev, cq, freed); + } + + spin_unlock_irqrestore(&lh); + HCA_EXIT(HCA_DBG_CQ); + return (err == 0 || err == -EAGAIN)? 0 : err; +} + + +int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) +{ + __be32 doorbell[2]; + + doorbell[0] = cl_hton32((notify == IB_CQ_SOLICITED ? 
+ MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : + MTHCA_TAVOR_CQ_DB_REQ_NOT) | + to_mcq(cq)->cqn); + doorbell[1] = (__be32) 0xffffffff; + + mthca_write64(doorbell, + to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&to_mdev(cq->device)->doorbell_lock)); + + return 0; +} + +int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) +{ + struct mthca_cq *cq = to_mcq(ibcq); + __be32 doorbell[2]; + u32 sn; + __be32 ci; + + sn = cq->arm_sn & 3; + ci = cl_hton32(cq->cons_index); + + doorbell[0] = ci; + doorbell[1] = cl_hton32((cq->cqn << 8) | (2 << 5) | (sn << 3) | + (notify == IB_CQ_SOLICITED ? 1 : 2)); + + mthca_write_db_rec(doorbell, cq->arm_db); + + /* + * Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. + */ + wmb(); + + doorbell[0] = cl_hton32((sn << 28) | + (notify == IB_CQ_SOLICITED ? + MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : + MTHCA_ARBEL_CQ_DB_REQ_NOT) | + cq->cqn); + doorbell[1] = ci; + + mthca_write64(doorbell, + to_mdev(ibcq->device)->kar + MTHCA_CQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->doorbell_lock)); + + return 0; +} + +static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq) +{ + mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, + &cq->queue, cq->is_direct, &cq->mr); +} + +int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, + struct mthca_cq *cq) +{ + int size = NEXT_PAGE_ALIGN(nent * MTHCA_CQ_ENTRY_SIZE ); + struct mthca_mailbox *mailbox; + struct mthca_cq_context *cq_context; + int err = -ENOMEM; + u8 status; + int i; + SPIN_LOCK_PREP(lh); + + cq->ibcq.cqe = nent - 1; + cq->is_kernel = !ctx; + + cq->cqn = mthca_alloc(&dev->cq_table.alloc); + if (cq->cqn == -1) + return -ENOMEM; + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->cq_table.table, cq->cqn); + if (err) + goto err_out; + + if (cq->is_kernel) { + cq->arm_sn = 1; + + err = -ENOMEM; + + cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, + cq->cqn, &cq->set_ci_db); + if (cq->set_ci_db_index < 0) + goto err_out_icm; + + cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, + cq->cqn, &cq->arm_db); + if (cq->arm_db_index < 0) + goto err_out_ci; + } + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_arm; + + cq_context = mailbox->buf; + + if (cq->is_kernel) { + err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE, + &cq->queue, &cq->is_direct, + &dev->driver_pd, 1, &cq->mr); + if (err) + goto err_out_mailbox; + + for (i = 0; i < nent; ++i) + set_cqe_hw(get_cqe(cq, i)); + } + + spin_lock_init(&cq->lock); + atomic_set(&cq->refcount, 1); + init_waitqueue_head(&cq->wait); + KeInitializeMutex(&cq->mutex, 0); + + RtlZeroMemory(cq_context, sizeof *cq_context); + cq_context->flags = cl_hton32(MTHCA_CQ_STATUS_OK | + MTHCA_CQ_STATE_DISARMED | + MTHCA_CQ_FLAG_TR); + cq_context->logsize_usrpage = cl_hton32((ffs(nent) - 1) << 24); + if (ctx) + cq_context->logsize_usrpage |= cl_hton32(ctx->uar.index); + else + cq_context->logsize_usrpage |= cl_hton32(dev->driver_uar.index); + cq_context->error_eqn = cl_hton32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); + cq_context->comp_eqn = cl_hton32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); + cq_context->pd = cl_hton32(pdn); + cq_context->lkey = cl_hton32(cq->mr.ibmr.lkey); + cq_context->cqn = cl_hton32(cq->cqn); + + if (mthca_is_memfree(dev)) { + cq_context->ci_db = cl_hton32(cq->set_ci_db_index); + cq_context->state_db = cl_hton32(cq->arm_db_index); + } + + err = 
mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status); + if (err) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("SW2HW_CQ failed (%d)\n", err)); + goto err_out_free_mr; + } + + if (status) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("SW2HW_CQ returned status 0x%02x\n", + status)); + err = -EINVAL; + goto err_out_free_mr; + } + + spin_lock_irq(&dev->cq_table.lock, &lh); + if (mthca_array_set(&dev->cq_table.cq, + cq->cqn & (dev->limits.num_cqs - 1), + cq)) { + spin_unlock_irq(&lh); + goto err_out_free_mr; + } + spin_unlock_irq(&lh); + + cq->cons_index = 0; + + mthca_free_mailbox(dev, mailbox); + + return 0; + +err_out_free_mr: + if (cq->is_kernel) + mthca_free_cq_buf(dev, cq); + +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + +err_out_arm: + if (cq->is_kernel && mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + +err_out_ci: + if (cq->is_kernel && mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + +err_out_icm: + mthca_table_put(dev, dev->cq_table.table, cq->cqn); + +err_out: + mthca_free(&dev->cq_table.alloc, cq->cqn); + + return err; +} + +void mthca_free_cq(struct mthca_dev *dev, + struct mthca_cq *cq) +{ + struct mthca_mailbox *mailbox; + int err; + u8 status; + SPIN_LOCK_PREP(lh); + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("No memory for mailbox to free CQ.\n")); + return; + } + + err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status); + if (err){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_CQ failed (%d)\n", err)); + } + else if (status){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_CQ returned status 0x%02x\n", status)); + } + { // debug print + __be32 *ctx = mailbox->buf; + int j; + UNUSED_PARAM_WOWPP(ctx); + UNUSED_PARAM_WOWPP(j); + + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("context for CQN %x (cons index %x, next sw %d)\n", + cq->cqn, cq->cons_index, + cq->is_kernel ? 
!!next_cqe_sw(cq) : 0)); + for (j = 0; j < 16; ++j) + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("[%2x] %08x\n", j * 4, cl_ntoh32(ctx[j]))); + } + spin_lock_irq(&dev->cq_table.lock, &lh); + mthca_array_clear(&dev->cq_table.cq, + cq->cqn & (dev->limits.num_cqs - 1)); + spin_unlock_irq(&lh); + + /* wait for all RUNNING DPCs on that EQ to complete */ + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + KeFlushQueuedDpcs(); + + atomic_dec(&cq->refcount); + wait_event(&cq->wait, !atomic_read(&cq->refcount)); + + if (cq->is_kernel) { + mthca_free_cq_buf(dev, cq); + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + } + } + + mthca_table_put(dev, dev->cq_table.table, cq->cqn); + mthca_free(&dev->cq_table.alloc, cq->cqn); + mthca_free_mailbox(dev, mailbox); +} + +int mthca_init_cq_table(struct mthca_dev *dev) +{ + int err; + + spin_lock_init(&dev->cq_table.lock); + + err = mthca_alloc_init(&dev->cq_table.alloc, + dev->limits.num_cqs, + (1 << 24) - 1, + dev->limits.reserved_cqs); + if (err) + return err; + + err = mthca_array_init(&dev->cq_table.cq, + dev->limits.num_cqs); + if (err) + mthca_alloc_cleanup(&dev->cq_table.alloc); + + return err; +} + +void mthca_cleanup_cq_table(struct mthca_dev *dev) +{ + mthca_array_cleanup(&dev->cq_table.cq, dev->limits.num_cqs); + mthca_alloc_cleanup(&dev->cq_table.alloc); +} + + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_dev.h b/branches/IBFD/hw/mthca/kernel/mthca_dev.h new file mode 100644 index 00000000..86153f0e --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_dev.h @@ -0,0 +1,605 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * Copyright (c) 2006 SilverStorm Technologies, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#ifndef MTHCA_DEV_H +#define MTHCA_DEV_H + +#include "hca_driver.h" +#include "mthca_log.h" +#include "mthca_provider.h" +#include "mthca_doorbell.h" + +// must be synchronized with MTHCA.INF +#define DRV_NAME "mthca" +#define PFX DRV_NAME ": " +#define DRV_VERSION "1.0.0000.614" +#define DRV_RELDATE "08/03/2006" + +#define HZ 1000000 /* 1 sec in usecs */ + +enum { + MTHCA_FLAG_DDR_HIDDEN = 1 << 1, + MTHCA_FLAG_SRQ = 1 << 2, + MTHCA_FLAG_MSI = 1 << 3, + MTHCA_FLAG_MSI_X = 1 << 4, + MTHCA_FLAG_NO_LAM = 1 << 5, + MTHCA_FLAG_FMR = 1 << 6, + MTHCA_FLAG_MEMFREE = 1 << 7, + MTHCA_FLAG_PCIE = 1 << 8, + MTHCA_FLAG_SINAI_OPT = 1 << 9, + MTHCA_FLAG_LIVEFISH = 1 << 10 +}; + +enum { + MTHCA_MAX_PORTS = 2 +}; + +enum { + MTHCA_BOARD_ID_LEN = 64 +}; + +enum { + MTHCA_EQ_CONTEXT_SIZE = 0x40, + MTHCA_CQ_CONTEXT_SIZE = 0x40, + MTHCA_QP_CONTEXT_SIZE = 0x200, + MTHCA_RDB_ENTRY_SIZE = 0x20, + MTHCA_AV_SIZE = 0x20, + MTHCA_MGM_ENTRY_SIZE = 0x40, + + /* Arbel FW gives us these, but we need them for Tavor */ + MTHCA_MPT_ENTRY_SIZE = 0x40, + MTHCA_MTT_SEG_SIZE = 0x40, + + MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2) +}; + +enum { + MTHCA_EQ_CMD, + MTHCA_EQ_ASYNC, + MTHCA_EQ_COMP, + MTHCA_NUM_EQ +}; + +enum { + MTHCA_BYTES_PER_ATOMIC_COMPL = 8 +}; + +enum mthca_wr_opcode{ + MTHCA_OPCODE_NOP = 0x00, + MTHCA_OPCODE_RDMA_WRITE = 0x08, + MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09, + MTHCA_OPCODE_SEND = 0x0a, + MTHCA_OPCODE_SEND_IMM = 0x0b, + MTHCA_OPCODE_RDMA_READ = 0x10, + MTHCA_OPCODE_ATOMIC_CS = 0x11, + MTHCA_OPCODE_ATOMIC_FA = 0x12, + MTHCA_OPCODE_BIND_MW = 0x18, + MTHCA_OPCODE_INVALID = 0xff +}; + +struct mthca_cmd { + struct pci_pool *pool; + int use_events; + KMUTEX hcr_mutex; + KSEMAPHORE poll_sem; + KSEMAPHORE event_sem; + int max_cmds; + spinlock_t context_lock; + int free_head; + struct mthca_cmd_context *context; + u16 token_mask; +}; + +struct mthca_limits { + int num_ports; + int vl_cap; + int mtu_cap; + int gid_table_len; + int pkey_table_len; + int local_ca_ack_delay; + int num_uars; + int max_sg; + int num_qps; + int max_wqes; + int max_desc_sz; + int max_qp_init_rdma; + int reserved_qps; + int num_srqs; + int max_srq_wqes; + int max_srq_sge; + int reserved_srqs; + int num_eecs; + int reserved_eecs; + int num_cqs; + int max_cqes; + int reserved_cqs; + int num_eqs; + int reserved_eqs; + int num_mpts; + int num_mtt_segs; + int fmr_reserved_mtts; + int reserved_mtts; + int reserved_mrws; + int reserved_uars; + int num_mgms; + int num_amgms; + int reserved_mcgs; + int num_pds; + int reserved_pds; + u32 page_size_cap; + u32 flags; + u8 port_width_cap; +}; + +struct mthca_alloc { + u32 last; + u32 top; + u32 max; + u32 mask; + spinlock_t lock; + unsigned long *table; +}; + +struct mthca_array { + struct { + void **page; + int used; + } *page_list; +}; + +struct mthca_uar_table { + struct mthca_alloc alloc; + u64 uarc_base; + int uarc_size; +}; + +struct mthca_pd_table { + struct mthca_alloc alloc; +}; + +struct mthca_buddy { + unsigned long **bits; + int max_order; + spinlock_t lock; +}; + +struct mthca_mr_table { + struct mthca_alloc mpt_alloc; + struct mthca_buddy mtt_buddy; + struct mthca_buddy *fmr_mtt_buddy; + u64 mtt_base; + u64 mpt_base; + struct mthca_icm_table *mtt_table; + struct mthca_icm_table *mpt_table; + struct { + void __iomem *mpt_base; + SIZE_T mpt_base_size; + void __iomem *mtt_base; + SIZE_T mtt_base_size; + struct mthca_buddy mtt_buddy; + } tavor_fmr; +}; + +struct mthca_eq_table { + struct mthca_alloc alloc; + void __iomem *clr_int; + u32 clr_mask; + u32 
arm_mask; + struct mthca_eq eq[MTHCA_NUM_EQ]; + u64 icm_virt; + struct scatterlist sg; + int have_irq; + u8 inta_pin; + KLOCK_QUEUE_HANDLE lockh; +}; + +struct mthca_cq_table { + struct mthca_alloc alloc; + spinlock_t lock; + struct mthca_array cq; + struct mthca_icm_table *table; +}; + +struct mthca_srq_table { + struct mthca_alloc alloc; + spinlock_t lock; + struct mthca_array srq; + struct mthca_icm_table *table; +}; + +struct mthca_qp_table { + struct mthca_alloc alloc; + u32 rdb_base; + int rdb_shift; + int sqp_start; + spinlock_t lock; + struct mthca_array qp; + struct mthca_icm_table *qp_table; + struct mthca_icm_table *eqp_table; + struct mthca_icm_table *rdb_table; +}; + +struct mthca_av_table { + struct pci_pool *pool; + int num_ddr_avs; + u64 ddr_av_base; + void __iomem *av_map; + SIZE_T av_map_size; + struct mthca_alloc alloc; +}; + +struct mthca_mcg_table { + KMUTEX mutex; + struct mthca_alloc alloc; + struct mthca_icm_table *table; +}; + +struct mthca_catas_err { + u64 addr; + u32 __iomem *map; + SIZE_T map_size; + unsigned long stop; + u32 size; + KTIMER timer; + KDPC timer_dpc; + LARGE_INTEGER interval; +}; + +struct mthca_dev { + struct ib_device ib_dev; + hca_dev_ext_t *ext; + uplink_info_t uplink_info; + volatile long dpc_lock; + + int hca_type; + unsigned long mthca_flags; + unsigned long device_cap_flags; + + u32 rev_id; + char board_id[MTHCA_BOARD_ID_LEN]; + + /* firmware info */ + u64 fw_ver; + union { + struct { + u64 fw_start; + u64 fw_end; + } tavor; + struct { + u64 clr_int_base; + u64 eq_arm_base; + u64 eq_set_ci_base; + struct mthca_icm *fw_icm; + struct mthca_icm *aux_icm; + u16 fw_pages; + } arbel; + } fw; + + u64 ddr_start; + u64 ddr_end; + + MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock) + KMUTEX cap_mask_mutex; + + u8 __iomem *hcr; + SIZE_T hcr_size; + u8 __iomem *kar; + SIZE_T kar_size; + u8 __iomem *clr_base; + SIZE_T clr_base_size; + union { + struct { + void __iomem *ecr_base; + SIZE_T ecr_base_size; + } tavor; + struct { + void __iomem *eq_arm; + SIZE_T eq_arm_size; + void __iomem *eq_set_ci_base; + SIZE_T eq_set_ci_base_size; + } arbel; + } eq_regs; + + struct mthca_cmd cmd; + struct mthca_limits limits; + + struct mthca_uar_table uar_table; + struct mthca_pd_table pd_table; + struct mthca_mr_table mr_table; + struct mthca_eq_table eq_table; + struct mthca_cq_table cq_table; + struct mthca_srq_table srq_table; + struct mthca_qp_table qp_table; + struct mthca_av_table av_table; + struct mthca_mcg_table mcg_table; + struct mthca_catas_err catas_err; + struct mthca_uar driver_uar; + struct mthca_db_table *db_tab; + struct mthca_pd driver_pd; + struct mthca_mr driver_mr; + + struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2]; + struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; + spinlock_t sm_lock; + u32 state; +}; + +// mthca_dev states +enum { + MTHCA_DEV_UNINITIALIZED, + MTHCA_DEV_INITIALIZED, + MTHCA_DEV_FAILED +}; + +enum { + MTHCA_CQ_ENTRY_SIZE = 0x20 +}; + + + +#define MTHCA_GET(dest, source, offset) \ + { \ + void *__p = (char *) (source) + (offset); \ + void *__q = &(dest); \ + switch (sizeof (dest)) { \ + case 1: *(u8 *)__q = *(u8 *) __p; break; \ + case 2: *(u16 *)__q = (u16)cl_ntoh16(*(u16 *)__p); break; \ + case 4: *(u32 *)__q = (u32)cl_ntoh32(*(u32 *)__p); break; \ + case 8: *(u64 *)__q = (u64)cl_ntoh64(*(u64 *)__p); break; \ + default: ASSERT(0); \ + } \ + } + + +#define MTHCA_PUT(dest, source, offset) \ + { \ + void *__d = ((char *) (dest) + (offset)); \ + switch (sizeof(source)) { \ + case 1: *(u8 *) __d = (u8)(source); break; \ + case 2: *(__be16 *) 
__d = cl_hton16((u16)source); break; \ + case 4: *(__be32 *) __d = cl_hton32((u32)source); break; \ + case 8: *(__be64 *) __d = cl_hton64((u64)source); break; \ + default: ASSERT(0); \ + } \ + } + +NTSTATUS mthca_reset(struct mthca_dev *mdev); + +u32 mthca_alloc(struct mthca_alloc *alloc); +void mthca_free(struct mthca_alloc *alloc, u32 obj); +int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask, + u32 reserved); +void mthca_alloc_cleanup(struct mthca_alloc *alloc); +void *mthca_array_get(struct mthca_array *array, int index); +int mthca_array_set(struct mthca_array *array, int index, void *value); +void mthca_array_clear(struct mthca_array *array, int index); +int mthca_array_init(struct mthca_array *array, int nent); +void mthca_array_cleanup(struct mthca_array *array, int nent); +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr); +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr); + +int mthca_init_uar_table(struct mthca_dev *dev); +int mthca_init_pd_table(struct mthca_dev *dev); +int mthca_init_mr_table(struct mthca_dev *dev); +int mthca_init_eq_table(struct mthca_dev *dev); +int mthca_init_cq_table(struct mthca_dev *dev); +int mthca_init_srq_table(struct mthca_dev *dev); +int mthca_init_qp_table(struct mthca_dev *dev); +int mthca_init_av_table(struct mthca_dev *dev); +int mthca_init_mcg_table(struct mthca_dev *dev); + +void mthca_cleanup_uar_table(struct mthca_dev *dev); +void mthca_cleanup_pd_table(struct mthca_dev *dev); +void mthca_cleanup_mr_table(struct mthca_dev *dev); +void mthca_cleanup_eq_table(struct mthca_dev *dev); +void mthca_cleanup_cq_table(struct mthca_dev *dev); +void mthca_cleanup_srq_table(struct mthca_dev *dev); +void mthca_cleanup_qp_table(struct mthca_dev *dev); +void mthca_cleanup_av_table(struct mthca_dev *dev); +void mthca_cleanup_mcg_table(struct mthca_dev *dev); + +int mthca_register_device(struct mthca_dev *dev); +void mthca_unregister_device(struct mthca_dev *dev); + +void mthca_start_catas_poll(struct mthca_dev *dev); +void mthca_stop_catas_poll(struct mthca_dev *dev); + +int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); +void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); + +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd); +void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); + +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size); +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt); +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len); +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, mthca_mpt_access_t access, struct mthca_mr *mr); +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, + mthca_mpt_access_t access, struct mthca_mr *mr); +int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, + u64 *buffer_list, int buffer_size_shift, + int list_len, u64 iova, u64 total_size, + mthca_mpt_access_t access, struct mthca_mr *mr); +void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr); + +int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, + mthca_mpt_access_t access, struct mthca_fmr *fmr); +int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova); +void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct 
mthca_fmr *fmr); +int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova); +void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr); +int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr); + +int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt); +void mthca_unmap_eq_icm(struct mthca_dev *dev); + +int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, + struct _ib_wc *entry); +int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); +int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); +int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, + struct mthca_cq *cq); +void mthca_free_cq(struct mthca_dev *dev, + struct mthca_cq *cq); +void mthca_cq_completion(struct mthca_dev *dev, u32 cqn); +void mthca_cq_event(struct mthca_dev *dev, u32 cqn, + enum ib_event_type event_type); +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, + struct mthca_srq *srq); + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + ib_srq_attr_t *attr, struct mthca_srq *srq); +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +int mthca_modify_srq(struct ib_srq *ibsrq, ib_srq_attr_t *attr, + ib_srq_attr_mask_t attr_mask); +void mthca_srq_event(struct mthca_dev *dev, u32 srqn, + enum ib_event_type event_type, u8 vendor_code); +void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); +int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); + +void mthca_qp_event(struct mthca_dev *dev, u32 qpn, + enum ib_event_type event_type, u8 vendor_code); +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask); +int mthca_tavor_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr); +int mthca_tavor_post_recv(struct ib_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); +int mthca_arbel_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr); +int mthca_arbel_post_recv(struct ib_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); +void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, + int index, int *dbd, __be32 *new_wqe); +int mthca_alloc_qp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_qp_type_t type, + enum ib_sig_type send_policy, + struct ib_qp_cap *cap, + struct mthca_qp *qp); +int mthca_alloc_sqp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_sig_type send_policy, + struct ib_qp_cap *cap, + int qpn, + int port, + struct mthca_sqp *sqp); +void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); +int mthca_create_ah(struct mthca_dev *dev, + struct mthca_pd *pd, + struct ib_ah_attr *ah_attr, + struct mthca_ah *ah); +int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah); +int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, + struct ib_ud_header *header); + +int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); +int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +int mthca_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct _ib_wc *in_wc, + struct _ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + +static inline 
struct mthca_dev *to_mdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct mthca_dev, ib_dev); +} + +static inline int mthca_is_memfree(struct mthca_dev *dev) +{ + return dev->mthca_flags & MTHCA_FLAG_MEMFREE; +} + +VOID +WriteEventLogEntry( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + ULONG pi_nDataItems, + ... + ); + +VOID +WriteEventLogEntryStr( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + PWCHAR pi_InsertionStr, + ULONG pi_nDataItems, + ... + ); + + +static inline int mthca_is_livefish(struct mthca_dev *mdev) +{ + return mdev->mthca_flags & MTHCA_FLAG_LIVEFISH; +} + +void mthca_get_av_params( struct mthca_ah *ah_p, u8 *port_num, __be16 *dlid, u8 *sr, u8 *path_bits ); + +void mthca_set_av_params( struct mthca_dev *dev, struct mthca_ah *ah_p, struct ib_ah_attr *ah_attr ); + +int ib_uverbs_init(void); +void ib_uverbs_cleanup(void); +int mthca_ah_grh_present(struct mthca_ah *ah); + +int mthca_max_srq_sge(struct mthca_dev *dev); + + +#endif /* MTHCA_DEV_H */ diff --git a/branches/IBFD/hw/mthca/kernel/mthca_doorbell.h b/branches/IBFD/hw/mthca/kernel/mthca_doorbell.h new file mode 100644 index 00000000..92cfb3fd --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_doorbell.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +enum { + MTHCA_SEND_DOORBELL_FENCE = 1 << 5 +}; + +#ifdef _WIN64 +/* + * Assume that we can just write a 64-bit doorbell atomically. s390 + * actually doesn't have writeq() but S/390 systems don't even have + * PCI so we won't worry about it. 
+ */ + + + +#define MTHCA_DECLARE_DOORBELL_LOCK(name) +#define MTHCA_INIT_DOORBELL_LOCK(ptr) +#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL) + +static inline void mthca_write64_raw(__be64 val, void __iomem *dest) +{ + __raw_writeq((u64) val, dest); +} + +static inline void mthca_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + UNUSED_PARAM(doorbell_lock); + *(volatile u64 *)dest = *(volatile u64 *)val; +} + +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) +{ + *(volatile u64 *) db = *(volatile u64 *) val; +} + +#else + +/* + * Just fall back to a spinlock to protect the doorbell if + * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit + * MMIO writes. + */ + +#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name; +#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr) +#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr) + +static inline void mthca_write64_raw(__be64 val, void __iomem *dest) +{ + __raw_writel(((u32 *) &val)[0], dest); + __raw_writel(((u32 *) &val)[1], (u8*)dest + 4); +} + +static inline void mthca_write64(__be32 val[2], void __iomem *dest, + spinlock_t *doorbell_lock) +{ + SPIN_LOCK_PREP(lh); + spin_lock_irqsave(doorbell_lock, &lh); + __raw_writel((u32) val[0], dest); + __raw_writel((u32) val[1], (u8*)dest + 4); + spin_unlock_irqrestore(&lh); +} + +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) +{ + db[0] = val[0]; + wmb(); + db[1] = val[1]; +} + +#endif diff --git a/branches/IBFD/hw/mthca/kernel/mthca_eq.c b/branches/IBFD/hw/mthca/kernel/mthca_eq.c new file mode 100644 index 00000000..111dea71 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_eq.c @@ -0,0 +1,1106 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_eq.tmh" +#endif +#include "mthca_cmd.h" +#include "mthca_config_reg.h" +#include "mthca_wqe.h" + +static int mthca_map_reg(struct mthca_dev *dev, + u64 offset, unsigned long size, + void __iomem **map, SIZE_T *map_size); +static int mthca_map_eq_regs(struct mthca_dev *dev); +static void mthca_unmap_eq_regs(struct mthca_dev *dev); +static int mthca_create_eq(struct mthca_dev *dev, + int nent, + u8 intr, + struct mthca_eq *eq); + + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_map_reg) +#pragma alloc_text (PAGE, mthca_map_eq_regs) +#pragma alloc_text (PAGE, mthca_init_eq_table) +#pragma alloc_text (PAGE, mthca_unmap_eq_regs) +#pragma alloc_text (PAGE, mthca_map_eq_icm) +#pragma alloc_text (PAGE, mthca_unmap_eq_icm) +#pragma alloc_text (PAGE, mthca_create_eq) +#pragma alloc_text (PAGE, mthca_cleanup_eq_table) +#endif + +enum { + MTHCA_NUM_ASYNC_EQE = 0x80, + MTHCA_NUM_CMD_EQE = 0x80, + MTHCA_NUM_SPARE_EQE = 0x80, + MTHCA_EQ_ENTRY_SIZE = 0x20 +}; + +/* + * Must be packed because start is 64 bits but only aligned to 32 bits. + */ +#pragma pack(push,1) +struct mthca_eq_context { + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 tavor_pd; /* reserved for Arbel */ + u8 reserved1[3]; + u8 intr; + __be32 arbel_pd; /* lost_count for Tavor */ + __be32 lkey; + u32 reserved2[2]; + __be32 consumer_index; + __be32 producer_index; + u32 reserved3[4]; +}; +#pragma pack(pop) + +#define MTHCA_EQ_STATUS_OK ( 0 << 28) +#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28) +#define MTHCA_EQ_STATUS_WRITE_FAIL (10 << 28) +#define MTHCA_EQ_OWNER_SW ( 0 << 24) +#define MTHCA_EQ_OWNER_HW ( 1 << 24) +#define MTHCA_EQ_FLAG_TR ( 1 << 18) +#define MTHCA_EQ_FLAG_OI ( 1 << 17) +#define MTHCA_EQ_STATE_ARMED ( 1 << 8) +#define MTHCA_EQ_STATE_FIRED ( 2 << 8) +#define MTHCA_EQ_STATE_ALWAYS_ARMED ( 3 << 8) +#define MTHCA_EQ_STATE_ARBEL ( 8 << 8) + +enum { + MTHCA_EVENT_TYPE_COMP = 0x00, + MTHCA_EVENT_TYPE_PATH_MIG = 0x01, + MTHCA_EVENT_TYPE_COMM_EST = 0x02, + MTHCA_EVENT_TYPE_SQ_DRAINED = 0x03, + MTHCA_EVENT_TYPE_CQ_ERROR = 0x04, + MTHCA_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + MTHCA_EVENT_TYPE_EEC_CATAS_ERROR = 0x06, + MTHCA_EVENT_TYPE_PATH_MIG_FAILED = 0x07, + MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR = 0x08, + MTHCA_EVENT_TYPE_PORT_CHANGE = 0x09, + MTHCA_EVENT_TYPE_CMD = 0x0a, + MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10, + MTHCA_EVENT_TYPE_ECC_DETECT = 0x0e, + MTHCA_EVENT_TYPE_EQ_OVERFLOW = 0x0f, + MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11, + MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12, + MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE = 0x13, + MTHCA_EVENT_TYPE_SRQ_LIMIT = 0x14 +}; + +#define MTHCA_ASYNC_EVENT_MASK ((1Ui64 << MTHCA_EVENT_TYPE_PATH_MIG) | \ + (1Ui64 << MTHCA_EVENT_TYPE_COMM_EST) | \ + (1Ui64 << MTHCA_EVENT_TYPE_SQ_DRAINED) | \ + (1Ui64 << MTHCA_EVENT_TYPE_CQ_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_WQ_CATAS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_EEC_CATAS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_PATH_MIG_FAILED) | \ + (1Ui64 << MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_PORT_CHANGE) | \ + (1Ui64 << MTHCA_EVENT_TYPE_ECC_DETECT)) +#define MTHCA_SRQ_EVENT_MASK ((1Ui64 << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR) | \ + (1Ui64 << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE) | \ + (1Ui64 << MTHCA_EVENT_TYPE_SRQ_LIMIT)) + +#define MTHCA_CMD_EVENT_MASK (1Ui64 << MTHCA_EVENT_TYPE_CMD) + +#define 
MTHCA_EQ_DB_INC_CI (1 << 24) +#define MTHCA_EQ_DB_REQ_NOT (2 << 24) +#define MTHCA_EQ_DB_DISARM_CQ (3 << 24) +#define MTHCA_EQ_DB_SET_CI (4 << 24) +#define MTHCA_EQ_DB_ALWAYS_ARM (5 << 24) + +#pragma pack(push,1) +struct mthca_eqe { + u8 reserved1; + u8 type; + u8 reserved2; + u8 subtype; + union { + u32 raw[6]; + struct { + __be32 cqn; + } comp; + struct { + u16 reserved1; + __be16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + __be64 out_param; + } cmd; + struct { + __be32 qpn; + u32 reserved1; + u32 reserved2; + u8 reserved3[1]; + u8 vendor_code; + u8 reserved4[2]; + } qp; + struct { + __be32 srqn; + u32 reserved1; + u32 reserved2; + u8 reserved3[1]; + u8 vendor_code; + u8 reserved4[2]; + } srq; + struct { + __be32 cqn; + u32 reserved1; + u8 reserved2[3]; + u8 syndrome; + } cq_err; + struct { + u32 reserved1[2]; + __be32 port; + } port_change; + } event; + u8 reserved3[3]; + u8 owner; +} ; +#pragma pack(pop) + +#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7) +#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7) + +static inline u64 async_mask(struct mthca_dev *dev) +{ + return dev->mthca_flags & MTHCA_FLAG_SRQ ? + MTHCA_ASYNC_EVENT_MASK | MTHCA_SRQ_EVENT_MASK : + MTHCA_ASYNC_EVENT_MASK; +} + +static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) +{ + __be32 doorbell[2]; + + doorbell[0] = cl_hton32(MTHCA_EQ_DB_SET_CI | eq->eqn); + doorbell[1] = cl_hton32(ci & (eq->nent - 1)); + + /* + * This barrier makes sure that all updates to ownership bits + * done by set_eqe_hw() hit memory before the consumer index + * is updated. set_eq_ci() allows the HCA to possibly write + * more EQ entries, and we want to avoid the exceedingly + * unlikely possibility of the HCA writing an entry and then + * having set_eqe_hw() overwrite the owner field. + */ + wmb(); + mthca_write64(doorbell, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) +{ + /* See comment in tavor_set_eq_ci() above. 
*/ + wmb(); + __raw_writel((u32) cl_hton32(ci), + (u8*)dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8); + /* We still want ordering, just not swabbing, so add a barrier */ + mb(); +} + +static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) +{ + if (mthca_is_memfree(dev)) + arbel_set_eq_ci(dev, eq, ci); + else + tavor_set_eq_ci(dev, eq, ci); +} + +static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) +{ + __be32 doorbell[2]; + + doorbell[0] = cl_hton32(MTHCA_EQ_DB_REQ_NOT | eqn); + doorbell[1] = 0; + + mthca_write64(doorbell, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); +} + +static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask) +{ + writel(eqn_mask, dev->eq_regs.arbel.eq_arm); +} + +static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) +{ + if (!mthca_is_memfree(dev)) { + __be32 doorbell[2]; + + doorbell[0] = cl_hton32(MTHCA_EQ_DB_DISARM_CQ | eqn); + doorbell[1] = cl_hton32(cqn); + + mthca_write64(doorbell, + dev->kar + MTHCA_EQ_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } +} + +static inline struct mthca_eqe *get_eqe(struct mthca_eq *eq, u32 entry) +{ + unsigned long off = (entry & (eq->nent - 1)) * MTHCA_EQ_ENTRY_SIZE; + return (struct mthca_eqe *)((u8*)eq->page_list[off / PAGE_SIZE].page + off % PAGE_SIZE); +} + +static inline struct mthca_eqe* next_eqe_sw(struct mthca_eq *eq) +{ + struct mthca_eqe* eqe; + eqe = get_eqe(eq, eq->cons_index); + return (MTHCA_EQ_ENTRY_OWNER_HW & eqe->owner) ? NULL : eqe; +} + +static inline void set_eqe_hw(struct mthca_eqe *eqe) +{ + eqe->owner = MTHCA_EQ_ENTRY_OWNER_HW; +} + +static void port_change(struct mthca_dev *dev, int port, int active) +{ + struct ib_event record; + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Port change to %s for port %d\n", + active ? "active" : "down", port)); + + record.device = &dev->ib_dev; + record.event = active ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + record.element.port_num = (u8)port; + // Gen2 ib_core mechanism + ib_dispatch_event(&record); + // our callback + ca_event_handler( &record, &dev->ext->hca.hob ); +} + +static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) +{ + int disarm_cqn; + int eqes_found = 0; + int set_ci = 0; + struct mthca_eqe *eqe = next_eqe_sw(eq); + uint64_t start = cl_get_time_stamp(); + int loops = 0; + + while (eqe) { + + /* + * Make sure we read EQ entry contents after we've + * checked the ownership bit. 
+ */ + rmb(); + + switch (eqe->type) { + case MTHCA_EVENT_TYPE_COMP: + disarm_cqn = cl_ntoh32(eqe->event.comp.cqn) & 0xffffff; + disarm_cq(dev, eq->eqn, disarm_cqn); + mthca_cq_completion(dev, disarm_cqn); + break; + + case MTHCA_EVENT_TYPE_PATH_MIG: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_PATH_MIG, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_COMM_EST: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_COMM_EST, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_SQ_DRAINED: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_SQ_DRAINED, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_SRQ_QP_LAST_WQE_REACHED, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR: + mthca_srq_event(dev, cl_ntoh32(eqe->event.srq.srqn) & 0xffffff, + IB_EVENT_SRQ_LIMIT_REACHED, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_SRQ_LIMIT: + mthca_srq_event(dev, cl_ntoh32(eqe->event.srq.srqn) & 0xffffff, + IB_EVENT_SRQ_LIMIT_REACHED, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_FATAL, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_PATH_MIG_FAILED: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_PATH_MIG_ERR, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_REQ_ERR, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_WQ_ACCESS_ERROR: + mthca_qp_event(dev, cl_ntoh32(eqe->event.qp.qpn) & 0xffffff, + IB_EVENT_QP_ACCESS_ERR, eqe->event.qp.vendor_code); + break; + + case MTHCA_EVENT_TYPE_CMD: + mthca_cmd_event(dev, + cl_ntoh16(eqe->event.cmd.token), + eqe->event.cmd.status, + cl_ntoh64(eqe->event.cmd.out_param)); + break; + + case MTHCA_EVENT_TYPE_PORT_CHANGE: + port_change(dev, + (cl_ntoh32(eqe->event.port_change.port) >> 28) & 3, + eqe->subtype == 0x4); + break; + + case MTHCA_EVENT_TYPE_CQ_ERROR: + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("CQ %s on CQN %06x (syndrome %d)\n", + eqe->event.cq_err.syndrome == 1 ? + "overrun" : "access violation", + cl_ntoh32(eqe->event.cq_err.cqn) & 0xffffff, eqe->event.cq_err.syndrome)); + mthca_cq_event(dev, cl_ntoh32(eqe->event.cq_err.cqn), + IB_EVENT_CQ_ERR); + break; + + case MTHCA_EVENT_TYPE_EQ_OVERFLOW: + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("EQ overrun on EQN %d\n", eq->eqn)); + break; + + case MTHCA_EVENT_TYPE_EEC_CATAS_ERROR: + case MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR: + case MTHCA_EVENT_TYPE_ECC_DETECT: + default: + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW, ("Unhandled event %02x(%02x) on EQ %d\n", + eqe->type, eqe->subtype, eq->eqn)); + break; + }; + + set_eqe_hw(eqe); + ++eq->cons_index; + eqes_found += 1; + ++set_ci; + + /* + * The HCA will think the queue has overflowed if we + * don't tell it we've been processing events. We + * create our EQs with MTHCA_NUM_SPARE_EQE extra + * entries, so we must update our consumer index at + * least that often. + */ + if (unlikely(set_ci >= MTHCA_NUM_SPARE_EQE)) { + /* + * Conditional on hca_type is OK here because + * this is a rare case, not the fast path. 
+ */ + set_eq_ci(dev, eq, eq->cons_index); + set_ci = 0; + } + loops++; + if (cl_get_time_stamp() - start > g_max_DPC_time_us ) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Handling of EQ stopped, and a new DPC is entered after %d loops\n", loops)); + KeInsertQueueDpc(&dev->eq_table.eq[eq->eq_num].dpc, NULL, NULL); + break; + } + eqe = next_eqe_sw(eq); + } + + /* + * Rely on caller to set consumer index so that we don't have + * to test hca_type in our interrupt handling fast path. + */ + return eqes_found; +} + +static void mthca_tavor_dpc( PRKDPC dpc, + PVOID ctx, PVOID arg1, PVOID arg2 ) +{ + struct mthca_eq *eq = ctx; + struct mthca_dev *dev = eq->dev; + SPIN_LOCK_PREP(lh); + + UNREFERENCED_PARAMETER(dpc); + UNREFERENCED_PARAMETER(arg1); + UNREFERENCED_PARAMETER(arg2); + + spin_lock_dpc(&eq->lock, &lh); + + /* we need 'if' in case, when there were scheduled 2 DPC for one EQ */ + if (mthca_eq_int(dev, eq)) { + tavor_set_eq_ci(dev, eq, eq->cons_index); + tavor_eq_req_not(dev, eq->eqn); + } + + spin_unlock_dpc(&lh); +} + +static BOOLEAN mthca_tavor_interrupt( + PKINTERRUPT int_obj, + PVOID ctx + ) +{ + struct mthca_dev *dev = ctx; + u32 ecr; + int i; + + UNREFERENCED_PARAMETER(int_obj); + + if (dev->eq_table.clr_mask) + writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); + + ecr = readl((u8*)dev->eq_regs.tavor.ecr_base + 4); + if (!ecr) + return FALSE; + + writel(ecr, (u8*)dev->eq_regs.tavor.ecr_base + + MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + if (ecr & dev->eq_table.eq[i].eqn_mask && + next_eqe_sw(&dev->eq_table.eq[i])) { + KeInsertQueueDpc(&dev->eq_table.eq[i].dpc, NULL, NULL); + } + } + + return TRUE; +} + +#ifdef MSI_SUPPORT +static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr, + struct pt_regs *regs) +{ + struct mthca_eq *eq = eq_ptr; + struct mthca_dev *dev = eq->dev; + + mthca_eq_int(dev, eq); + tavor_set_eq_ci(dev, eq, eq->cons_index); + tavor_eq_req_not(dev, eq->eqn); + + /* MSI-X vectors always belong to us */ + return IRQ_HANDLED; +} +#endif + +static void mthca_arbel_dpc( PRKDPC dpc, + PVOID ctx, PVOID arg1, PVOID arg2 ) +{ + struct mthca_eq *eq = ctx; + struct mthca_dev *dev = eq->dev; + SPIN_LOCK_PREP(lh); + + UNREFERENCED_PARAMETER(dpc); + UNREFERENCED_PARAMETER(arg1); + UNREFERENCED_PARAMETER(arg2); + + spin_lock_dpc(&eq->lock, &lh); + + /* we need 'if' in case, when there were scheduled 2 DPC for one EQ */ + if (mthca_eq_int(dev, eq)) + arbel_set_eq_ci(dev, eq, eq->cons_index); + arbel_eq_req_not(dev, eq->eqn_mask); + + spin_unlock_dpc(&lh); +} + +static BOOLEAN mthca_arbel_interrupt( + PKINTERRUPT int_obj, + PVOID ctx + ) +{ + struct mthca_dev *dev = ctx; + int work = 0; + int i; + + UNREFERENCED_PARAMETER(int_obj); + + if (dev->eq_table.clr_mask) + writel(dev->eq_table.clr_mask, dev->eq_table.clr_int); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + if (next_eqe_sw( &dev->eq_table.eq[i]) ) { + work = 1; + while(InterlockedCompareExchange(&dev->dpc_lock, 1, 0)); + + KeInsertQueueDpc(&dev->eq_table.eq[i].dpc, NULL, NULL); + InterlockedCompareExchange(&dev->dpc_lock, 0, 1); + } else { + arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask); + } + } + + return (BOOLEAN)work; +} + +#ifdef MSI_SUPPORT +static irqreturn_t mthca_arbel_msi_x_interrupt(int irq, void *eq_ptr, + struct pt_regs *regs) +{ + struct mthca_eq *eq = eq_ptr; + struct mthca_dev *dev = eq->dev; + + mthca_eq_int(dev, eq); + arbel_set_eq_ci(dev, eq, eq->cons_index); + arbel_eq_req_not(dev, eq->eqn_mask); + + /* MSI-X vectors always belong to us */ 
+ return IRQ_HANDLED; +} +#endif + +static int mthca_create_eq(struct mthca_dev *dev, + int nent, + u8 intr, + struct mthca_eq *eq) +{ + int npages; + u64 *dma_list = NULL; + struct mthca_mailbox *mailbox; + struct mthca_eq_context *eq_context; + int err = -ENOMEM; + int i; + u8 status; + + HCA_ENTER(HCA_DBG_INIT); + eq->dev = dev; + eq->nent = roundup_pow_of_two(max(nent, 2)); + npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE; + + eq->page_list = kmalloc(npages * sizeof *eq->page_list, + GFP_KERNEL); + if (!eq->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + eq->page_list[i].page = NULL; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_out_free; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_free; + eq_context = mailbox->buf; + + for (i = 0; i < npages; ++i) { + alloc_dma_zmem_map(dev, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &eq->page_list[i]); + if (!eq->page_list[i].page) + goto err_out_free_pages; + dma_list[i] = eq->page_list[i].dma_address; + } + + for (i = 0; i < eq->nent; ++i) + set_eqe_hw(get_eqe(eq, i)); + + eq->eqn = mthca_alloc(&dev->eq_table.alloc); + if (eq->eqn == -1) + goto err_out_free_pages; + + err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, + dma_list, PAGE_SHIFT, npages, + 0, npages * PAGE_SIZE, + MTHCA_MPT_FLAG_LOCAL_WRITE | + MTHCA_MPT_FLAG_LOCAL_READ, + &eq->mr); + if (err) + goto err_out_free_eq; + + RtlZeroMemory(eq_context, sizeof *eq_context); + eq_context->flags = cl_hton32(MTHCA_EQ_STATUS_OK | + MTHCA_EQ_OWNER_HW | + MTHCA_EQ_STATE_ARMED | + MTHCA_EQ_FLAG_TR); + if (mthca_is_memfree(dev)) + eq_context->flags |= cl_hton32(MTHCA_EQ_STATE_ARBEL); + + eq_context->logsize_usrpage = cl_hton32((ffs(eq->nent) - 1) << 24); + if (mthca_is_memfree(dev)) { + eq_context->arbel_pd = cl_hton32(dev->driver_pd.pd_num); + } else { + eq_context->logsize_usrpage |= cl_hton32(dev->driver_uar.index); + eq_context->tavor_pd = cl_hton32(dev->driver_pd.pd_num); + } + eq_context->intr = intr; + eq_context->lkey = cl_hton32(eq->mr.ibmr.lkey); + + err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status); + if (err) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("SW2HW_EQ failed (%d)\n", err)); + goto err_out_free_mr; + } + if (status) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_LOW,("SW2HW_EQ returned status 0x%02x\n", + status)); + err = -EINVAL; + goto err_out_free_mr; + } + + kfree(dma_list); + mthca_free_mailbox(dev, mailbox); + + eq->eqn_mask = _byteswap_ulong(1 << eq->eqn); + eq->cons_index = 0; + + dev->eq_table.arm_mask |= eq->eqn_mask; + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_INIT ,("Allocated EQ %d with %d entries\n", + eq->eqn, eq->nent)); + + HCA_EXIT(HCA_DBG_INIT); + return err; + + err_out_free_mr: + mthca_free_mr(dev, &eq->mr); + + err_out_free_eq: + mthca_free(&dev->eq_table.alloc, eq->eqn); + + err_out_free_pages: + for (i = 0; i < npages; ++i) { + if (eq->page_list[i].page) { + free_dma_mem_map(dev, &eq->page_list[i], PCI_DMA_BIDIRECTIONAL); + } + } + mthca_free_mailbox(dev, mailbox); + + err_out_free: + kfree(eq->page_list); + kfree(dma_list); + + err_out: + HCA_EXIT(HCA_DBG_INIT); + return err; +} + +static void mthca_free_eq(struct mthca_dev *dev, + struct mthca_eq *eq) +{ + struct mthca_mailbox *mailbox; + int err; + u8 status; + int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / + PAGE_SIZE; + int i; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return; + + err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, 
&status); + if (err) + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_EQ failed (%d)\n", err)); + if (status) + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("HW2SW_EQ returned status 0x%02x\n", status)); + + dev->eq_table.arm_mask &= ~eq->eqn_mask; + + { // debug print + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Dumping EQ context %02x:\n", eq->eqn)); + for (i = 0; i < sizeof (struct mthca_eq_context) / 4; i=i+4) { + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("[%02x] %08x %08x %08x %08x\n", i, + cl_ntoh32(*(u32*)((u8*)mailbox->buf + i * 4)), + cl_ntoh32(*(u32*)((u8*)mailbox->buf + (i+1)*4)), + cl_ntoh32(*(u32*)((u8*)mailbox->buf + (i+2)*4)), + cl_ntoh32(*(u32*)((u8*)mailbox->buf + (i+1)*4)))); + + } + } + + mthca_free_mr(dev, &eq->mr); + for (i = 0; i < npages; ++i) { + free_dma_mem_map(dev, &eq->page_list[i], PCI_DMA_BIDIRECTIONAL); + } + + kfree(eq->page_list); + mthca_free_mailbox(dev, mailbox); +} + +static void mthca_free_irqs(struct mthca_dev *dev) +{ + if (dev->eq_table.have_irq) + free_irq(dev->ext->int_obj); +#ifdef MSI_SUPPORT + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (dev->eq_table.eq[i].have_irq) + free_irq(dev->eq_table.eq[i].msi_x_vector, + dev->eq_table.eq + i); +#endif +} + +static int mthca_map_reg(struct mthca_dev *dev, + u64 offset, unsigned long size, + void __iomem **map, SIZE_T *map_size) +{ + u64 base = pci_resource_start(dev, HCA_BAR_TYPE_HCR); + *map = ioremap(base + offset, size, map_size); + if (!*map) + return -ENOMEM; + return 0; +} + +static void mthca_unmap_reg(struct mthca_dev *dev, u64 offset, + unsigned long size, void __iomem *map, SIZE_T map_size) +{ + UNREFERENCED_PARAMETER(dev); + UNREFERENCED_PARAMETER(size); + UNREFERENCED_PARAMETER(offset); + iounmap(map, map_size); +} + +static int mthca_map_eq_regs(struct mthca_dev *dev) +{ + u64 mthca_base; + + mthca_base = pci_resource_start(dev, HCA_BAR_TYPE_HCR); + + if (mthca_is_memfree(dev)) { + /* + * We assume that the EQ arm and EQ set CI registers + * fall within the first BAR. We can't trust the + * values firmware gives us, since those addresses are + * valid on the HCA's side of the PCI bus but not + * necessarily the host side. + */ + if (mthca_map_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, + &dev->clr_base, &dev->clr_base_size)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map interrupt clear register, " + "aborting.\n")); + return -ENOMEM; + } + + /* + * Add 4 because we limit ourselves to EQs 0 ... 31, + * so we only need the low word of the register. 
+ */ + if (mthca_map_reg(dev, ((pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_arm_base) + 4, 4, + &dev->eq_regs.arbel.eq_arm, &dev->eq_regs.arbel.eq_arm_size)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map EQ arm register, aborting.\n")); + mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + return -ENOMEM; + } + + if (mthca_map_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_set_ci_base, + MTHCA_EQ_SET_CI_SIZE, + &dev->eq_regs.arbel.eq_set_ci_base, + &dev->eq_regs.arbel.eq_set_ci_base_size + )) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't map EQ CI register, aborting.\n")); + mthca_unmap_reg(dev, ((pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_arm_base) + 4, 4, + dev->eq_regs.arbel.eq_arm, dev->eq_regs.arbel.eq_arm_size); + mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + return -ENOMEM; + } + } else { + if (mthca_map_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, + &dev->clr_base, &dev->clr_base_size)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map interrupt clear register, " + "aborting.\n")); + return -ENOMEM; + } + + if (mthca_map_reg(dev, MTHCA_ECR_BASE, + MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, + &dev->eq_regs.tavor.ecr_base, &dev->eq_regs.tavor.ecr_base_size)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map ecr register, " + "aborting.\n")); + mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + return -ENOMEM; + } + } + + return 0; + +} + +static void mthca_unmap_eq_regs(struct mthca_dev *dev) +{ + if (mthca_is_memfree(dev)) { + mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_set_ci_base, + MTHCA_EQ_SET_CI_SIZE, + dev->eq_regs.arbel.eq_set_ci_base, + dev->eq_regs.arbel.eq_set_ci_base_size); + mthca_unmap_reg(dev, ((pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.eq_arm_base) + 4, 4, + dev->eq_regs.arbel.eq_arm, + dev->eq_regs.arbel.eq_arm_size); + mthca_unmap_reg(dev, (pci_resource_len(dev, 0) - 1) & + dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + } else { + mthca_unmap_reg(dev, MTHCA_ECR_BASE, + MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, + dev->eq_regs.tavor.ecr_base, + dev->eq_regs.tavor.ecr_base_size); + mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, + dev->clr_base, dev->clr_base_size); + } +} + +int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) +{ + int ret; + u8 status; + + /* + * We assume that mapping one page is enough for the whole EQ + * context table. This is fine with all current HCAs, because + * we only use 32 EQs and each EQ uses 32 bytes of context + * memory, or 1 KB total. 
+ */ + dev->eq_table.icm_virt = icm_virt; + alloc_dma_zmem_map(dev,PAGE_SIZE, PCI_DMA_BIDIRECTIONAL, &dev->eq_table.sg); + if (!dev->eq_table.sg.page) + return -ENOMEM; + + ret = mthca_MAP_ICM_page(dev, dev->eq_table.sg.dma_address, icm_virt, &status); + if (!ret && status) + ret = -EINVAL; + if (ret) + free_dma_mem_map(dev, &dev->eq_table.sg, PCI_DMA_BIDIRECTIONAL ); + + return ret; +} + +void mthca_unmap_eq_icm(struct mthca_dev *dev) +{ + u8 status; + + mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, PAGE_SIZE / 4096, &status); + free_dma_mem_map(dev, &dev->eq_table.sg, PCI_DMA_BIDIRECTIONAL ); +} + +int mthca_init_eq_table(struct mthca_dev *dev) +{ + int err; + u8 status; + u8 intr; + int i; + + HCA_ENTER(HCA_DBG_INIT); + err = mthca_alloc_init(&dev->eq_table.alloc, + dev->limits.num_eqs, + dev->limits.num_eqs - 1, + dev->limits.reserved_eqs); + if (err) + return err; + + err = mthca_map_eq_regs(dev); + if (err) + goto err_out_free; + +#ifdef MSI_SUPPORT + if (dev->mthca_flags & MTHCA_FLAG_MSI || + dev->mthca_flags & MTHCA_FLAG_MSI_X) { + dev->eq_table.clr_mask = 0; + } else +#endif + { + dev->eq_table.clr_mask = + _byteswap_ulong(1 << (dev->eq_table.inta_pin & 31)); + dev->eq_table.clr_int = dev->clr_base + + (dev->eq_table.inta_pin < 32 ? 4 : 0); + } + + dev->eq_table.arm_mask = 0; + + intr = (dev->mthca_flags & MTHCA_FLAG_MSI) ? + 128 : dev->eq_table.inta_pin; + + err = mthca_create_eq(dev, dev->limits.num_cqs + MTHCA_NUM_SPARE_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 128 : intr, + &dev->eq_table.eq[MTHCA_EQ_COMP]); + if (err) + goto err_out_unmap; + + err = mthca_create_eq(dev, MTHCA_NUM_ASYNC_EQE + MTHCA_NUM_SPARE_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 129 : intr, + &dev->eq_table.eq[MTHCA_EQ_ASYNC]); + if (err) + goto err_out_comp; + + err = mthca_create_eq(dev, MTHCA_NUM_CMD_EQE + MTHCA_NUM_SPARE_EQE, + (dev->mthca_flags & MTHCA_FLAG_MSI_X) ? 130 : intr, + &dev->eq_table.eq[MTHCA_EQ_CMD]); + if (err) + goto err_out_async; + +#ifdef MSI_SUPPORT + if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { + static const char *eq_name[] = { + [MTHCA_EQ_COMP] = DRV_NAME " (comp)", + [MTHCA_EQ_ASYNC] = DRV_NAME " (async)", + [MTHCA_EQ_CMD] = DRV_NAME " (cmd)" + }; + + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + err = request_irq(dev->eq_table.eq[i].msi_x_vector, + mthca_is_memfree(dev) ? + mthca_arbel_msi_x_interrupt : + mthca_tavor_msi_x_interrupt, + 0, eq_name[i], dev->eq_table.eq + i); + if (err) + goto err_out_cmd; + dev->eq_table.eq[i].have_irq = 1; + /* init DPC stuff something like that */ + spin_lock_init( &dev->eq_table.eq[i].lock ); + dev->dpc_lock = 0; + KeInitializeDpc( + &dev->eq_table.eq[i].dpc, + mthca_is_memfree(dev) ? + mthca_arbel_msi_x_dpc : + mthca_tavor_msi_x_dpc, + dev->eq_table.eq + i); + } + } else +#endif + { + spin_lock_init( &dev->ext->isr_lock ); + err = request_irq( + &dev->ext->interruptInfo, + &dev->ext->isr_lock.lock , + mthca_is_memfree(dev) ? mthca_arbel_interrupt : mthca_tavor_interrupt, + dev, + &dev->ext->int_obj + ); + if (err) + goto err_out_cmd; + dev->eq_table.have_irq = 1; + + /* init DPC stuff */ + for (i = 0; i < MTHCA_NUM_EQ; ++i) { + spin_lock_init( &dev->eq_table.eq[i].lock ); + KeInitializeDpc( + &dev->eq_table.eq[i].dpc, + mthca_is_memfree(dev) ? 
+ mthca_arbel_dpc : + mthca_tavor_dpc, + dev->eq_table.eq + i); + dev->eq_table.eq[i].eq_num = i; + } + } + + err = mthca_MAP_EQ(dev, async_mask(dev), + 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); + if (err) + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT,("MAP_EQ for async EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err)); + if (status) + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT, ("MAP_EQ for async EQ %d returned status 0x%02x\n", + dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status)); + err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, + 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); + if (err) + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT, ("MAP_EQ for cmd EQ %d failed (%d)\n", + dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err)); + if (status) + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_INIT,("MAP_EQ for cmd EQ %d returned status 0x%02x\n", + dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status)); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + if (mthca_is_memfree(dev)) + arbel_eq_req_not(dev, dev->eq_table.eq[i].eqn_mask); + else + tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn); + + return 0; + +err_out_cmd: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_CMD]); + +err_out_async: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_ASYNC]); + +err_out_comp: + mthca_free_eq(dev, &dev->eq_table.eq[MTHCA_EQ_COMP]); + +err_out_unmap: + mthca_unmap_eq_regs(dev); + +err_out_free: + mthca_alloc_cleanup(&dev->eq_table.alloc); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_INIT ,("mthca_init_eq failed %d\n",err)); + return err; +} + +void mthca_cleanup_eq_table(struct mthca_dev *dev) +{ + u8 status; + int i; + + mthca_free_irqs(dev); + + mthca_MAP_EQ(dev, async_mask(dev), + 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); + mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, + 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); + + for (i = 0; i < MTHCA_NUM_EQ; ++i) + mthca_free_eq(dev, &dev->eq_table.eq[i]); + + mthca_unmap_eq_regs(dev); + + mthca_alloc_cleanup(&dev->eq_table.alloc); +} + + + + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_log.c b/branches/IBFD/hw/mthca/kernel/mthca_log.c new file mode 100644 index 00000000..52024600 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_log.c @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2005 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +// Author: Yossi Leybovich + +#include "hca_driver.h" + + +VOID +WriteEventLogEntry( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + ULONG pi_nDataItems, + ... + ) +/*++ + +Routine Description: + Writes an event log entry to the event log. + +Arguments: + + pi_pIoObject......... The IO object ( driver object or device object ). + pi_ErrorCode......... The error code. + pi_UniqueErrorCode... A specific error code. + pi_FinalStatus....... The final status. + pi_nDataItems........ Number of data items. + . + . data items values + . + +Return Value: + + None . + +--*/ +{ /* WriteEventLogEntry */ + + /* Variable argument list */ + va_list l_Argptr; + /* Pointer to an error log entry */ + PIO_ERROR_LOG_PACKET l_pErrorLogEntry; + + /* Init the variable argument list */ + va_start(l_Argptr, pi_nDataItems); + + /* Allocate an error log entry */ + l_pErrorLogEntry = + (PIO_ERROR_LOG_PACKET)IoAllocateErrorLogEntry( + pi_pIoObject, + (UCHAR)(sizeof(IO_ERROR_LOG_PACKET)+pi_nDataItems*sizeof(ULONG)) + ); + /* Check allocation */ + if ( l_pErrorLogEntry != NULL) + { /* OK */ + + /* Data item index */ + USHORT l_nDataItem ; + + /* Set the error log entry header */ + l_pErrorLogEntry->ErrorCode = pi_ErrorCode; + l_pErrorLogEntry->DumpDataSize = (USHORT) (pi_nDataItems*sizeof(ULONG)); + l_pErrorLogEntry->SequenceNumber = 0; + l_pErrorLogEntry->MajorFunctionCode = 0; + l_pErrorLogEntry->IoControlCode = 0; + l_pErrorLogEntry->RetryCount = 0; + l_pErrorLogEntry->UniqueErrorValue = pi_UniqueErrorCode; + l_pErrorLogEntry->FinalStatus = pi_FinalStatus; + + /* Insert the data items */ + for (l_nDataItem = 0; l_nDataItem < pi_nDataItems; l_nDataItem++) + { /* Inset a data item */ + + /* Current data item */ + int l_CurDataItem ; + + /* Get next data item */ + l_CurDataItem = va_arg( l_Argptr, int); + + /* Put it into the data array */ + l_pErrorLogEntry->DumpData[l_nDataItem] = l_CurDataItem ; + + } /* Inset a data item */ + + /* Write the packet */ + IoWriteErrorLogEntry(l_pErrorLogEntry); + + } /* OK */ + + /* Term the variable argument list */ + va_end(l_Argptr); + +} /* WriteEventLogEntry */ + +/*------------------------------------------------------------------------------------------------------*/ + +VOID +WriteEventLogEntryStr( + PVOID pi_pIoObject, + ULONG pi_ErrorCode, + ULONG pi_UniqueErrorCode, + ULONG pi_FinalStatus, + PWCHAR pi_InsertionStr, + ULONG pi_nDataItems, + ... + ) +/*++ + +Routine Description: + Writes an event log entry to the event log. + +Arguments: + + pi_pIoObject......... The IO object ( driver object or device object ). + pi_ErrorCode......... The error code. + pi_UniqueErrorCode... A specific error code. + pi_FinalStatus....... The final status. + pi_nDataItems........ Number of data items. + . + . data items values + . + +Return Value: + + None . + +--*/ +{ /* WriteEventLogEntryStr */ + + /* Variable argument list */ + va_list l_Argptr; + /* Pointer to an error log entry */ + PIO_ERROR_LOG_PACKET l_pErrorLogEntry; + /* sizeof insertion string */ + int l_Size = (int)((pi_InsertionStr) ? 
((wcslen(pi_InsertionStr) + 1) * sizeof( WCHAR )) : 0); + int l_PktSize =sizeof(IO_ERROR_LOG_PACKET)+pi_nDataItems*sizeof(ULONG); + int l_TotalSize =l_PktSize +l_Size; + + /* Init the variable argument list */ + va_start(l_Argptr, pi_nDataItems); + + /* Allocate an error log entry */ + if (l_TotalSize >= ERROR_LOG_MAXIMUM_SIZE - 2) + l_TotalSize = ERROR_LOG_MAXIMUM_SIZE - 2; + l_pErrorLogEntry = (PIO_ERROR_LOG_PACKET)IoAllocateErrorLogEntry( + pi_pIoObject, (UCHAR)l_TotalSize ); + + /* Check allocation */ + if ( l_pErrorLogEntry != NULL) + { /* OK */ + + /* Data item index */ + USHORT l_nDataItem ; + + /* Set the error log entry header */ + l_pErrorLogEntry->ErrorCode = pi_ErrorCode; + l_pErrorLogEntry->DumpDataSize = (USHORT) (pi_nDataItems*sizeof(ULONG)); + l_pErrorLogEntry->SequenceNumber = 0; + l_pErrorLogEntry->MajorFunctionCode = 0; + l_pErrorLogEntry->IoControlCode = 0; + l_pErrorLogEntry->RetryCount = 0; + l_pErrorLogEntry->UniqueErrorValue = pi_UniqueErrorCode; + l_pErrorLogEntry->FinalStatus = pi_FinalStatus; + + /* Insert the data items */ + for (l_nDataItem = 0; l_nDataItem < pi_nDataItems; l_nDataItem++) + { /* Inset a data item */ + + /* Current data item */ + int l_CurDataItem ; + + /* Get next data item */ + l_CurDataItem = va_arg( l_Argptr, int); + + /* Put it into the data array */ + l_pErrorLogEntry->DumpData[l_nDataItem] = l_CurDataItem ; + + } /* Inset a data item */ + + /* add insertion string */ + if (pi_InsertionStr) { + char *ptr; + int sz = min( l_TotalSize - l_PktSize, l_Size ); + l_pErrorLogEntry->NumberOfStrings = 1; + l_pErrorLogEntry->StringOffset = sizeof(IO_ERROR_LOG_PACKET) + l_pErrorLogEntry->DumpDataSize; + ptr = (char*)l_pErrorLogEntry + l_pErrorLogEntry->StringOffset; + memcpy( ptr, pi_InsertionStr, sz ); + *(WCHAR*)&ptr[sz - 2] = (WCHAR)0; + } + + /* Write the packet */ + IoWriteErrorLogEntry(l_pErrorLogEntry); + + } /* OK */ + + /* Term the variable argument list */ + va_end(l_Argptr); + +} /* WriteEventLogEntry */ + + + + + + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_log.mc b/branches/IBFD/hw/mthca/kernel/mthca_log.mc new file mode 100644 index 00000000..08cbddae --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_log.mc @@ -0,0 +1,56 @@ +;/*++ +;============================================================================= +;Copyright (c) 2001 Mellanox Technologies +; +;Module Name: +; +; mthcalog.mc +; +;Abstract: +; +; MTHCA Driver event log messages +; +;Authors: +; +; Yossi Leybovich +; +;Environment: +; +; Kernel Mode . +; +;============================================================================= +;--*/ +; +MessageIdTypedef = NTSTATUS + +SeverityNames = ( + Success = 0x0:STATUS_SEVERITY_SUCCESS + Informational = 0x1:STATUS_SEVERITY_INFORMATIONAL + Warning = 0x2:STATUS_SEVERITY_WARNING + Error = 0x3:STATUS_SEVERITY_ERROR + ) + +FacilityNames = ( + System = 0x0 + RpcRuntime = 0x2:FACILITY_RPC_RUNTIME + RpcStubs = 0x3:FACILITY_RPC_STUBS + Io = 0x4:FACILITY_IO_ERROR_CODE + MTHCA = 0x7:FACILITY_MTHCA_ERROR_CODE + ) + + +MessageId=0x0001 Facility=MTHCA Severity=Informational SymbolicName=EVENT_MTHCA_ANY_INFO +Language=English +%2 +. + +MessageId=0x0002 Facility=MTHCA Severity=Warning SymbolicName=EVENT_MTHCA_ANY_WARN +Language=English +%2 +. + +MessageId=0x0003 Facility=MTHCA Severity=Error SymbolicName=EVENT_MTHCA_ANY_ERROR +Language=English +%2 +. 
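+;
+;// Illustration (not in the original sources): these generic messages are the
+;// counterpart of WriteEventLogEntryStr() in mthca_log.c above -- %2 is filled
+;// from the insertion string carried in the IO_ERROR_LOG_PACKET it builds.
+;// A hypothetical call site (p_dev_obj standing for the driver's device
+;// object) might look like:
+;//
+;//     WriteEventLogEntryStr( p_dev_obj, (ULONG)EVENT_MTHCA_ANY_ERROR,
+;//                            0, 0, L"QUERY_FW command failed", 1, err );
+;//
+;// i.e. an Error-severity event whose text is the insertion string, with one
+;// ULONG of dump data (err) appended.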
+ diff --git a/branches/IBFD/hw/mthca/kernel/mthca_log.rc b/branches/IBFD/hw/mthca/kernel/mthca_log.rc new file mode 100644 index 00000000..116522b7 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_log.rc @@ -0,0 +1,2 @@ +LANGUAGE 0x9,0x1 +1 11 MSG00001.bin diff --git a/branches/IBFD/hw/mthca/kernel/mthca_mad.c b/branches/IBFD/hw/mthca/kernel/mthca_mad.c new file mode 100644 index 00000000..07dee658 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_mad.c @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include +#include +#include + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_mad.tmh" +#endif +#include "mthca_cmd.h" + +enum { + MTHCA_VENDOR_CLASS1 = 0x9, + MTHCA_VENDOR_CLASS2 = 0xa +}; + +struct mthca_trap_mad { + struct scatterlist sg; +}; + +static void update_sm_ah(struct mthca_dev *dev, + u8 port_num, u16 lid, u8 sl) +{ + struct ib_ah *new_ah; + struct ib_ah_attr ah_attr; + SPIN_LOCK_PREP(lh); + + if (!dev->send_agent[port_num - 1][0]) + return; + + RtlZeroMemory(&ah_attr, sizeof ah_attr); + ah_attr.dlid = lid; + ah_attr.sl = sl; + ah_attr.port_num = port_num; + + new_ah = ibv_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, + &ah_attr, NULL, NULL); + if (IS_ERR(new_ah)) + return; + + spin_lock_irqsave(&dev->sm_lock, &lh); + if (dev->sm_ah[port_num - 1]) { + ibv_destroy_ah(dev->sm_ah[port_num - 1]); + } + dev->sm_ah[port_num - 1] = new_ah; + spin_unlock_irqrestore(&lh); +} + +/* + * Snoop SM MADs for port info and P_Key table sets, so we can + * synthesize LID change and P_Key change events. 
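+ *
+ * Concretely (a reading of the code below): a Set(PortInfo) from the SM can
+ * move the port to a new LID and can change the SM's own LID/SL, which
+ * update_sm_ah() re-caches for trap forwarding, and a Set(P_KeyTable)
+ * rewrites the partition table; smp_snoop() reports these as
+ * IB_EVENT_LID_CHANGE / IB_EVENT_PKEY_CHANGE via ib_dispatch_event() so that
+ * registered consumers need not re-query the port.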
+ */ +static void smp_snoop(struct ib_device *ibdev, + u8 port_num, + struct ib_mad *mad) +{ + struct ib_event event; + + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + mad->mad_hdr.method == IB_MGMT_METHOD_SET) { + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { + update_sm_ah(to_mdev(ibdev), port_num, + cl_ntoh16(*(__be16 *) (mad->data + 58)), + (*(u8 *) (mad->data + 76)) & 0xf); + + event.device = ibdev; + event.event = IB_EVENT_LID_CHANGE; + event.element.port_num = port_num; + ib_dispatch_event(&event); + } + + if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { + event.device = ibdev; + event.event = IB_EVENT_PKEY_CHANGE; + event.element.port_num = port_num; + ib_dispatch_event(&event); + } + } +} + +static void forward_trap(struct mthca_dev *dev, + u8 port_num, + struct ib_mad *mad) +{ + int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; + struct mthca_trap_mad *tmad; + struct ib_sge gather_list; + struct _ib_send_wr wr; + struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; + int ret; + SPIN_LOCK_PREP(lh); + + /* fill the template */ + wr.ds_array = (ib_local_ds_t* __ptr64)(void*)&gather_list; + wr.num_ds = 1; + wr.wr_type = WR_SEND; + wr.send_opt = IB_SEND_OPT_SIGNALED; + wr.dgrm.ud.remote_qp = cl_hton32(qpn); + wr.dgrm.ud.remote_qkey = qpn ? IB_QP1_QKEY : 0; + + if (agent) { + tmad = kmalloc(sizeof *tmad, GFP_KERNEL); + if (!tmad) + return; + + alloc_dma_zmem(dev, sizeof *mad, &tmad->sg); + if (!tmad->sg.page) { + kfree(tmad); + return; + } + + memcpy(tmad->sg.page, mad, sizeof *mad); + + wr.dgrm.ud.rsvd = (void* __ptr64)&((struct ib_mad *)tmad->sg.page)->mad_hdr; + wr.wr_id = (u64)(ULONG_PTR)tmad; + gather_list.addr = tmad->sg.dma_address; + gather_list.length = tmad->sg.length; + gather_list.lkey = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey; + + /* + * We rely here on the fact that MLX QPs don't use the + * address handle after the send is posted (this is + * wrong following the IB spec strictly, but we know + * it's OK for our devices). + */ + spin_lock_irqsave(&dev->sm_lock, &lh); + wr.dgrm.ud.h_av = (ib_av_handle_t)dev->sm_ah[port_num - 1]; + if (wr.dgrm.ud.h_av) { + HCA_PRINT( TRACE_LEVEL_ERROR ,HCA_DBG_MAD ,(" ib_post_send_mad not ported \n" )); + ret = -EINVAL; + } + else + ret = -EINVAL; + spin_unlock_irqrestore(&lh); + + if (ret) { + free_dma_mem_map(dev, &tmad->sg, PCI_DMA_BIDIRECTIONAL ); + kfree(tmad); + } + } +} + +int mthca_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct _ib_wc *in_wc, + struct _ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + int err; + u8 status; + u16 slid = in_wc ? 
in_wc->recv.ud.remote_lid : cl_ntoh16(IB_LID_PERMISSIVE); + + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD ,("in: Class %02x, Method %02x, AttrId %x, AttrMod %x, ClSpec %x, Tid %I64x\n", + (u32)in_mad->mad_hdr.mgmt_class, (u32)in_mad->mad_hdr.method, + (u32)in_mad->mad_hdr.attr_id, in_mad->mad_hdr.attr_mod, + (u32)in_mad->mad_hdr.class_specific, in_mad->mad_hdr.tid )); + + /* Forward locally generated traps to the SM */ + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && + slid == 0) { + forward_trap(to_mdev(ibdev), port_num, in_mad); + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD ,("Not sent, but locally forwarded\n")); + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } + + /* + * Only handle SM gets, sets and trap represses for SM class + * + * Only handle PMA and Mellanox vendor-specific class gets and + * sets for other classes. + */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) { + HCA_PRINT( TRACE_LEVEL_VERBOSE,HCA_DBG_MAD,(" Skip some methods. Nothing done !\n")); + return IB_MAD_RESULT_SUCCESS; + } + + /* + * Don't process SMInfo queries or vendor-specific + * MADs -- the SMA can't handle them. + */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || + ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == + IB_SMP_ATTR_VENDOR_MASK)) { + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD ,("Skip SMInfo queries or vendor-specific MADs. Nothing done !\n")); + return IB_MAD_RESULT_SUCCESS; + } + } + else { + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 || + in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) { + + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) { + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD ,("Skip some management methods. Nothing done !\n")); + return IB_MAD_RESULT_SUCCESS; + } + } + else { + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD ,("Skip IB_MGMT_CLASS_PERF_MGMT et al. 
Nothing done !\n")); + return IB_MAD_RESULT_SUCCESS; + } + } + + // send MAD + err = mthca_MAD_IFC(to_mdev(ibdev), + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, + port_num, in_wc, in_grh, in_mad, out_mad, + &status); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MAD ,("MAD_IFC failed\n")); + return IB_MAD_RESULT_FAILURE; + } + if (status == MTHCA_CMD_STAT_BAD_PKT) + return IB_MAD_RESULT_SUCCESS; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MAD ,("MAD_IFC returned status %02x\n", status)); + return IB_MAD_RESULT_FAILURE; + } + + if (!out_mad->mad_hdr.status) + smp_snoop(ibdev, port_num, in_mad); + + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_MAD,("out: Class %02x, Method %02x, AttrId %x, AttrMod %x, ClSpec %x, Tid %I64x, Status %x\n", + (u32)out_mad->mad_hdr.mgmt_class, (u32)out_mad->mad_hdr.method, + (u32)out_mad->mad_hdr.attr_id, out_mad->mad_hdr.attr_mod, + (u32)out_mad->mad_hdr.class_specific, out_mad->mad_hdr.tid, + (u32)out_mad->mad_hdr.status )); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) { + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + struct mthca_trap_mad *tmad = + (void *) (ULONG_PTR) mad_send_wc->wr_id; + + free_dma_mem_map(agent->device->mdev, &tmad->sg, PCI_DMA_BIDIRECTIONAL ); + kfree(tmad); +} diff --git a/branches/IBFD/hw/mthca/kernel/mthca_main.c b/branches/IBFD/hw/mthca/kernel/mthca_main.c new file mode 100644 index 00000000..3137a6b2 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_main.c @@ -0,0 +1,1108 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_main.tmh" +#endif +#include "mthca_config_reg.h" +#include "mthca_cmd.h" +#include "mthca_profile.h" +#include "mthca_memfree.h" + +static const char mthca_version[] = + DRV_NAME ": HCA Driver v" + DRV_VERSION " (" DRV_RELDATE ")"; + +static struct mthca_profile default_profile = { + 1 << 16, // num_qp + 4, // rdb_per_qp + 0, // num_srq + 1 << 16, // num_cq + 1 << 13, // num_mcg + 1 << 17, // num_mpt + 1 << 20, // num_mtt + 1 << 15, // num_udav (Tavor only) + 0, // num_uar + 1 << 18, // uarc_size (Arbel only) + 1 << 18, // fmr_reserved_mtts (Tavor only) +}; + +/* Types of supported HCA */ +enum __hca_type { + TAVOR, /* MT23108 */ + ARBEL_COMPAT, /* MT25208 in Tavor compat mode */ + ARBEL_NATIVE, /* MT25218 with extended features */ + SINAI, /* MT25204 */ + LIVEFISH /* a burning device */ +}; + +#define MTHCA_FW_VER(major, minor, subminor) \ + (((u64) (major) << 32) | ((u64) (minor) << 16) | (u64) (subminor)) + +static struct { + u64 max_unsupported_fw; + u64 min_supported_fw; + int is_memfree; + int is_pcie; +} mthca_hca_table[] = { + { MTHCA_FW_VER(3, 3, 2), MTHCA_FW_VER(3, 4, 0), 0, 0 }, /* TAVOR */ + { MTHCA_FW_VER(4, 7, 0), MTHCA_FW_VER(4, 7, 400), 0, 1 }, /* ARBEL_COMPAT */ + { MTHCA_FW_VER(5, 1, 0), MTHCA_FW_VER(5, 1, 400), 1, 1 }, /* ARBEL_NATIVE */ + { MTHCA_FW_VER(1, 0, 800), MTHCA_FW_VER(1, 1, 0), 1, 1 }, /* SINAI */ + { MTHCA_FW_VER(0, 0, 0), MTHCA_FW_VER(0, 0, 0), 0, 0 } /* LIVEFISH */ +}; + + +#define HCA(v, d, t) \ + { PCI_VENDOR_ID_##v, PCI_DEVICE_ID_MELLANOX_##d, t } + +static struct pci_device_id { + unsigned vendor; + unsigned device; + enum __hca_type driver_data; +} mthca_pci_table[] = { + HCA(MELLANOX, TAVOR, TAVOR), + HCA(MELLANOX, ARBEL_COMPAT, ARBEL_COMPAT), + HCA(MELLANOX, ARBEL, ARBEL_NATIVE), + HCA(MELLANOX, SINAI_OLD, SINAI), + HCA(MELLANOX, SINAI, SINAI), + HCA(TOPSPIN, TAVOR, TAVOR), + HCA(TOPSPIN, ARBEL_COMPAT, TAVOR), + HCA(TOPSPIN, ARBEL, ARBEL_NATIVE), + HCA(TOPSPIN, SINAI_OLD, SINAI), + HCA(TOPSPIN, SINAI, SINAI), + // live fishes + HCA(MELLANOX, TAVOR_BD, LIVEFISH), + HCA(MELLANOX, ARBEL_BD, LIVEFISH), + HCA(MELLANOX, SINAI_OLD_BD, LIVEFISH), + HCA(MELLANOX, SINAI_BD, LIVEFISH), + HCA(TOPSPIN, TAVOR_BD, LIVEFISH), + HCA(TOPSPIN, ARBEL_BD, LIVEFISH), + HCA(TOPSPIN, SINAI_OLD_BD, LIVEFISH), + HCA(TOPSPIN, SINAI_BD, LIVEFISH), +}; +#define MTHCA_PCI_TABLE_SIZE (sizeof(mthca_pci_table)/sizeof(struct pci_device_id)) + +// wrapper to driver's hca_tune_pci +static NTSTATUS mthca_tune_pci(struct mthca_dev *mdev) +{ + PDEVICE_OBJECT pdo = mdev->ext->cl_ext.p_self_do; + return hca_tune_pci(pdo, &mdev->uplink_info); +} + +int mthca_get_dev_info(struct mthca_dev *mdev, __be64 *node_guid, u32 *hw_id) +{ + struct ib_device_attr props; + struct ib_device *ib_dev = &mdev->ib_dev; + int err = (ib_dev->query_device )(ib_dev, &props ); + + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("can't get guid - mthca_query_port() failed (%08X)\n", err )); + return err; + } + + //TODO: do we need to convert GUID to LE by cl_ntoh64(x) ? 
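+ // Probably not: node_guid is declared __be64 here and the value taken from
+ // ib_dev below is assumed to already be in network byte order, so it is
+ // returned unconverted; a caller wanting host order would apply cl_ntoh64()
+ // itself.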
+ *node_guid = ib_dev->node_guid; + *hw_id = props.hw_ver; + return 0; +} + +static struct pci_device_id * mthca_find_pci_dev(unsigned ven_id, unsigned dev_id) +{ + struct pci_device_id *p_id = mthca_pci_table; + int i; + + // find p_id (appropriate line in mthca_pci_table) + for (i = 0; i < MTHCA_PCI_TABLE_SIZE; ++i, ++p_id) { + if (p_id->device == dev_id && p_id->vendor == ven_id) + return p_id; + } + return NULL; +} + + +static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) +{ + int err; + u8 status; + + err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_DEV_LIM command failed, aborting.\n")); + return err; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_DEV_LIM returned status 0x%02x, " + "aborting.\n", status)); + return -EINVAL; + } + if (dev_lim->min_page_sz > PAGE_SIZE) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("HCA minimum page size of %d bigger than " + "kernel PAGE_SIZE of %ld, aborting.\n", + dev_lim->min_page_sz, PAGE_SIZE)); + return -ENODEV; + } + if (dev_lim->num_ports > MTHCA_MAX_PORTS) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("HCA has %d ports, but we only support %d, " + "aborting.\n", + dev_lim->num_ports, MTHCA_MAX_PORTS)); + return -ENODEV; + } + + if (dev_lim->uar_size > (int)pci_resource_len(mdev, HCA_BAR_TYPE_UAR)) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW , ("HCA reported UAR size of 0x%x bigger than " + "Bar%d size of 0x%lx, aborting.\n", + dev_lim->uar_size, HCA_BAR_TYPE_UAR, + (unsigned long)pci_resource_len(mdev, HCA_BAR_TYPE_UAR))); + return -ENODEV; + } + + + mdev->limits.num_ports = dev_lim->num_ports; + mdev->limits.vl_cap = dev_lim->max_vl; + mdev->limits.mtu_cap = dev_lim->max_mtu; + mdev->limits.gid_table_len = dev_lim->max_gids; + mdev->limits.pkey_table_len = dev_lim->max_pkeys; + mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay; + mdev->limits.max_sg = dev_lim->max_sg; + mdev->limits.max_wqes = dev_lim->max_qp_sz; + mdev->limits.max_qp_init_rdma = dev_lim->max_requester_per_qp; + mdev->limits.reserved_qps = dev_lim->reserved_qps; + mdev->limits.max_srq_wqes = dev_lim->max_srq_sz; + mdev->limits.reserved_srqs = dev_lim->reserved_srqs; + mdev->limits.reserved_eecs = dev_lim->reserved_eecs; + mdev->limits.max_desc_sz = dev_lim->max_desc_sz; + mdev->limits.max_srq_sge = mthca_max_srq_sge(mdev); + /* + * Subtract 1 from the limit because we need to allocate a + * spare CQE so the HCA HW can tell the difference between an + * empty CQ and a full CQ. + */ + mdev->limits.max_cqes = dev_lim->max_cq_sz - 1; + mdev->limits.reserved_cqs = dev_lim->reserved_cqs; + mdev->limits.reserved_eqs = dev_lim->reserved_eqs; + mdev->limits.reserved_mtts = dev_lim->reserved_mtts; + mdev->limits.reserved_mrws = dev_lim->reserved_mrws; + mdev->limits.reserved_uars = dev_lim->reserved_uars; + mdev->limits.reserved_pds = dev_lim->reserved_pds; + mdev->limits.port_width_cap = (u8)dev_lim->max_port_width; + mdev->limits.page_size_cap = !(u32)(dev_lim->min_page_sz - 1); + mdev->limits.flags = dev_lim->flags; + + /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. + May be doable since hardware supports it for SRQ. + + IB_DEVICE_N_NOTIFY_CQ is supported by hardware but not by driver. + + IB_DEVICE_SRQ_RESIZE is supported by hardware but SRQ is not + supported by driver. 
*/ + mdev->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | + IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN; + + if (dev_lim->flags & DEV_LIM_FLAG_BAD_PKEY_CNTR) + mdev->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; + + if (dev_lim->flags & DEV_LIM_FLAG_BAD_QKEY_CNTR) + mdev->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; + + if (dev_lim->flags & DEV_LIM_FLAG_RAW_MULTI) + mdev->device_cap_flags |= IB_DEVICE_RAW_MULTI; + + if (dev_lim->flags & DEV_LIM_FLAG_AUTO_PATH_MIG) + mdev->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; + + if (dev_lim->flags & DEV_LIM_FLAG_UD_AV_PORT_ENFORCE) + mdev->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; + + if (dev_lim->flags & DEV_LIM_FLAG_SRQ) + mdev->mthca_flags |= MTHCA_FLAG_SRQ; + + return 0; +} + +static int mthca_init_tavor(struct mthca_dev *mdev) +{ + u8 status; + int err; + struct mthca_dev_lim dev_lim; + struct mthca_profile profile; + struct mthca_init_hca_param init_hca; + + err = mthca_SYS_EN(mdev, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("SYS_EN command failed, aborting.\n")); + return err; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("SYS_EN returned status 0x%02x, " + "aborting.\n", status)); + return -EINVAL; + } + + err = mthca_QUERY_FW(mdev, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_FW command failed, aborting.\n")); + goto err_disable; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_FW returned status 0x%02x, " + "aborting.\n", status)); + err = -EINVAL; + goto err_disable; + } + err = mthca_QUERY_DDR(mdev, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_DDR command failed, aborting.\n")); + goto err_disable; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,( "QUERY_DDR returned status 0x%02x, " + "aborting.\n", status)); + err = -EINVAL; + goto err_disable; + } + + err = mthca_dev_lim(mdev, &dev_lim); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,( "QUERY_DEV_LIM command failed, aborting.\n")); + goto err_disable; + } + + profile = default_profile; + profile.num_uar = dev_lim.uar_size / PAGE_SIZE; + profile.uarc_size = 0; + + /* correct default profile */ + if ( g_profile_qp_num != 0 ) + profile.num_qp = g_profile_qp_num; + + if ( g_profile_rd_out != 0xffffffff ) + profile.rdb_per_qp = g_profile_rd_out; + + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; + + err = (int)mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); + if (err < 0) + goto err_disable; + + err = (int)mthca_INIT_HCA(mdev, &init_hca, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("INIT_HCA command failed, aborting.\n")); + goto err_disable; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("INIT_HCA returned status 0x%02x, " + "aborting.\n", status)); + err = -EINVAL; + goto err_disable; + } + + return 0; + +err_disable: + mthca_SYS_DIS(mdev, &status); + + return err; +} + +static int mthca_load_fw(struct mthca_dev *mdev) +{ + u8 status; + int err; + + /* FIXME: use HCA-attached memory for FW if present */ + + mdev->fw.arbel.fw_icm = + mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages, + GFP_HIGHUSER | __GFP_NOWARN); + if (!mdev->fw.arbel.fw_icm) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't allocate FW area, aborting.\n")); + return -ENOMEM; + } + + err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MAP_FA 
command failed, aborting.\n")); + goto err_free; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MAP_FA returned status 0x%02x, aborting.\n", status)); + err = -EINVAL; + goto err_free; + } + err = mthca_RUN_FW(mdev, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("RUN_FW command failed, aborting.\n")); + goto err_unmap_fa; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("RUN_FW returned status 0x%02x, aborting.\n", status)); + err = -EINVAL; + goto err_unmap_fa; + } + + return 0; + +err_unmap_fa: + mthca_UNMAP_FA(mdev, &status); + +err_free: + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); + return err; +} + +static int mthca_init_icm(struct mthca_dev *mdev, + struct mthca_dev_lim *dev_lim, + struct mthca_init_hca_param *init_hca, + u64 icm_size) +{ + u64 aux_pages; + u8 status; + int err; + + err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("SET_ICM_SIZE command failed, aborting.\n")); + return err; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("SET_ICM_SIZE returned status 0x%02x, " + "aborting.\n", status)); + return -EINVAL; + } + + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW , ("%I64d KB of HCA context requires %I64d KB aux memory.\n", + (u64) icm_size >> 10, + (u64) aux_pages << 2)); + + mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, (int)aux_pages, + GFP_HIGHUSER | __GFP_NOWARN); + if (!mdev->fw.arbel.aux_icm) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Couldn't allocate aux memory, aborting.\n")); + return -ENOMEM; + } + + err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MAP_ICM_AUX command failed, aborting.\n")); + goto err_free_aux; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MAP_ICM_AUX returned status 0x%02x, aborting.\n", status)); + err = -EINVAL; + goto err_free_aux; + } + + err = mthca_map_eq_icm(mdev, init_hca->eqc_base); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map EQ context memory, aborting.\n")); + goto err_unmap_aux; + } + + mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, + MTHCA_MTT_SEG_SIZE, + mdev->limits.num_mtt_segs, + mdev->limits.reserved_mtts, 1); + if (!mdev->mr_table.mtt_table) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map MTT context memory, aborting.\n")); + err = -ENOMEM; + goto err_unmap_eq; + } + + mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base, + dev_lim->mpt_entry_sz, + mdev->limits.num_mpts, + mdev->limits.reserved_mrws, 1); + if (!mdev->mr_table.mpt_table) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map MPT context memory, aborting.\n")); + err = -ENOMEM; + goto err_unmap_mtt; + } + + mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base, + dev_lim->qpc_entry_sz, + mdev->limits.num_qps, + mdev->limits.reserved_qps, 0); + if (!mdev->qp_table.qp_table) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map QP context memory, aborting.\n")); + err = -ENOMEM; + goto err_unmap_mpt; + } + + mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base, + dev_lim->eqpc_entry_sz, + mdev->limits.num_qps, + mdev->limits.reserved_qps, 0); + if (!mdev->qp_table.eqp_table) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map EQP context memory, aborting.\n")); + err = -ENOMEM; + goto err_unmap_qp; + } + + 
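+ /*
+  * The RDB table below is sized num_qps << rdb_shift -- each QP owns a
+  * power-of-two block of RDB entries (apparently the responder resources
+  * for inbound RDMA read/atomic, cf. rdb_per_qp in the default profile)
+  * -- and nothing is reserved, hence the trailing 0, 0 arguments.
+  */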
mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base, + MTHCA_RDB_ENTRY_SIZE, + mdev->limits.num_qps << + mdev->qp_table.rdb_shift, + 0, 0); + if (!mdev->qp_table.rdb_table) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map RDB context memory, aborting\n")); + err = -ENOMEM; + goto err_unmap_eqp; + } + + mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, + dev_lim->cqc_entry_sz, + mdev->limits.num_cqs, + mdev->limits.reserved_cqs, 0); + if (!mdev->cq_table.table) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map CQ context memory, aborting.\n")); + err = -ENOMEM; + goto err_unmap_rdb; + } + + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) { + mdev->srq_table.table = + mthca_alloc_icm_table(mdev, init_hca->srqc_base, + dev_lim->srq_entry_sz, + mdev->limits.num_srqs, + mdev->limits.reserved_srqs, 0); + if (!mdev->srq_table.table) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map SRQ context memory, " + "aborting.\n")); + err = -ENOMEM; + goto err_unmap_cq; + } + } + + /* + * It's not strictly required, but for simplicity just map the + * whole multicast group table now. The table isn't very big + * and it's a lot easier than trying to track ref counts. + */ + mdev->mcg_table.table = mthca_alloc_icm_table(mdev, init_hca->mc_base, + MTHCA_MGM_ENTRY_SIZE, + mdev->limits.num_mgms + + mdev->limits.num_amgms, + mdev->limits.num_mgms + + mdev->limits.num_amgms, + 0); + if (!mdev->mcg_table.table) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to map MCG context memory, aborting.\n")); + err = -ENOMEM; + goto err_unmap_srq; + } + + return 0; + +err_unmap_srq: + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + +err_unmap_cq: + mthca_free_icm_table(mdev, mdev->cq_table.table); + +err_unmap_rdb: + mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); + +err_unmap_eqp: + mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); + +err_unmap_qp: + mthca_free_icm_table(mdev, mdev->qp_table.qp_table); + +err_unmap_mpt: + mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); + +err_unmap_mtt: + mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); + +err_unmap_eq: + mthca_unmap_eq_icm(mdev); + +err_unmap_aux: + mthca_UNMAP_ICM_AUX(mdev, &status); + +err_free_aux: + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + + return err; +} + +static int mthca_init_arbel(struct mthca_dev *mdev) +{ + struct mthca_dev_lim dev_lim; + struct mthca_profile profile; + struct mthca_init_hca_param init_hca; + u64 icm_size; + u8 status; + int err; + + err = mthca_QUERY_FW(mdev, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_FW command failed, aborting.\n")); + return err; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_FW returned status 0x%02x, " + "aborting.\n", status)); + return -EINVAL; + } + + err = mthca_ENABLE_LAM(mdev, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("ENABLE_LAM command failed, aborting.\n")); + return err; + } + if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("No HCA-attached memory (running in MemFree mode)\n")); + mdev->mthca_flags |= MTHCA_FLAG_NO_LAM; + } else if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("ENABLE_LAM returned status 0x%02x, " + "aborting.\n", status)); + return -EINVAL; + } + + err = mthca_load_fw(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to start FW, aborting.\n")); + goto 
err_disable; + } + + err = mthca_dev_lim(mdev, &dev_lim); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_DEV_LIM command failed, aborting.\n")); + goto err_stop_fw; + } + + profile = default_profile; + profile.num_uar = dev_lim.uar_size / PAGE_SIZE; + profile.num_udav = 0; + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; + + /* correct default profile */ + if ( g_profile_qp_num != 0 ) + profile.num_qp = g_profile_qp_num; + + if ( g_profile_rd_out != 0xffffffff ) + profile.rdb_per_qp = g_profile_rd_out; + + RtlZeroMemory( &init_hca, sizeof(init_hca)); + icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); + if ((int) icm_size < 0) { + err = (int)icm_size; + goto err_stop_fw; + } + + err = mthca_init_icm(mdev, &dev_lim, &init_hca, icm_size); + if (err) + goto err_stop_fw; + + err = mthca_INIT_HCA(mdev, &init_hca, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("INIT_HCA command failed, aborting.\n")); + goto err_free_icm; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("INIT_HCA returned status 0x%02x, " + "aborting.\n", status)); + err = -EINVAL; + goto err_free_icm; + } + + return 0; + +err_free_icm: + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + mthca_free_icm_table(mdev, mdev->cq_table.table); + mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); + mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); + mthca_free_icm_table(mdev, mdev->qp_table.qp_table); + mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); + mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); + mthca_unmap_eq_icm(mdev); + + mthca_UNMAP_ICM_AUX(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + +err_stop_fw: + mthca_UNMAP_FA(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); + +err_disable: + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) + mthca_DISABLE_LAM(mdev, &status); + + return err; +} + +static void mthca_close_hca(struct mthca_dev *mdev) +{ + u8 status; + + mthca_CLOSE_HCA(mdev, 0, &status); + + if (mthca_is_memfree(mdev)) { + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + mthca_free_icm_table(mdev, mdev->cq_table.table); + mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); + mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); + mthca_free_icm_table(mdev, mdev->qp_table.qp_table); + mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); + mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); + mthca_free_icm_table(mdev, mdev->mcg_table.table); + mthca_unmap_eq_icm(mdev); + + mthca_UNMAP_ICM_AUX(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + + mthca_UNMAP_FA(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); + + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) + mthca_DISABLE_LAM(mdev, &status); + } else + mthca_SYS_DIS(mdev, &status); +} + +static int mthca_init_hca(struct mthca_dev *mdev) +{ + u8 status; + int err; + struct mthca_adapter adapter; + + if (mthca_is_memfree(mdev)) + err = mthca_init_arbel(mdev); + else + err = mthca_init_tavor(mdev); + + if (err) + return err; + + err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_ADAPTER command failed, aborting.\n")); + goto err_close; + } + if (status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QUERY_ADAPTER returned status 0x%02x, " + "aborting.\n", status)); + err = -EINVAL; + goto err_close; + } + + 
mdev->eq_table.inta_pin = adapter.inta_pin; + mdev->rev_id = adapter.revision_id; + memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id); + + return 0; + +err_close: + mthca_close_hca(mdev); + return err; +} + +static int mthca_setup_hca(struct mthca_dev *mdev) +{ + int err; + u8 status; + + MTHCA_INIT_DOORBELL_LOCK(&mdev->doorbell_lock); + + err = mthca_init_uar_table(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "user access region table, aborting.\n")); + return err; + } + + err = mthca_uar_alloc(mdev, &mdev->driver_uar); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to allocate driver access region, " + "aborting.\n")); + goto err_uar_table_free; + } + + mdev->kar = ioremap((io_addr_t)mdev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE,&mdev->kar_size); + if (!mdev->kar) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Couldn't map kernel access region, " + "aborting.\n")); + err = -ENOMEM; + goto err_uar_free; + } + + err = mthca_init_pd_table(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "protection domain table, aborting.\n")); + goto err_kar_unmap; + } + + err = mthca_init_mr_table(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "memory region table, aborting.\n")); + goto err_pd_table_free; + } + + err = mthca_pd_alloc(mdev, 1, &mdev->driver_pd); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to create driver PD, " + "aborting.\n")); + goto err_mr_table_free; + } + + err = mthca_init_eq_table(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW, ("Failed to initialize " + "event queue table, aborting.\n")); + goto err_pd_free; + } + + err = mthca_cmd_use_events(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to switch to event-driven " + "firmware commands, aborting.\n")); + goto err_eq_table_free; + } + + err = mthca_NOP(mdev, &status); + if (err || status) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("NOP command failed to generate interrupt, aborting.\n")); + if (mdev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)){ + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Try again with MSI/MSI-X disabled.\n")); + }else{ + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("BIOS or ACPI interrupt routing problem?\n")); + } + + goto err_cmd_poll; + } + + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("NOP command IRQ test passed\n")); + + err = mthca_init_cq_table(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "completion queue table, aborting.\n")); + goto err_cmd_poll; + } + + err = mthca_init_srq_table(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "shared receive queue table, aborting.\n")); + goto err_cq_table_free; + } + + err = mthca_init_qp_table(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW, ("Failed to initialize " + "queue pair table, aborting.\n")); + goto err_srq_table_free; + } + + err = mthca_init_av_table(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "address vector table, aborting.\n")); + goto err_qp_table_free; + } + + err = mthca_init_mcg_table(mdev); + if (err) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Failed to initialize " + "multicast group table, aborting.\n")); + goto err_av_table_free; + } + + return 0; + +err_av_table_free: + mthca_cleanup_av_table(mdev); + +err_qp_table_free: + 
mthca_cleanup_qp_table(mdev); + +err_srq_table_free: + mthca_cleanup_srq_table(mdev); + +err_cq_table_free: + mthca_cleanup_cq_table(mdev); + +err_cmd_poll: + mthca_cmd_use_polling(mdev); + +err_eq_table_free: + mthca_cleanup_eq_table(mdev); + +err_pd_free: + mthca_pd_free(mdev, &mdev->driver_pd); + +err_mr_table_free: + mthca_cleanup_mr_table(mdev); + +err_pd_table_free: + mthca_cleanup_pd_table(mdev); + +err_kar_unmap: + iounmap(mdev->kar, mdev->kar_size); + +err_uar_free: + mthca_uar_free(mdev, &mdev->driver_uar); + +err_uar_table_free: + mthca_cleanup_uar_table(mdev); + return err; +} + + +static int mthca_check_fw(struct mthca_dev *mdev, struct pci_device_id *p_id) +{ + int err = 0; + + if (mdev->fw_ver < mthca_hca_table[p_id->driver_data].max_unsupported_fw) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("HCA FW version %d.%d.%d is not supported. Use %d.%d.%d or higher.\n", + (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, + (int) (mdev->fw_ver & 0xffff), + (int) (mthca_hca_table[p_id->driver_data].min_supported_fw >> 32), + (int) (mthca_hca_table[p_id->driver_data].min_supported_fw >> 16) & 0xffff, + (int) (mthca_hca_table[p_id->driver_data].min_supported_fw & 0xffff))); + err = -EINVAL; + } + else + if (mdev->fw_ver < mthca_hca_table[p_id->driver_data].min_supported_fw) { + HCA_PRINT_EV(TRACE_LEVEL_WARNING ,HCA_DBG_LOW , + ("The HCA FW version is %d.%d.%d, which is not the latest one. \n" + "If you meet any issues with the HCA please first try to upgrade the FW to version %d.%d.%d or higher.\n", + (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, + (int) (mdev->fw_ver & 0xffff), + (int) (mthca_hca_table[p_id->driver_data].min_supported_fw >> 32), + (int) (mthca_hca_table[p_id->driver_data].min_supported_fw >> 16) & 0xffff, + (int) (mthca_hca_table[p_id->driver_data].min_supported_fw & 0xffff))); + } + else { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("Current HCA FW version is %d.%d.%d. \n", + (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff, + (int) (mdev->fw_ver & 0xffff))); + } + + return err; +} + +NTSTATUS mthca_init_one(hca_dev_ext_t *ext) +{ + static int mthca_version_printed = 0; + int err; + NTSTATUS status; + struct mthca_dev *mdev; + struct pci_device_id *p_id; + + /* print version */ + if (!mthca_version_printed) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_LOW ,("%s\n", mthca_version)); + ++mthca_version_printed; + } + + /* find the type of device */ +find_pci_dev: + p_id = mthca_find_pci_dev( + (unsigned)ext->hcaConfig.VendorID, + (unsigned)ext->hcaConfig.DeviceID); + if (p_id == NULL) { + status = STATUS_NO_SUCH_DEVICE; + goto end; + } + + /* allocate mdev structure */ + mdev = kzalloc(sizeof *mdev, GFP_KERNEL); + if (!mdev) { + // can't use HCA_PRINT_EV here ! 
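+ // (presumably because the _EV variant also writes an NT event log entry and
+ // needs state that is not available once the device structure itself has
+ // failed to allocate, so only the plain debug print is used here)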
+ HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Device struct alloc failed, " + "aborting.\n")); + status = STATUS_INSUFFICIENT_RESOURCES; + goto end; + } + + /* set some fields */ + mdev->ext = ext; /* pointer to DEVICE OBJECT extension */ + mdev->hca_type = p_id->driver_data; + mdev->ib_dev.mdev = mdev; + if (p_id->driver_data == LIVEFISH) + mdev->mthca_flags |= MTHCA_FLAG_LIVEFISH; + if (mthca_is_livefish(mdev)) + goto done; + if (ext->hca_hidden) + mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; + if (mthca_hca_table[p_id->driver_data].is_memfree) + mdev->mthca_flags |= MTHCA_FLAG_MEMFREE; + if (mthca_hca_table[p_id->driver_data].is_pcie) + mdev->mthca_flags |= MTHCA_FLAG_PCIE; + +//TODO: after we have a FW, capable of reset, +// write a routine, that only presses the button + + /* + * Now reset the HCA before we touch the PCI capabilities or + * attempt a firmware command, since a boot ROM may have left + * the HCA in an undefined state. + */ + status = hca_reset( mdev->ext->cl_ext.p_self_do, p_id->driver_data == TAVOR ); + if ( !NT_SUCCESS( status ) ) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to reset HCA, aborting.\n")); + goto err_free_dev; + } + + if (mthca_cmd_init(mdev)) { + HCA_PRINT_EV(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Failed to init command interface, aborting.\n")); + status = STATUS_DEVICE_DATA_ERROR; + goto err_free_dev; + } + + status = mthca_tune_pci(mdev); + if ( !NT_SUCCESS( status ) ) { + goto err_cmd; + } + + err = mthca_init_hca(mdev); + if (err) { + status = STATUS_UNSUCCESSFUL; + goto err_cmd; + } + + err = mthca_check_fw(mdev, p_id); + if (err) { + status = STATUS_UNSUCCESSFUL; + goto err_close; + } + + err = mthca_setup_hca(mdev); + if (err) { + status = STATUS_UNSUCCESSFUL; + goto err_close; + } + + err = mthca_register_device(mdev); + if (err) { + status = STATUS_UNSUCCESSFUL; + goto err_cleanup; + } + + done: + ext->hca.mdev = mdev; + mdev->state = MTHCA_DEV_INITIALIZED; + return 0; + +err_cleanup: + mthca_cleanup_mcg_table(mdev); + mthca_cleanup_av_table(mdev); + mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); + mthca_cleanup_cq_table(mdev); + mthca_cmd_use_polling(mdev); + mthca_cleanup_eq_table(mdev); + + mthca_pd_free(mdev, &mdev->driver_pd); + + mthca_cleanup_mr_table(mdev); + mthca_cleanup_pd_table(mdev); + mthca_cleanup_uar_table(mdev); + +err_close: + mthca_close_hca(mdev); + +err_cmd: + mthca_cmd_cleanup(mdev); + +err_free_dev: + kfree(mdev); + + /* we failed device initialization - try to simulate "livefish" device to facilitate using FW burning tools */ + if (ext->hcaConfig.DeviceID == PCI_DEVICE_ID_MELLANOX_ARBEL) + ext->hcaConfig.DeviceID = PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT; + ext->hcaConfig.DeviceID += 1; /* generate appropriate "livefish" DevId */ + goto find_pci_dev; + +end: + return status; +} + +void mthca_remove_one(hca_dev_ext_t *ext) +{ + struct mthca_dev *mdev = ext->hca.mdev; + u8 status; + int p; + + ext->hca.mdev = NULL; + if (mdev) { + mdev->state = MTHCA_DEV_UNINITIALIZED; + if (mthca_is_livefish(mdev)) + goto done; + mthca_unregister_device(mdev); + + for (p = 1; p <= mdev->limits.num_ports; ++p) + mthca_CLOSE_IB(mdev, p, &status); + + mthca_cleanup_mcg_table(mdev); + mthca_cleanup_av_table(mdev); + mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); + mthca_cleanup_cq_table(mdev); + mthca_cmd_use_polling(mdev); + mthca_cleanup_eq_table(mdev); + mthca_pd_free(mdev, &mdev->driver_pd); + mthca_cleanup_mr_table(mdev); + mthca_cleanup_pd_table(mdev); + iounmap(mdev->kar, mdev->kar_size); + 
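+ /* the kernel access region mapped in mthca_setup_hca() is unmapped above
+  * before the driver UAR it was taken from is freed below -- the teardown
+  * here mirrors the setup sequence in reverse */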
mthca_uar_free(mdev, &mdev->driver_uar); + mthca_cleanup_uar_table(mdev); + mthca_close_hca(mdev); + mthca_cmd_cleanup(mdev); +done: + kfree(mdev); + } +} + + + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_mcg.c b/branches/IBFD/hw/mthca/kernel/mthca_mcg.c new file mode 100644 index 00000000..ec477a9e --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_mcg.c @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_mcg.tmh" +#endif +#include "mthca_cmd.h" + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_mcg_table) +#pragma alloc_text (PAGE, mthca_cleanup_mcg_table) +#endif + +struct mthca_mgm { + __be32 next_gid_index; + u32 reserved[3]; + u8 gid[16]; + __be32 qp[MTHCA_QP_PER_MGM]; +}; + +static const u8 zero_gid[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + +/* + * Caller must hold MCG table semaphore. gid and mgm parameters must + * be properly aligned for command interface. + * + * Returns 0 unless a firmware command error occurs. + * + * If GID is found in MGM or MGM is empty, *index = *hash, *prev = -1 + * and *mgm holds MGM entry. + * + * if GID is found in AMGM, *index = index in AMGM, *prev = index of + * previous entry in hash chain and *mgm holds AMGM entry. + * + * If no AMGM exists for given gid, *index = -1, *prev = index of last + * entry in hash chain and *mgm holds end of hash chain. 
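+ *
+ * In other words, the loop below starts at the bucket returned by MGID_HASH
+ * and follows next_gid_index (stored shifted left by 6 bits, hence the ">> 6"
+ * when it is read back) from the MGM table into the AMGM entries until it
+ * finds the GID, an all-zero GID, or the end of the chain.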
+ */ +static int find_mgm(struct mthca_dev *dev, + u8 *gid, struct mthca_mailbox *mgm_mailbox, + u16 *hash, int *prev, int *index) +{ + struct mthca_mailbox *mailbox; + struct mthca_mgm *mgm = mgm_mailbox->buf; + u8 *mgid; + int err; + u8 status; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return -ENOMEM; + mgid = mailbox->buf; + + memcpy(mgid, gid, 16); + + err = mthca_MGID_HASH(dev, mailbox, hash, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MGID_HASH returned status %02x\n", status)); + err = -EINVAL; + goto out; + } + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("Hash for %04x:%04x:%04x:%04x:" + "%04x:%04x:%04x:%04x is %04x\n", + cl_ntoh16(((__be16 *) gid)[0]), + cl_ntoh16(((__be16 *) gid)[1]), + cl_ntoh16(((__be16 *) gid)[2]), + cl_ntoh16(((__be16 *) gid)[3]), + cl_ntoh16(((__be16 *) gid)[4]), + cl_ntoh16(((__be16 *) gid)[5]), + cl_ntoh16(((__be16 *) gid)[6]), + cl_ntoh16(((__be16 *) gid)[7]), + *hash)); + + *index = *hash; + *prev = -1; + + do { + err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("READ_MGM returned status %02x\n", status)); + err = -EINVAL; + goto out; + } + + if (!memcmp(mgm->gid, zero_gid, 16)) { + if (*index != *hash) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("Found zero MGID in AMGM.\n")); + err = -EINVAL; + } + goto out; + } + + if (!memcmp(mgm->gid, gid, 16)) + goto out; + + *prev = *index; + *index = cl_ntoh32(mgm->next_gid_index) >> 6; + } while (*index); + + *index = -1; + + out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_mailbox *mailbox; + struct mthca_mgm *mgm; + u16 hash; + int index, prev; + int link = 0; + int i; + int err; + u8 status; + + UNREFERENCED_PARAMETER(lid); + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; + + down(&dev->mcg_table.mutex); + + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); + if (err) + goto out; + + if (index != -1) { + if (!memcmp(mgm->gid, zero_gid, 16)) + memcpy(mgm->gid, gid->raw, 16); + } else { + link = 1; + + index = mthca_alloc(&dev->mcg_table.alloc); + if (index == -1) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("No AMGM entries left\n")); + err = -ENOMEM; + goto out; + } + + err = mthca_READ_MGM(dev, index, mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("READ_MGM returned status %02x\n", status)); + err = -EINVAL; + goto out; + } + + memset(mgm, 0, sizeof *mgm); + memcpy(mgm->gid, gid->raw, 16); + mgm->next_gid_index = 0; + } + + for (i = 0; i < MTHCA_QP_PER_MGM; ++i) + if (mgm->qp[i] == cl_hton32(ibqp->qp_num | (1 << 31))) { + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("QP %06x already a member of MGM\n", + ibqp->qp_num)); + err = 0; + goto out; + } else if (!(mgm->qp[i] & cl_hton32(1UL << 31))) { + mgm->qp[i] = cl_hton32(ibqp->qp_num | (1 << 31)); + break; + } + + if (i == MTHCA_QP_PER_MGM) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("MGM at index %x is full.\n", index)); + err = -ENOMEM; + goto out; + } + + err = mthca_WRITE_MGM(dev, index, mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); + err = -EINVAL; + goto out; + } + + if (!link) + 
goto out; + + err = mthca_READ_MGM(dev, prev, mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("READ_MGM returned status %02x\n", status)); + err = -EINVAL; + goto out; + } + + mgm->next_gid_index = cl_hton32(index << 6); + + err = mthca_WRITE_MGM(dev, prev, mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); + err = -EINVAL; + } + +out: + if (err && link && index != -1) { + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); + } + KeReleaseMutex(&dev->mcg_table.mutex,FALSE); + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_mailbox *mailbox; + struct mthca_mgm *mgm; + u16 hash; + int prev, index; + int i, loc; + int err; + u8 status; + + UNREFERENCED_PARAMETER(lid); + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; + + down(&dev->mcg_table.mutex); + + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); + if (err) + goto out; + + if (index == -1) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW, ("MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " + "not found\n", + cl_ntoh16(((__be16 *) gid->raw)[0]), + cl_ntoh16(((__be16 *) gid->raw)[1]), + cl_ntoh16(((__be16 *) gid->raw)[2]), + cl_ntoh16(((__be16 *) gid->raw)[3]), + cl_ntoh16(((__be16 *) gid->raw)[4]), + cl_ntoh16(((__be16 *) gid->raw)[5]), + cl_ntoh16(((__be16 *) gid->raw)[6]), + cl_ntoh16(((__be16 *) gid->raw)[7]))); + err = -EINVAL; + goto out; + } + + for (loc = -1, i = 0; i < MTHCA_QP_PER_MGM; ++i) { + if (mgm->qp[i] == cl_hton32(ibqp->qp_num | (1 << 31))) + loc = i; + if (!(mgm->qp[i] & cl_hton32(1UL << 31))) + break; + } + + if (loc == -1) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("QP %06x not found in MGM\n", ibqp->qp_num)); + err = -EINVAL; + goto out; + } + + mgm->qp[loc] = mgm->qp[i - 1]; + mgm->qp[i - 1] = 0; + + err = mthca_WRITE_MGM(dev, index, mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); + err = -EINVAL; + goto out; + } + + if (i != 1) + goto out; + + if (prev == -1) { + /* Remove entry from MGM */ + int amgm_index_to_free = cl_ntoh32(mgm->next_gid_index) >> 6; + if (amgm_index_to_free) { + err = mthca_READ_MGM(dev, amgm_index_to_free, + mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("READ_MGM returned status %02x\n", + status)); + err = -EINVAL; + goto out; + } + } else + RtlZeroMemory(mgm->gid, 16); + + err = mthca_WRITE_MGM(dev, index, mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); + err = -EINVAL; + goto out; + } + if (amgm_index_to_free) { + BUG_ON(amgm_index_to_free < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, amgm_index_to_free); + } + } else { + /* Remove entry from AMGM */ + int curr_next_index = cl_ntoh32(mgm->next_gid_index) >> 6; + err = mthca_READ_MGM(dev, prev, mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("READ_MGM returned status %02x\n", status)); + err = -EINVAL; + goto out; + } + + mgm->next_gid_index = cl_hton32(curr_next_index << 6); + + err = mthca_WRITE_MGM(dev, prev, 
mailbox, &status); + if (err) + goto out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_LOW ,("WRITE_MGM returned status %02x\n", status)); + err = -EINVAL; + goto out; + } + BUG_ON(index < dev->limits.num_mgms); + mthca_free(&dev->mcg_table.alloc, index); + } + + out: + KeReleaseMutex(&dev->mcg_table.mutex, FALSE); + mthca_free_mailbox(dev, mailbox); + return err; +} + +int mthca_init_mcg_table(struct mthca_dev *dev) +{ + int err; + int table_size = dev->limits.num_mgms + dev->limits.num_amgms; + + err = mthca_alloc_init(&dev->mcg_table.alloc, + table_size, + table_size - 1, + dev->limits.num_mgms); + + if (err) + return err; + + KeInitializeMutex(&dev->mcg_table.mutex,0); + + return 0; +} + +void mthca_cleanup_mcg_table(struct mthca_dev *dev) +{ + mthca_alloc_cleanup(&dev->mcg_table.alloc); +} + + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_memfree.c b/branches/IBFD/hw/mthca/kernel/mthca_memfree.c new file mode 100644 index 00000000..975ce6ab --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_memfree.c @@ -0,0 +1,729 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "hca_driver.h" +#include "mthca_memfree.h" +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_memfree.tmh" +#endif +#include "mthca_cmd.h" + +/* + * We allocate in as big chunks as we can, up to a maximum of 256 KB + * per chunk. 
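+ *
+ * 1 << 18 bytes = 256 KB; both the raw allocation unit (MTHCA_ICM_ALLOC_SIZE)
+ * and the per-table mapping chunk (MTHCA_TABLE_CHUNK_SIZE) below are set to
+ * this value, and mthca_alloc_icm() falls back to smaller allocation orders
+ * when a contiguous block of that size cannot be obtained.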
+ */ +enum { + MTHCA_ICM_ALLOC_SIZE = 1 << 18, + MTHCA_TABLE_CHUNK_SIZE = 1 << 18 +}; + +#pragma warning( disable : 4200) +struct mthca_user_db_table { + KMUTEX mutex; + struct { + u64 uvirt; + struct scatterlist mem; + int refcount; + } page[0]; +}; +#pragma warning( default : 4200) + +void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) +{ + struct mthca_icm_chunk *chunk, *tmp; + int i; + + if (!icm) + return; + + list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list,struct mthca_icm_chunk,struct mthca_icm_chunk) { + if (chunk->nsg > 0) + pci_unmap_sg(dev, chunk->mem, chunk->npages, + PCI_DMA_BIDIRECTIONAL); + + for (i = 0; i < chunk->npages; ++i) + free_dma_mem_map(dev, &chunk->mem[i], PCI_DMA_BIDIRECTIONAL ); + + kfree(chunk); + } + + kfree(icm); +} + +/* allocate device memory of 'npages' pages as a list of chunks, each containing an array of + continuous buffers. Allocated physical pages, and then they are mapped to bus space !*/ +struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, + unsigned int gfp_mask) +{ + struct mthca_icm *icm; + struct mthca_icm_chunk *chunk = NULL; + int cur_order; + + icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!icm) + return icm; + + icm->refcount = 0; + INIT_LIST_HEAD(&icm->chunk_list); + + cur_order = get_order(MTHCA_ICM_ALLOC_SIZE); + + while (npages > 0) { + /* allocate a new chunk */ + if (!chunk) { + chunk = kmalloc(sizeof *chunk, + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!chunk) + goto fail; + + RtlZeroMemory( chunk, sizeof *chunk ); + list_add_tail(&chunk->list, &icm->chunk_list); + } + + /* fill chunk with allocated consistent areas of integer pages each */ + while (1 << cur_order > npages) + /* try to take a max (required) number of pages */ + --cur_order; + + /* try to allocate a contiguous PHYSICAL buffer */ + alloc_dma_zmem( dev, PAGE_SIZE << cur_order, + &chunk->mem[chunk->npages] ); + + /* if succeded - proceed handling */ + if (chunk->mem[chunk->npages].page) { + + /* check, whether a chunk is full */ + if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) { + /* it's full --> map physical addresses to bus ones */ + chunk->nsg = pci_map_sg(dev, chunk->mem, + chunk->npages, PCI_DMA_BIDIRECTIONAL ); + + if (chunk->nsg <= 0) + goto fail; + + chunk = NULL; + } + + /* calculate the remaining memory to be allocated */ + npages -= 1 << cur_order; + } + /* failed to allocate - lets decrement buffer size and try once more */ + else { + --cur_order; + if (cur_order < 0) + goto fail; + } + } + + /* last, not full chunk: map physical addresses to bus ones */ + if (chunk) { + chunk->nsg = pci_map_sg(dev, chunk->mem, + chunk->npages, + PCI_DMA_BIDIRECTIONAL); + + if (chunk->nsg <= 0) + goto fail; + } + + return icm; + +fail: + mthca_free_icm(dev, icm); + return NULL; +} + +int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) +{ + int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + int ret = 0; + u8 status; + + down(&table->mutex); + + if (table->icm[i]) { + ++table->icm[i]->refcount; + goto out; + } + + table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + (table->lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | + __GFP_NOWARN); + if (!table->icm[i]) { + ret = -ENOMEM; + goto out; + } + + if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE, + &status) || status) { + mthca_free_icm(dev, table->icm[i]); + table->icm[i] = NULL; + ret = -ENOMEM; + goto out; + } + + ++table->icm[i]->refcount; + +out: + up(&table->mutex); + return ret; +} + +void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) +{ + int i; + u8 status; + + if (!mthca_is_memfree(dev)) + return; + + i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + + down(&table->mutex); + + if (--table->icm[i]->refcount == 0) { + mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, + MTHCA_TABLE_CHUNK_SIZE >> 12, &status); + mthca_free_icm(dev, table->icm[i]); + table->icm[i] = NULL; + } + + up(&table->mutex); +} + +void *mthca_table_find(struct mthca_icm_table *table, int obj) +{ + int idx, offset, i; + struct mthca_icm_chunk *chunk; + struct mthca_icm *icm; + struct page *page = NULL; + + if (!table->lowmem) + return NULL; + + down(&table->mutex); + + idx = (obj & (table->num_obj - 1)) * table->obj_size; + icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE]; + offset = idx % MTHCA_TABLE_CHUNK_SIZE; + + if (!icm) + goto out; + + list_for_each_entry(chunk, &icm->chunk_list, list,struct mthca_icm_chunk) { + for (i = 0; i < chunk->npages; ++i) { + if ((int)chunk->mem[i].length >= offset) { + page = chunk->mem[i].page; + goto out; + } + offset -= chunk->mem[i].length; + } + } + +out: + up(&table->mutex); + return page ? (char*)page + offset : NULL; +} + +int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table, + int start, int end) +{ + int inc = MTHCA_TABLE_CHUNK_SIZE / table->obj_size; + int i, err; + + for (i = start; i <= end; i += inc) { + err = mthca_table_get(dev, table, i); + if (err) + goto fail; + } + + return 0; + +fail: + while (i > start) { + i -= inc; + mthca_table_put(dev, table, i); + } + + return err; +} + +void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, + int start, int end) +{ + int i; + + if (!mthca_is_memfree(dev)) + return; + + for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size) + mthca_table_put(dev, table, i); +} + +struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, + u64 virt, int obj_size, + int nobj, int reserved, + int use_lowmem) +{ + struct mthca_icm_table *table; + int num_icm; + unsigned chunk_size; + int i; + u8 status; + + num_icm = (obj_size * nobj + MTHCA_TABLE_CHUNK_SIZE -1) / MTHCA_TABLE_CHUNK_SIZE; + + table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL); + if (!table) + return NULL; + + table->virt = virt; + table->num_icm = num_icm; + table->num_obj = nobj; + table->obj_size = obj_size; + table->lowmem = use_lowmem; + KeInitializeMutex( &table->mutex, 0 ); + + for (i = 0; i < num_icm; ++i) + table->icm[i] = NULL; + + for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { + chunk_size = MTHCA_TABLE_CHUNK_SIZE; + if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size) + chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE; + + table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT, + (use_lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | + __GFP_NOWARN); + if (!table->icm[i]) + goto err; + if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE, + &status) || status) { + mthca_free_icm(dev, table->icm[i]); + table->icm[i] = NULL; + goto err; + } + + /* + * Add a reference to this ICM chunk so that it never + * gets freed (since it contains reserved firmware objects). + */ + ++table->icm[i]->refcount; + } + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW, + ("Allocated/max chunks %d:%d, reserved/max objects %#x:%#x, one/total size %#x:%#x at %I64x \n", + i, num_icm, reserved, nobj, obj_size, nobj * obj_size, (u64) virt)); + + return table; + +err: + for (i = 0; i < num_icm; ++i) + if (table->icm[i]) { + mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE, + MTHCA_TABLE_CHUNK_SIZE >> 12, &status); + mthca_free_icm(dev, table->icm[i]); + } + + kfree(table); + + return NULL; +} + +void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table) +{ + int i; + u8 status; + + for (i = 0; i < table->num_icm; ++i) + if (table->icm[i]) { + mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, + MTHCA_TABLE_CHUNK_SIZE >> 12, &status); + mthca_free_icm(dev, table->icm[i]); + } + + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW, + ( "Released chunks %d, objects %#x, one/total size %#x:%#x at %I64x \n", + table->num_icm, table->num_obj, table->obj_size, + table->num_obj * table->obj_size, (u64) table->virt)); + kfree(table); +} + +static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page) +{ + return dev->uar_table.uarc_base + + uar->index * dev->uar_table.uarc_size + + page * 4096; +} + +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr, void **kva) +{ + int ret = 0; + u8 status; + int i; + + if (!mthca_is_memfree(dev)) + return 0; + + if (index < 0 || index > dev->uar_table.uarc_size / 8) + return -EINVAL; + + down(&db_tab->mutex); + + i = index / MTHCA_DB_REC_PER_PAGE; + + if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) || + (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) || + (uaddr & 4095)) { + ret = -EINVAL; + goto out; + } + + if (db_tab->page[i].refcount) { + ++db_tab->page[i].refcount; + goto done; + } + + ret = get_user_pages(dev, uaddr & PAGE_MASK, 1, 1, + &db_tab->page[i].mem); + if (ret < 0) + goto out; + + db_tab->page[i].mem.length = 4096; + db_tab->page[i].mem.offset = (unsigned)(uaddr & ~PAGE_MASK); + + ret = pci_map_sg(dev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + if (ret <= 0) { + put_page(&db_tab->page[i].mem); + goto out; + } + + ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem), + mthca_uarc_virt(dev, uar, i), &status); + if (!ret && status) + ret = -EINVAL; + if (ret) { + pci_unmap_sg(dev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(&db_tab->page[i].mem); + goto out; + } + + db_tab->page[i].uvirt = uaddr; + db_tab->page[i].refcount = 1; + +done: + if (kva) + *kva = db_tab->page[i].mem.page; + +out: + up(&db_tab->mutex); + return ret; +} + +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index) +{ + u8 status; + int ix = index / MTHCA_DB_REC_PER_PAGE; + UNREFERENCED_PARAMETER(uar); + + if (!mthca_is_memfree(dev)) + return; + + /* + * To make our bookkeeping simpler, we don't unmap DB + * pages until we clean up the whole db table. 
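 * (The code below does, however, unmap and release a page once its
 * refcount reaches zero, so this remark seems to describe an earlier
 * scheme. For reference, with 4 KB UARC pages and 8-byte records,
 * MTHCA_DB_REC_PER_PAGE is 4096 / 8 = 512, so ix = index / 512 above
 * selects the page being released.)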
+ */ + + down(&db_tab->mutex); + + if (!--db_tab->page[ix].refcount) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, ix), 1, &status); + pci_unmap_sg(dev, &db_tab->page[ix].mem, 1, PCI_DMA_TODEVICE); + put_page(&db_tab->page[ix].mem); + db_tab->page[ix].uvirt = 0; + } + + up(&db_tab->mutex); +} + +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) +{ + struct mthca_user_db_table *db_tab; + int npages; + int i; + + if (!mthca_is_memfree(dev)) + return NULL; + + npages = dev->uar_table.uarc_size / 4096; + db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL); + if (!db_tab) + return ERR_PTR(-ENOMEM); + + KeInitializeMutex(&db_tab->mutex,0); + for (i = 0; i < npages; ++i) { + db_tab->page[i].refcount = 0; + db_tab->page[i].uvirt = 0; + } + + return db_tab; +} + +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab) +{ + int i; + u8 status; + + if (!mthca_is_memfree(dev)) + return; + + for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) { + if (db_tab->page[i].uvirt) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); + pci_unmap_sg(dev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(&db_tab->page[i].mem); + } + } + + kfree(db_tab); +} + +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, u32 qn, __be32 **db) +{ + int group; + int start, end, dir; + int i, j; + struct mthca_db_page *page; + int ret = 0; + u8 status; + CPU_2_BE64_PREP; + + down(&dev->db_tab->mutex); + switch (type) { + case MTHCA_DB_TYPE_CQ_ARM: + case MTHCA_DB_TYPE_SQ: + group = 0; + start = 0; + end = dev->db_tab->max_group1; + dir = 1; + break; + + case MTHCA_DB_TYPE_CQ_SET_CI: + case MTHCA_DB_TYPE_RQ: + case MTHCA_DB_TYPE_SRQ: + group = 1; + start = dev->db_tab->npages - 1; + end = dev->db_tab->min_group2; + dir = -1; + break; + + default: + ret = -EINVAL; + goto out; + } + + /* try to find an unused index for a new page (in the bitmap) */ + for (i = start; i != end; i += dir) + if (dev->db_tab->page[i].db_rec && + !bitmap_full(dev->db_tab->page[i].used, + MTHCA_DB_REC_PER_PAGE)) { + page = dev->db_tab->page + i; + goto found; + } + + for (i = start; i != end; i += dir) { + if (!dev->db_tab->page[i].db_rec) { + page = dev->db_tab->page + i; + goto alloc; + } + } + + /* if there are no more place for DBs - get out */ + if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) { + ret = -ENOMEM; + goto out; + } + + /* fix limits indeces */ + if (group == 0) + ++dev->db_tab->max_group1; + else + --dev->db_tab->min_group2; + + /* allocate page */ + page = dev->db_tab->page + end; + +alloc: + alloc_dma_zmem_map(dev, 4096, PCI_DMA_BIDIRECTIONAL, &page->sg); + if (!page->sg.page) { + ret = -ENOMEM; + goto out; + } + page->db_rec = (__be64*)page->sg.page; + + ret = mthca_MAP_ICM_page(dev, page->sg.dma_address, + mthca_uarc_virt(dev, &dev->driver_uar, i), &status); + if (!ret && status) + ret = -EINVAL; + if (ret) { + free_dma_mem_map(dev, &page->sg, PCI_DMA_BIDIRECTIONAL); + goto out; + } + + bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE); + +found: + j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE); + set_bit(j, (long*)page->used); + + if (group == 1) + j = MTHCA_DB_REC_PER_PAGE - 1 - j; + + ret = i * MTHCA_DB_REC_PER_PAGE + j; + + page->db_rec[j] = CPU_2_BE64((((ULONGLONG)qn << 8) | (type << 5))); + + *db = (__be32 *) &page->db_rec[j]; +out: + up(&dev->db_tab->mutex); + + return ret; +} + +void mthca_free_db(struct mthca_dev *dev, int type, int db_index) +{ + int i, j; 
+ struct mthca_db_page *page; + u8 status; + + UNREFERENCED_PARAMETER(type); + + i = db_index / MTHCA_DB_REC_PER_PAGE; + j = db_index % MTHCA_DB_REC_PER_PAGE; + + page = dev->db_tab->page + i; + + down(&dev->db_tab->mutex); + + page->db_rec[j] = 0; + if (i >= dev->db_tab->min_group2) + j = MTHCA_DB_REC_PER_PAGE - 1 - j; + clear_bit(j, (long*)page->used); + + if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && + i >= dev->db_tab->max_group1 - 1) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); + + free_dma_mem_map(dev, &page->sg, PCI_DMA_BIDIRECTIONAL); + page->db_rec = NULL; + + if (i == dev->db_tab->max_group1) { + --dev->db_tab->max_group1; + /* XXX may be able to unmap more pages now */ + } + if (i == dev->db_tab->min_group2) + ++dev->db_tab->min_group2; + } + + up(&dev->db_tab->mutex); +} + +int mthca_init_db_tab(struct mthca_dev *dev) +{ + int i; + + if (!mthca_is_memfree(dev)) + return 0; + + dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL); + if (!dev->db_tab) + return -ENOMEM; + + KeInitializeMutex(&dev->db_tab->mutex, 0); + /* number of pages, needed for UAR context table */ + dev->db_tab->npages = dev->uar_table.uarc_size / 4096; + dev->db_tab->max_group1 = 0; + dev->db_tab->min_group2 = dev->db_tab->npages - 1; + /* allocate array of structures, containing descrpitors of UARC pages */ + dev->db_tab->page = kmalloc(dev->db_tab->npages * + sizeof *dev->db_tab->page, + GFP_KERNEL); + if (!dev->db_tab->page) { + kfree(dev->db_tab); + return -ENOMEM; + } + + for (i = 0; i < dev->db_tab->npages; ++i) + dev->db_tab->page[i].db_rec = NULL; + + return 0; +} + +void mthca_cleanup_db_tab(struct mthca_dev *dev) +{ + int i; + u8 status; + + if (!mthca_is_memfree(dev)) + return; + + /* + * Because we don't always free our UARC pages when they + * become empty to make mthca_free_db() simpler we need to + * make a sweep through the doorbell pages and free any + * leftover pages now. + */ + for (i = 0; i < dev->db_tab->npages; ++i) { + if (!dev->db_tab->page[i].db_rec) + continue; + + if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_LOW ,("Kernel UARC page %d not empty\n", i)); + + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); + + free_dma_mem_map(dev, &dev->db_tab->page[i].sg, PCI_DMA_BIDIRECTIONAL); + } + + kfree(dev->db_tab->page); + kfree(dev->db_tab); +} diff --git a/branches/IBFD/hw/mthca/kernel/mthca_memfree.h b/branches/IBFD/hw/mthca/kernel/mthca_memfree.h new file mode 100644 index 00000000..184a3577 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_memfree.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef MTHCA_MEMFREE_H +#define MTHCA_MEMFREE_H + + +#define MTHCA_ICM_CHUNK_LEN \ + ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \ + (sizeof (struct scatterlist))) + +struct mthca_icm_chunk { + struct list_head list; + int npages; + int nsg; + struct scatterlist mem[MTHCA_ICM_CHUNK_LEN]; +}; + +struct mthca_icm { + struct list_head chunk_list; + int refcount; +}; + +#pragma warning( disable : 4200) +struct mthca_icm_table { + u64 virt; + int num_icm; + int num_obj; + int obj_size; + int lowmem; + KMUTEX mutex; + struct mthca_icm *icm[0]; +}; +#pragma warning( default : 4200) + +struct mthca_icm_iter { + struct mthca_icm *icm; + struct mthca_icm_chunk *chunk; + int page_idx; +}; + +struct mthca_dev; + +struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, + unsigned int gfp_mask); +void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm); + +struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, + u64 virt, int obj_size, + int nobj, int reserved, + int use_lowmem); +void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table); +int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj); +void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj); +void *mthca_table_find(struct mthca_icm_table *table, int obj); +int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table, + int start, int end); +void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, + int start, int end); + +static inline void mthca_icm_first(struct mthca_icm *icm, + struct mthca_icm_iter *iter) +{ + iter->icm = icm; + iter->chunk = list_empty(&icm->chunk_list) ? 
+ NULL : list_entry(icm->chunk_list.next, + struct mthca_icm_chunk, list); + iter->page_idx = 0; +} + +static inline int mthca_icm_last(struct mthca_icm_iter *iter) +{ + return !iter->chunk; +} + +static inline void mthca_icm_next(struct mthca_icm_iter *iter) +{ + if (++iter->page_idx >= iter->chunk->nsg) { + if (iter->chunk->list.next == &iter->icm->chunk_list) { + iter->chunk = NULL; + return; + } + + iter->chunk = list_entry(iter->chunk->list.next, + struct mthca_icm_chunk, list); + iter->page_idx = 0; + } +} + +static inline dma_addr_t mthca_icm_addr(struct mthca_icm_iter *iter) +{ + return sg_dma_address(&iter->chunk->mem[iter->page_idx]); +} + +static inline unsigned long mthca_icm_size(struct mthca_icm_iter *iter) +{ + return sg_dma_len(&iter->chunk->mem[iter->page_idx]); +} + +enum { + MTHCA_DB_REC_PER_PAGE = 4096 / 8 +}; + +struct mthca_db_page { + DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE); + __be64 *db_rec; + struct scatterlist sg; +}; + +struct mthca_db_table { + int npages; + int max_group1; + int min_group2; + struct mthca_db_page *page; + KMUTEX mutex; +}; + +enum mthca_db_type { + MTHCA_DB_TYPE_INVALID = 0x0, + MTHCA_DB_TYPE_CQ_SET_CI = 0x1, + MTHCA_DB_TYPE_CQ_ARM = 0x2, + MTHCA_DB_TYPE_SQ = 0x3, + MTHCA_DB_TYPE_RQ = 0x4, + MTHCA_DB_TYPE_SRQ = 0x5, + MTHCA_DB_TYPE_GROUP_SEP = 0x7 +}; + +struct mthca_user_db_table; +struct mthca_uar; + +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr, void **kva); +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index); +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev); +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab); + +int mthca_init_db_tab(struct mthca_dev *dev); +void mthca_cleanup_db_tab(struct mthca_dev *dev); +int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, u32 qn, __be32 **db); +void mthca_free_db(struct mthca_dev *dev, int type, int db_index); + +#endif /* MTHCA_MEMFREE_H */ diff --git a/branches/IBFD/hw/mthca/kernel/mthca_mr.c b/branches/IBFD/hw/mthca/kernel/mthca_mr.c new file mode 100644 index 00000000..f76b4b0c --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_mr.c @@ -0,0 +1,970 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_mr.tmh" +#endif +#include "mthca_cmd.h" +#include "mthca_memfree.h" + +static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order); +static void mthca_buddy_cleanup(struct mthca_buddy *buddy); + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_buddy_init) +#pragma alloc_text (PAGE, mthca_buddy_cleanup) +#pragma alloc_text (PAGE, mthca_init_mr_table) +#pragma alloc_text (PAGE, mthca_cleanup_mr_table) +#endif + +struct mthca_mtt { + struct mthca_buddy *buddy; + int order; + u32 first_seg; +}; + +/* + * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. + */ +#pragma pack(push,1) +struct mthca_mpt_entry { + __be32 flags; + __be32 page_size; + __be32 key; + __be32 pd; + __be64 start; + __be64 length; + __be32 lkey; + __be32 window_count; + __be32 window_count_limit; + __be64 mtt_seg; + __be32 mtt_sz; /* Arbel only */ + u32 reserved[2]; +} ; +#pragma pack(pop) + +#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) +#define MTHCA_MPT_FLAG_MIO (1 << 17) +#define MTHCA_MPT_FLAG_BIND_ENABLE (1 << 15) +#define MTHCA_MPT_FLAG_PHYSICAL (1 << 9) +#define MTHCA_MPT_FLAG_REGION (1 << 8) + +#define MTHCA_MTT_FLAG_PRESENT 1 + +#define MTHCA_MPT_STATUS_SW 0xF0 +#define MTHCA_MPT_STATUS_HW 0x00 + +#define SINAI_FMR_KEY_INC 0x1000000 + +static void dump_mtt(u32 print_lvl, __be64 *mtt_entry ,int list_len) +{ + int i; + UNUSED_PARAM_WOWPP(mtt_entry); // for release version + UNUSED_PARAM_WOWPP(print_lvl); + HCA_PRINT(print_lvl ,HCA_DBG_MEMORY ,("Dumping MTT entry len %d :\n",list_len)); + for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; i=i+4) { + HCA_PRINT(print_lvl ,HCA_DBG_MEMORY ,("[%02x] %016I64x %016I64x %016I64x %016I64x\n",i, + cl_ntoh64(mtt_entry[i]), + cl_ntoh64(mtt_entry[i+1]), + cl_ntoh64(mtt_entry[i+2]), + cl_ntoh64(mtt_entry[i+3]))); + } +} + + +static void dump_mpt(u32 print_lvl, struct mthca_mpt_entry *mpt_entry ) +{ + int i; + UNUSED_PARAM_WOWPP(mpt_entry); // for release version + UNUSED_PARAM_WOWPP(print_lvl); + HCA_PRINT(print_lvl ,HCA_DBG_MEMORY ,("Dumping MPT entry %08x :\n", mpt_entry->key)); + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; i=i+4) { + HCA_PRINT(print_lvl ,HCA_DBG_MEMORY ,("[%02x] %08x %08x %08x %08x \n",i, + cl_ntoh32(((__be32 *) mpt_entry)[i]), + cl_ntoh32(((__be32 *) mpt_entry)[i+1]), + cl_ntoh32(((__be32 *) mpt_entry)[i+2]), + cl_ntoh32(((__be32 *) mpt_entry)[i+3]))); + } +} + + + + + + + + +/* + * Buddy allocator for MTT segments (currently not very efficient + * since it doesn't keep a free list and just searches linearly + * through the bitmaps) + */ + +static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) +{ + int o; + u32 m; + u32 seg; + SPIN_LOCK_PREP(lh); + + spin_lock(&buddy->lock, &lh); + + for (o = order; o <= buddy->max_order; ++o) { + m = 1 << (buddy->max_order - o); + seg = find_first_bit(buddy->bits[o], m); + if (seg < m) + goto found; + } + + spin_unlock(&lh); + return (u32)-1; + + found: + clear_bit(seg, (long*)buddy->bits[o]); + + while (o > order) { + --o; + seg <<= 1; + set_bit(seg ^ 1, (long*)buddy->bits[o]); + } + + spin_unlock(&lh); + + seg <<= order; + + return seg; +} + +static void 
mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) +{ + SPIN_LOCK_PREP(lh); + + seg >>= order; + + spin_lock(&buddy->lock, &lh); + + while (test_bit(seg ^ 1, buddy->bits[order])) { + clear_bit(seg ^ 1, (long*)buddy->bits[order]); + seg >>= 1; + ++order; + } + + set_bit(seg, (long*)buddy->bits[order]); + + spin_unlock(&lh); +} + +static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) +{ + int i, s; + + buddy->max_order = max_order; + spin_lock_init(&buddy->lock); + + buddy->bits = kmalloc((buddy->max_order + 1) * sizeof (long *), + GFP_KERNEL); + if (!buddy->bits) + goto err_out; + + RtlZeroMemory(buddy->bits, (buddy->max_order + 1) * sizeof (long *)); + + for (i = 0; i <= buddy->max_order; ++i) { + s = BITS_TO_LONGS(1 << (buddy->max_order - i)); + buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL); + if (!buddy->bits[i]) + goto err_out_free; + bitmap_zero(buddy->bits[i], + 1 << (buddy->max_order - i)); + } + + set_bit(0, (long*)buddy->bits[buddy->max_order]); + + return 0; + +err_out_free: + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + + kfree(buddy->bits); + +err_out: + return -ENOMEM; +} + +static void mthca_buddy_cleanup(struct mthca_buddy *buddy) +{ + int i; + + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + + kfree(buddy->bits); +} + +static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, + struct mthca_buddy *buddy) +{ + u32 seg = mthca_buddy_alloc(buddy, order); + + if (seg == -1) + return (u32)-1; + + if (mthca_is_memfree(dev)) + if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg, + seg + (1 << order) - 1)) { + mthca_buddy_free(buddy, seg, order); + seg = (u32)-1; + } + + return seg; +} + +static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, + struct mthca_buddy *buddy) +{ + struct mthca_mtt *mtt; + int i; + HCA_ENTER(HCA_DBG_MEMORY); + if (size <= 0) + return ERR_PTR(-EINVAL); + + mtt = kmalloc(sizeof *mtt, GFP_KERNEL); + if (!mtt) + return ERR_PTR(-ENOMEM); + + mtt->buddy = buddy; + mtt->order = 0; + for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1) + ++mtt->order; + + mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); + if (mtt->first_seg == -1) { + kfree(mtt); + return ERR_PTR(-ENOMEM); + } + HCA_EXIT(HCA_DBG_MEMORY); + return mtt; +} + +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size) +{ + return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy); +} + +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt) +{ + if (!mtt) + return; + + mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order); + + mthca_table_put_range(dev, dev->mr_table.mtt_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + + kfree(mtt); +} + +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len) +{ + struct mthca_mailbox *mailbox; + __be64 *mtt_entry; + int err = 0; + u8 status; + int i; + u64 val = 1; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mtt_entry = mailbox->buf; + + while (list_len > 0) { + val = dev->mr_table.mtt_base + + mtt->first_seg * MTHCA_MTT_SEG_SIZE + start_index * 8; + //TODO: a workaround of bug in _byteswap_uint64 + // in release version optimizer puts the above expression into the function call and generates incorrect code + // so we call the macro to work around that + mtt_entry[0] = CL_HTON64(val); + mtt_entry[1] = 0; + for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; 
++i) { + val = buffer_list[i]; + // BUG in compiler: it can't perform OR on u64 !!! We perform OR on the low dword + *(PULONG)&val |= MTHCA_MTT_FLAG_PRESENT; + mtt_entry[i + 2] = cl_hton64(val); + } + + /* + * If we have an odd number of entries to write, add + * one more dummy entry for firmware efficiency. + */ + if (i & 1) + mtt_entry[i + 2] = 0; + + dump_mtt(TRACE_LEVEL_VERBOSE, mtt_entry ,i); + + err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status); + if (err) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("WRITE_MTT failed (%d)\n", err)); + goto out; + } + if (status) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("WRITE_MTT returned status 0x%02x\n", + status)); + err = -EINVAL; + goto out; + } + + list_len -= i; + start_index += i; + buffer_list += i; + } + +out: + mthca_free_mailbox(dev, mailbox); + return err; +} + +static inline u32 tavor_hw_index_to_key(u32 ind) +{ + return ind; +} + +static inline u32 tavor_key_to_hw_index(u32 key) +{ + return key; +} + +static inline u32 arbel_hw_index_to_key(u32 ind) +{ + return (ind >> 24) | (ind << 8); +} + +static inline u32 arbel_key_to_hw_index(u32 key) +{ + return (key << 24) | (key >> 8); +} + +static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind) +{ + if (mthca_is_memfree(dev)) + return arbel_hw_index_to_key(ind); + else + return tavor_hw_index_to_key(ind); +} + +static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key) +{ + if (mthca_is_memfree(dev)) + return arbel_key_to_hw_index(key); + else + return tavor_key_to_hw_index(key); +} + + +static inline u32 adjust_key(struct mthca_dev *dev, u32 key) +{ + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + return ((key << 20) & 0x800000) | (key & 0x7fffff); + else + return key; +} + +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, mthca_mpt_access_t access, struct mthca_mr *mr) +{ + struct mthca_mailbox *mailbox; + struct mthca_mpt_entry *mpt_entry; + u32 key; + int err; + u8 status; + CPU_2_BE64_PREP; + + WARN_ON(buffer_size_shift >= 32); + + key = mthca_alloc(&dev->mr_table.mpt_alloc); + if (key == -1) + return -ENOMEM; + mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->mr_table.mpt_table, key); + if (err) + goto err_out_mpt_free; + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_out_table; + } + mpt_entry = mailbox->buf; + + mpt_entry->flags = cl_hton32(MTHCA_MPT_FLAG_SW_OWNS | + MTHCA_MPT_FLAG_MIO | + MTHCA_MPT_FLAG_REGION | + access); + if (!mr->mtt) + mpt_entry->flags |= cl_hton32(MTHCA_MPT_FLAG_PHYSICAL); + + mpt_entry->page_size = cl_hton32(buffer_size_shift - 12); + mpt_entry->key = cl_hton32(key); + mpt_entry->pd = cl_hton32(pd); + mpt_entry->start = cl_hton64(iova); + mpt_entry->length = cl_hton64(total_size); + + RtlZeroMemory(&mpt_entry->lkey, + sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); + + if (mr->mtt) + mpt_entry->mtt_seg = + CPU_2_BE64(dev->mr_table.mtt_base + + mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); + + { + dump_mpt(TRACE_LEVEL_VERBOSE, mpt_entry); + } + + err = mthca_SW2HW_MPT(dev, mailbox, + key & (dev->limits.num_mpts - 1), + &status); + if (err) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("SW2HW_MPT failed (%d)\n", err)); + goto err_out_mailbox; + } else if (status) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("SW2HW_MPT returned status 0x%02x\n", + status)); + err = -EINVAL; + goto err_out_mailbox; + } + + 
mthca_free_mailbox(dev, mailbox); + return err; + +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + +err_out_table: + mthca_table_put(dev, dev->mr_table.mpt_table, key); + +err_out_mpt_free: + mthca_free(&dev->mr_table.mpt_alloc, key); + return err; +} + +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, + mthca_mpt_access_t access, struct mthca_mr *mr) +{ + mr->mtt = NULL; + return mthca_mr_alloc(dev, pd, 12, 0, ~0Ui64, access, mr); +} + +int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, + u64 *buffer_list, int buffer_size_shift, + int list_len, u64 iova, u64 total_size, + mthca_mpt_access_t access, struct mthca_mr *mr) +{ + int err; + HCA_ENTER(HCA_DBG_MEMORY); + mr->mtt = mthca_alloc_mtt(dev, list_len); + if (IS_ERR(mr->mtt)){ + err= PTR_ERR(mr->mtt); + goto out; + } + + err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len); + if (err) { + mthca_free_mtt(dev, mr->mtt); + goto out; + } + + err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova, + total_size, access, mr); + if (err) + mthca_free_mtt(dev, mr->mtt); + +out: + HCA_EXIT(HCA_DBG_MEMORY); + return err; +} + +/* Free mr or fmr */ +static void mthca_free_region(struct mthca_dev *dev, u32 lkey) +{ + mthca_table_put(dev, dev->mr_table.mpt_table, key_to_hw_index(dev, lkey)); + mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey)); +} + +void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) +{ + int err; + u8 status; + + err = mthca_HW2SW_MPT(dev, NULL, + key_to_hw_index(dev, mr->ibmr.lkey) & + (dev->limits.num_mpts - 1), + &status); + if (err){ + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("HW2SW_MPT failed (%d)\n", err)); + }else if (status){ + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("HW2SW_MPT returned status 0x%02x\n", + status)); + } + + mthca_free_region(dev, mr->ibmr.lkey); + mthca_free_mtt(dev, mr->mtt); +} + +int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, + mthca_mpt_access_t access, struct mthca_fmr *fmr) +{ + struct mthca_mpt_entry *mpt_entry; + struct mthca_mailbox *mailbox; + u64 mtt_seg; + u32 key, idx; + u8 status; + int list_len = fmr->attr.max_pages; + int err = -ENOMEM; + int i; + CPU_2_BE64_PREP; + + if (fmr->attr.page_shift < 12 || fmr->attr.page_shift >= 32) + return -EINVAL; + + /* For Arbel, all MTTs must fit in the same page. 
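 * (As a rough bound, assuming 8-byte MTT entries and a 4 KB PAGE_SIZE,
 * the check below therefore limits an Arbel FMR to at most
 * 4096 / 8 = 512 pages per mapping.)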
*/ + if (mthca_is_memfree(dev) && + fmr->attr.max_pages * sizeof *fmr->mem.arbel.mtts > PAGE_SIZE) + return -EINVAL; + + fmr->maps = 0; + + key = mthca_alloc(&dev->mr_table.mpt_alloc); + if (key == -1) + return -ENOMEM; + key = adjust_key(dev, key); + + idx = key & (dev->limits.num_mpts - 1); + fmr->ibfmr.rkey = fmr->ibfmr.lkey = hw_index_to_key(dev, key); + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->mr_table.mpt_table, key); + if (err) + goto err_out_mpt_free; + + fmr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key); + BUG_ON(!fmr->mem.arbel.mpt); + } else + fmr->mem.tavor.mpt = (struct mthca_mpt_entry*)((u8*)dev->mr_table.tavor_fmr.mpt_base + + sizeof *(fmr->mem.tavor.mpt) * idx); + + fmr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); + if (IS_ERR(fmr->mtt)) + goto err_out_table; + + mtt_seg =fmr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; + + if (mthca_is_memfree(dev)) { + fmr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, + fmr->mtt->first_seg); + BUG_ON(!fmr->mem.arbel.mtts); + } else + fmr->mem.tavor.mtts = (u64*)((u8*)dev->mr_table.tavor_fmr.mtt_base + mtt_seg); + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_free_mtt; + + mpt_entry = mailbox->buf; + + mpt_entry->flags = cl_hton32(MTHCA_MPT_FLAG_SW_OWNS | + MTHCA_MPT_FLAG_MIO | + MTHCA_MPT_FLAG_REGION | + access); + + mpt_entry->page_size = cl_hton32(fmr->attr.page_shift - 12); + mpt_entry->key = cl_hton32(key); + mpt_entry->pd = cl_hton32(pd); + RtlZeroMemory(&mpt_entry->start, + sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start)); + mpt_entry->mtt_seg = CPU_2_BE64(dev->mr_table.mtt_base + mtt_seg); + + { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("Dumping MPT entry %08x:\n", fmr->ibfmr.lkey)); + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; i=i+4) { + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("[%02x] %08x %08x %08x %08x \n",i, + cl_ntoh32(((__be32 *) mpt_entry)[i]), + cl_ntoh32(((__be32 *) mpt_entry)[i+1]), + cl_ntoh32(((__be32 *) mpt_entry)[i+2]), + cl_ntoh32(((__be32 *) mpt_entry)[i+3]))); + } + } + + err = mthca_SW2HW_MPT(dev, mailbox, + key & (dev->limits.num_mpts - 1), + &status); + + if (err) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("SW2HW_MPT failed (%d)\n", err)); + goto err_out_mailbox_free; + } + if (status) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("SW2HW_MPT returned status 0x%02x\n", + status)); + err = -EINVAL; + goto err_out_mailbox_free; + } + + mthca_free_mailbox(dev, mailbox); + return 0; + +err_out_mailbox_free: + mthca_free_mailbox(dev, mailbox); + +err_out_free_mtt: + mthca_free_mtt(dev, fmr->mtt); + +err_out_table: + mthca_table_put(dev, dev->mr_table.mpt_table, key); + +err_out_mpt_free: + mthca_free(&dev->mr_table.mpt_alloc, fmr->ibfmr.lkey); + return err; +} + + +int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr) +{ + if (fmr->maps) + return -EBUSY; + + mthca_free_region(dev, fmr->ibfmr.lkey); + mthca_free_mtt(dev, fmr->mtt); + + return 0; +} + + +static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list, + int list_len, u64 iova) +{ + int page_mask; + UNREFERENCED_PARAMETER(page_list); + + if (list_len > fmr->attr.max_pages) + return -EINVAL; + + page_mask = (1 << fmr->attr.page_shift) - 1; + + /* We are getting page lists, so va must be page aligned. 
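 * (For example, with page_shift = 12, the smallest value mthca_fmr_alloc
 * accepts, the page_mask computed above is (1 << 12) - 1 = 0xfff, so the
 * check below rejects any iova whose low 12 bits are not zero.)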
*/ + if (iova & page_mask) + return -EINVAL; + + /* Trust the user not to pass misaligned data in page_list */ + #if 0 + for (i = 0; i < list_len; ++i) { + if (page_list[i] & ~page_mask) + return -EINVAL; + } + #endif + + if (fmr->maps >= fmr->attr.max_maps) + return -EINVAL; + + return 0; +} + + +int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova) +{ + struct mthca_fmr *fmr = to_mfmr(ibfmr); + struct mthca_dev *dev = to_mdev(ibfmr->device); + struct mthca_mpt_entry mpt_entry; + u32 key; + int i, err; + CPU_2_BE64_PREP; + + err = mthca_check_fmr(fmr, page_list, list_len, iova); + if (err) + return err; + + ++fmr->maps; + + key = tavor_key_to_hw_index(fmr->ibfmr.lkey); + key += dev->limits.num_mpts; + fmr->ibfmr.lkey = fmr->ibfmr.rkey = tavor_hw_index_to_key(key); + + writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); + + for (i = 0; i < list_len; ++i) { + __be64 mtt_entry; + u64 val = page_list[i]; + // BUG in compiler: it can't perform OR on u64 !!! We perform OR on the low dword + *(PULONG)&val |= MTHCA_MTT_FLAG_PRESENT; + mtt_entry = cl_hton64(val); + mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i); + } + + mpt_entry.lkey = cl_hton32(key); + mpt_entry.length = CPU_2_BE64(list_len * (1Ui64 << fmr->attr.page_shift)); + mpt_entry.start = cl_hton64(iova); + + __raw_writel((u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); + memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start, + offsetof(struct mthca_mpt_entry, window_count) - + offsetof(struct mthca_mpt_entry, start)); + + writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt); + + return 0; +} + +int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova) +{ + struct mthca_fmr *fmr = to_mfmr(ibfmr); + struct mthca_dev *dev = to_mdev(ibfmr->device); + u32 key; + int i, err; + CPU_2_BE64_PREP; + + err = mthca_check_fmr(fmr, page_list, list_len, iova); + if (err) + return err; + + ++fmr->maps; + + key = arbel_key_to_hw_index(fmr->ibfmr.lkey); + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + key += SINAI_FMR_KEY_INC; + else + key += dev->limits.num_mpts; + fmr->ibfmr.lkey = fmr->ibfmr.rkey = arbel_hw_index_to_key(key); + + *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; + + wmb(); + + for (i = 0; i < list_len; ++i) { + // BUG in compiler: it can't perform OR on u64 !!! 
We perform OR on the low dword + u64 val = page_list[i]; + *(PULONG)&val |= MTHCA_MTT_FLAG_PRESENT; + fmr->mem.arbel.mtts[i] = cl_hton64(val); + } + + fmr->mem.arbel.mpt->key = cl_hton32(key); + fmr->mem.arbel.mpt->lkey = cl_hton32(key); + fmr->mem.arbel.mpt->length = CPU_2_BE64(list_len * (1Ui64 << fmr->attr.page_shift)); + fmr->mem.arbel.mpt->start = cl_hton64(iova); + + wmb(); + + *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW; + + wmb(); + + return 0; +} + + +void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) +{ + u32 key; + + if (!fmr->maps) + return; + + key = tavor_key_to_hw_index(fmr->ibfmr.lkey); + key &= dev->limits.num_mpts - 1; + fmr->ibfmr.lkey = fmr->ibfmr.rkey = tavor_hw_index_to_key(key); + + fmr->maps = 0; + + writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt); +} + + +void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) +{ + u32 key; + + if (!fmr->maps) + return; + + key = arbel_key_to_hw_index(fmr->ibfmr.lkey); + key &= dev->limits.num_mpts - 1; + fmr->ibfmr.lkey = fmr->ibfmr.rkey = arbel_hw_index_to_key(key); + + fmr->maps = 0; + + *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; +} + +int mthca_init_mr_table(struct mthca_dev *dev) +{ + int err, i; + + err = mthca_alloc_init(&dev->mr_table.mpt_alloc, + (u32)dev->limits.num_mpts, + (u32)~0, (u32)dev->limits.reserved_mrws); + if (err) + return err; + + if (!mthca_is_memfree(dev) && + (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN)) + dev->limits.fmr_reserved_mtts = 0; + else + dev->mthca_flags |= MTHCA_FLAG_FMR; + + if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + HCA_PRINT(TRACE_LEVEL_INFORMATION ,HCA_DBG_MEMORY ,("Memory key throughput optimization activated.\n")); + + err = mthca_buddy_init(&dev->mr_table.mtt_buddy, + fls(dev->limits.num_mtt_segs - 1)); + + if (err) + goto err_mtt_buddy; + + dev->mr_table.tavor_fmr.mpt_base = NULL; + dev->mr_table.tavor_fmr.mtt_base = NULL; + + if (dev->limits.fmr_reserved_mtts) { + i = fls(dev->limits.fmr_reserved_mtts - 1); + + if (i >= 31) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("Unable to reserve 2^31 FMR MTTs.\n")); + err = -EINVAL; + goto err_fmr_mpt; + } + + dev->mr_table.tavor_fmr.mpt_base = + ioremap(dev->mr_table.mpt_base, + (1 << i) * sizeof (struct mthca_mpt_entry), + &dev->mr_table.tavor_fmr.mpt_base_size); + + if (!dev->mr_table.tavor_fmr.mpt_base) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("MPT ioremap for FMR failed.\n")); + err = -ENOMEM; + goto err_fmr_mpt; + } + + dev->mr_table.tavor_fmr.mtt_base = + ioremap(dev->mr_table.mtt_base, + (1 << i) * MTHCA_MTT_SEG_SIZE, + &dev->mr_table.tavor_fmr.mtt_base_size ); + + if (!dev->mr_table.tavor_fmr.mtt_base) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_MEMORY ,("MTT ioremap for FMR failed.\n")); + err = -ENOMEM; + goto err_fmr_mtt; + } + + err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i); + if (err) + goto err_fmr_mtt_buddy; + + /* Prevent regular MRs from using FMR keys */ + err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i); + if (err) + goto err_reserve_fmr; + + dev->mr_table.fmr_mtt_buddy = + &dev->mr_table.tavor_fmr.mtt_buddy; + } else + dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy; + + /* FMR table is always the first, take reserved MTTs out of there */ + if (dev->limits.reserved_mtts) { + i = fls(dev->limits.reserved_mtts - 1); + + if (mthca_alloc_mtt_range(dev, i, + dev->mr_table.fmr_mtt_buddy) == -1) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_MEMORY,("MTT table of order %d is too small.\n", + dev->mr_table.fmr_mtt_buddy->max_order)); + err 
= -ENOMEM; + goto err_reserve_mtts; + } + } + + return 0; + +err_reserve_mtts: +err_reserve_fmr: + if (dev->limits.fmr_reserved_mtts) + mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy); + +err_fmr_mtt_buddy: + if (dev->mr_table.tavor_fmr.mtt_base) + iounmap(dev->mr_table.tavor_fmr.mtt_base, + dev->mr_table.tavor_fmr.mtt_base_size); + +err_fmr_mtt: + if (dev->mr_table.tavor_fmr.mpt_base) + iounmap(dev->mr_table.tavor_fmr.mpt_base, + dev->mr_table.tavor_fmr.mpt_base_size); + +err_fmr_mpt: + mthca_buddy_cleanup(&dev->mr_table.mtt_buddy); + +err_mtt_buddy: + mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); + + return err; +} + +void mthca_cleanup_mr_table(struct mthca_dev *dev) +{ + /* XXX check if any MRs are still allocated? */ + if (dev->limits.fmr_reserved_mtts) + mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy); + + mthca_buddy_cleanup(&dev->mr_table.mtt_buddy); + + if (dev->mr_table.tavor_fmr.mtt_base) + iounmap(dev->mr_table.tavor_fmr.mtt_base, + dev->mr_table.tavor_fmr.mtt_base_size); + if (dev->mr_table.tavor_fmr.mpt_base) + iounmap(dev->mr_table.tavor_fmr.mpt_base, + dev->mr_table.tavor_fmr.mpt_base_size); + + mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); +} + + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_pd.c b/branches/IBFD/hw/mthca/kernel/mthca_pd.c new file mode 100644 index 00000000..a39dc401 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_pd.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include "mthca_dev.h" + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_pd_table) +#pragma alloc_text (PAGE, mthca_cleanup_pd_table) +#endif + +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd) +{ + int err = 0; + + pd->privileged = privileged; + + atomic_set(&pd->sqp_count, 0); + pd->pd_num = mthca_alloc(&dev->pd_table.alloc); + if (pd->pd_num == -1) + return -ENOMEM; + + if (privileged) { + err = mthca_mr_alloc_notrans(dev, pd->pd_num, + MTHCA_MPT_FLAG_LOCAL_READ | + MTHCA_MPT_FLAG_LOCAL_WRITE, + &pd->ntmr); + if (err) + mthca_free(&dev->pd_table.alloc, pd->pd_num); + } + + return err; +} + +void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) +{ + if (pd->privileged) + mthca_free_mr(dev, &pd->ntmr); + mthca_free(&dev->pd_table.alloc, pd->pd_num); +} + +int mthca_init_pd_table(struct mthca_dev *dev) +{ + return mthca_alloc_init(&dev->pd_table.alloc, + dev->limits.num_pds, + (1 << 24) - 1, + dev->limits.reserved_pds); +} + +void mthca_cleanup_pd_table(struct mthca_dev *dev) +{ + /* XXX check if any PDs are still allocated? */ + mthca_alloc_cleanup(&dev->pd_table.alloc); +} + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_profile.c b/branches/IBFD/hw/mthca/kernel/mthca_profile.c new file mode 100644 index 00000000..873c1e51 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_profile.c @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + + +#include "mthca_profile.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_profile.tmh" +#endif + +enum { + MTHCA_RES_QP, + MTHCA_RES_EEC, + MTHCA_RES_SRQ, + MTHCA_RES_CQ, + MTHCA_RES_EQP, + MTHCA_RES_EEEC, + MTHCA_RES_EQ, + MTHCA_RES_RDB, + MTHCA_RES_MCG, + MTHCA_RES_MPT, + MTHCA_RES_MTT, + MTHCA_RES_UAR, + MTHCA_RES_UDAV, + MTHCA_RES_UARC, + MTHCA_RES_NUM +}; + +enum { + MTHCA_NUM_EQS = 32, + MTHCA_NUM_PDS = 1 << 15 +}; + +u64 mthca_make_profile(struct mthca_dev *dev, + struct mthca_profile *request, + struct mthca_dev_lim *dev_lim, + struct mthca_init_hca_param *init_hca) +{ + struct mthca_resource { + u64 size; + u64 start; + int type; + int num; + int log_num; + }; + + u64 mem_base, mem_avail; + u64 total_size = 0; + struct mthca_resource *profile; + struct mthca_resource tmp; + int i, j; + + profile = kmalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL); + if (!profile) + return (u64)-ENOMEM; + + RtlZeroMemory(profile, MTHCA_RES_NUM * sizeof *profile); + + profile[MTHCA_RES_QP].size = dev_lim->qpc_entry_sz; + profile[MTHCA_RES_EEC].size = dev_lim->eec_entry_sz; + profile[MTHCA_RES_SRQ].size = dev_lim->srq_entry_sz; + profile[MTHCA_RES_CQ].size = dev_lim->cqc_entry_sz; + profile[MTHCA_RES_EQP].size = dev_lim->eqpc_entry_sz; + profile[MTHCA_RES_EEEC].size = dev_lim->eeec_entry_sz; + profile[MTHCA_RES_EQ].size = dev_lim->eqc_entry_sz; + profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE; + profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE; + profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz; + profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE; + profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz; + profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE; + profile[MTHCA_RES_UARC].size = request->uarc_size; + + profile[MTHCA_RES_QP].num = request->num_qp; + profile[MTHCA_RES_SRQ].num = request->num_srq; + profile[MTHCA_RES_EQP].num = request->num_qp; + profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp; + profile[MTHCA_RES_CQ].num = request->num_cq; + profile[MTHCA_RES_EQ].num = MTHCA_NUM_EQS; + profile[MTHCA_RES_MCG].num = request->num_mcg; + profile[MTHCA_RES_MPT].num = request->num_mpt; + profile[MTHCA_RES_MTT].num = request->num_mtt; + profile[MTHCA_RES_UAR].num = request->num_uar; + profile[MTHCA_RES_UARC].num = request->num_uar; + profile[MTHCA_RES_UDAV].num = request->num_udav; + + for (i = 0; i < MTHCA_RES_NUM; ++i) { + profile[i].type = i; + profile[i].log_num = max(ffs(profile[i].num) - 1, 0); + profile[i].size *= profile[i].num; + if (mthca_is_memfree(dev)) + profile[i].size = max(profile[i].size, (u64) PAGE_SIZE); + } + + if (mthca_is_memfree(dev)) { + mem_base = 0; + mem_avail = dev_lim->hca.arbel.max_icm_sz; + } else { + mem_base = dev->ddr_start; + mem_avail = dev->fw.tavor.fw_start - dev->ddr_start; + } + + /* + * Sort the resources in decreasing order of size. Since they + * all have sizes that are powers of 2, we'll be able to keep + * resources aligned to their size and pack them without gaps + * using the sorted order. 
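 * (A small illustration: packing a 1 MB region, then a 256 KB region,
 * then a 4 KB region back to back from an aligned base keeps every start
 * address aligned to that region's own size, because each earlier size
 * is a power-of-two multiple of every later one.)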
+ */ + for (i = MTHCA_RES_NUM; i > 0; --i) + for (j = 1; j < i; ++j) { + if (profile[j].size > profile[j - 1].size) { + tmp = profile[j]; + profile[j] = profile[j - 1]; + profile[j - 1] = tmp; + } + } + + for (i = 0; i < MTHCA_RES_NUM; ++i) { + if (profile[i].size) { + profile[i].start = mem_base + total_size; + total_size += profile[i].size; + } + if (total_size > mem_avail) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_LOW,("Profile requires 0x%I64x bytes; " + "won't in 0x%I64x bytes of context memory.\n", + (u64) total_size, + (u64) mem_avail)); + kfree(profile); + return (u64)-ENOMEM; + } + + if (profile[i].size) + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("profile[%2d]--%2d/%2d @ 0x%16I64x " + "(size 0x%8I64x)\n", + i, profile[i].type, profile[i].log_num, + (u64) profile[i].start, + (u64) profile[i].size)); + } + + if (mthca_is_memfree(dev)){ + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("HCA context memory: reserving %d KB\n", + (int) (total_size >> 10))); + }else{ + HCA_PRINT(TRACE_LEVEL_VERBOSE,HCA_DBG_LOW,("HCA memory: allocated %d KB/%d KB (%d KB free)\n", + (int) (total_size >> 10), (int) (mem_avail >> 10), + (int) ((mem_avail - total_size) >> 10))); + } + for (i = 0; i < MTHCA_RES_NUM; ++i) { + int mc_entry_sz = MTHCA_MGM_ENTRY_SIZE; + int mtt_seg_sz = MTHCA_MTT_SEG_SIZE; + + switch (profile[i].type) { + case MTHCA_RES_QP: + dev->limits.num_qps = profile[i].num; + init_hca->qpc_base = profile[i].start; + init_hca->log_num_qps = (u8)profile[i].log_num; + break; + case MTHCA_RES_EEC: + dev->limits.num_eecs = profile[i].num; + init_hca->eec_base = profile[i].start; + init_hca->log_num_eecs = (u8)profile[i].log_num; + break; + case MTHCA_RES_SRQ: + dev->limits.num_srqs = profile[i].num; + init_hca->srqc_base = profile[i].start; + init_hca->log_num_srqs = (u8)profile[i].log_num; + break; + case MTHCA_RES_CQ: + dev->limits.num_cqs = profile[i].num; + init_hca->cqc_base = profile[i].start; + init_hca->log_num_cqs = (u8)profile[i].log_num; + break; + case MTHCA_RES_EQP: + init_hca->eqpc_base = profile[i].start; + break; + case MTHCA_RES_EEEC: + init_hca->eeec_base = profile[i].start; + break; + case MTHCA_RES_EQ: + dev->limits.num_eqs = profile[i].num; + init_hca->eqc_base = profile[i].start; + init_hca->log_num_eqs = (u8)profile[i].log_num; + break; + case MTHCA_RES_RDB: + for (dev->qp_table.rdb_shift = 0; + request->num_qp << dev->qp_table.rdb_shift < profile[i].num; + ++dev->qp_table.rdb_shift) + ; /* nothing */ + dev->qp_table.rdb_base = (u32) profile[i].start; + init_hca->rdb_base = profile[i].start; + break; + case MTHCA_RES_MCG: + dev->limits.num_mgms = profile[i].num >> 1; + dev->limits.num_amgms = profile[i].num >> 1; + init_hca->mc_base = profile[i].start; + init_hca->log_mc_entry_sz = (u16)(ffs(mc_entry_sz) - 1); + init_hca->log_mc_table_sz = (u8)profile[i].log_num; + init_hca->mc_hash_sz = (u16)(1 << (profile[i].log_num - 1)); + break; + case MTHCA_RES_MPT: + dev->limits.num_mpts = profile[i].num; + dev->mr_table.mpt_base = profile[i].start; + init_hca->mpt_base = profile[i].start; + init_hca->log_mpt_sz = (u8)profile[i].log_num; + break; + case MTHCA_RES_MTT: + dev->limits.num_mtt_segs = profile[i].num; + dev->mr_table.mtt_base = profile[i].start; + init_hca->mtt_base = profile[i].start; + init_hca->mtt_seg_sz = (u8)(ffs(mtt_seg_sz) - 7); + break; + case MTHCA_RES_UAR: + dev->limits.num_uars = profile[i].num; + init_hca->uar_scratch_base = profile[i].start; + break; + case MTHCA_RES_UDAV: + dev->av_table.ddr_av_base = profile[i].start; + dev->av_table.num_ddr_avs = 
profile[i].num; + break; + case MTHCA_RES_UARC: + dev->uar_table.uarc_size = request->uarc_size; + dev->uar_table.uarc_base = profile[i].start; + init_hca->uarc_base = profile[i].start; + init_hca->log_uarc_sz = (u8)(ffs(request->uarc_size) - 13); + init_hca->log_uar_sz = (u8)(ffs(request->num_uar) - 1); + break; + default: + break; + } + } + + /* + * PDs don't take any HCA memory, but we assign them as part + * of the HCA profile anyway. + */ + dev->limits.num_pds = MTHCA_NUM_PDS; + + /* + * For Tavor, FMRs use ioremapped PCI memory. For 32 bit + * systems it may use too much vmalloc space to map all MTT + * memory, so we reserve some MTTs for FMR access, taking them + * out of the MR pool. They don't use additional memory, but + * we assign them as part of the HCA profile anyway. + */ + if (mthca_is_memfree(dev)) + dev->limits.fmr_reserved_mtts = 0; + else + dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts; + + kfree(profile); + return total_size; +} diff --git a/branches/IBFD/hw/mthca/kernel/mthca_profile.h b/branches/IBFD/hw/mthca/kernel/mthca_profile.h new file mode 100644 index 00000000..f1887c58 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_profile.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef MTHCA_PROFILE_H +#define MTHCA_PROFILE_H + +#include "mthca_dev.h" +#include "mthca_cmd.h" + +struct mthca_profile { + int num_qp; + int rdb_per_qp; + int num_srq; + int num_cq; + int num_mcg; + int num_mpt; + int num_mtt; + int num_udav; + int num_uar; + int uarc_size; + int fmr_reserved_mtts; +}; + +u64 mthca_make_profile(struct mthca_dev *mdev, + struct mthca_profile *request, + struct mthca_dev_lim *dev_lim, + struct mthca_init_hca_param *init_hca); + +#endif /* MTHCA_PROFILE_H */ diff --git a/branches/IBFD/hw/mthca/kernel/mthca_provider.c b/branches/IBFD/hw/mthca/kernel/mthca_provider.c new file mode 100644 index 00000000..90979cd8 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_provider.c @@ -0,0 +1,1327 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include + +#include "mx_abi.h" +#include "mthca_dev.h" +#include "mt_pa_cash.h" + +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_provider.tmh" +#endif +#include "mthca_cmd.h" +#include "mthca_memfree.h" + +static void init_query_mad(struct ib_smp *mad) +{ + mad->base_version = 1; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; +} + +int mthca_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + struct mthca_dev* mdev = to_mdev(ibdev); + + u8 status; + + RtlZeroMemory(props, sizeof *props); + + if (mthca_is_livefish(mdev)) { + props->max_pd = 1; + props->vendor_id = mdev->ext->hcaConfig.VendorID; + props->vendor_part_id = mdev->ext->hcaConfig.DeviceID; + return 0; + } + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mthca_MAD_IFC(mdev, 1, 1, + 1, NULL, NULL, in_mad, out_mad, &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + props->fw_ver = mdev->fw_ver; + props->device_cap_flags = mdev->device_cap_flags; + props->vendor_id = cl_ntoh32(*(__be32 *) (out_mad->data + 36)) & + 0xffffff; + props->vendor_part_id = cl_ntoh16(*(__be16 *) (out_mad->data + 30)); + props->hw_ver = cl_ntoh32(*(__be32 *) (out_mad->data + 32)); + memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + props->max_mr_size = ~0Ui64; + props->page_size_cap = mdev->limits.page_size_cap; + props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps; + props->max_qp_wr = mdev->limits.max_wqes; + props->max_sge = mdev->limits.max_sg; + props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs; + props->max_cqe = 
mdev->limits.max_cqes; + props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws; + props->max_pd = mdev->limits.num_pds - mdev->limits.reserved_pds; + props->max_qp_rd_atom = 1 << mdev->qp_table.rdb_shift; + props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma; + props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; + props->max_srq = mdev->limits.num_srqs - mdev->limits.reserved_srqs; + props->max_srq_wr = mdev->limits.max_srq_wqes; + if (mthca_is_memfree(mdev)) + --props->max_srq_wr; + props->max_srq_sge = mdev->limits.max_srq_sge; + props->local_ca_ack_delay = (u8)mdev->limits.local_ca_ack_delay; + props->atomic_cap = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ? + IB_ATOMIC_LOCAL : IB_ATOMIC_NONE; + props->max_pkeys = (u16)mdev->limits.pkey_table_len; + props->max_mcast_grp = mdev->limits.num_mgms + mdev->limits.num_amgms; + props->max_mcast_qp_attach = MTHCA_QP_PER_MGM; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + + /* + * If Sinai memory key optimization is being used, then only + * the 8-bit key portion will change. For other HCAs, the + * unused index bits will also be used for FMR remapping. + */ + if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT) + props->max_map_per_fmr = 255; + else + props->max_map_per_fmr = + (1 << (32 - long_log2(mdev->limits.num_mpts))) - 1; + + err = 0; + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +int mthca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u8 status; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cl_hton32(port); + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, + port, NULL, NULL, in_mad, out_mad, + &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + RtlZeroMemory(props, sizeof *props); + props->lid = cl_ntoh16(*(__be16 *) (out_mad->data + 16)); + props->lmc = out_mad->data[34] & 0x7; + props->sm_lid = cl_ntoh16(*(__be16 *) (out_mad->data + 18)); + props->sm_sl = out_mad->data[36] & 0xf; + props->state = out_mad->data[32] & 0xf; + props->phys_state = out_mad->data[33] >> 4; + props->port_cap_flags = cl_ntoh32(*(__be32 *) (out_mad->data + 20)); + props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; + props->max_msg_sz = 0x80000000; + props->pkey_tbl_len = (u16)to_mdev(ibdev)->limits.pkey_table_len; + props->bad_pkey_cntr = cl_ntoh16(*(__be16 *) (out_mad->data + 46)); + props->qkey_viol_cntr = cl_ntoh16(*(__be16 *) (out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; + + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +int mthca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + struct mthca_set_ib_param set_ib; + struct ib_port_attr attr; + int err; + u8 status; + + if (down_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) + return -EFAULT; + + err = mthca_query_port(ibdev, port, &attr); + if (err) + goto out; + + set_ib.set_si_guid = 0; + set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR); + + set_ib.cap_mask = (attr.port_cap_flags | 
props->set_port_cap_mask) & + ~props->clr_port_cap_mask; + + err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + +out: + up(&to_mdev(ibdev)->cap_mask_mutex); + return err; +} + +static int mthca_query_pkey_chunk(struct ib_device *ibdev, + u8 port, u16 index, u16 pkey[32]) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u8 status; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; + in_mad->attr_mod = cl_hton32(index / 32); + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, + port, NULL, NULL, in_mad, out_mad, + &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + { // copy the results + int i; + __be16 *pkey_chunk = (__be16 *)out_mad->data; + for (i=0; i<32; ++i) + pkey[i] = cl_ntoh16(pkey_chunk[i]); + } + + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mthca_query_gid_chunk(struct ib_device *ibdev, u8 port, + int index, union ib_gid gid[8]) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u8 status; + __be64 subnet_prefix; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cl_hton32(port); + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, + port, NULL, NULL, in_mad, out_mad, + &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + memcpy(&subnet_prefix, out_mad->data + 8, 8); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cl_hton32(index / 8); + + err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, + port, NULL, NULL, in_mad, out_mad, + &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + { // copy the results + int i; + __be64 *guid = (__be64 *)out_mad->data; + for (i=0; i<8; ++i) { + gid[i].global.subnet_prefix = subnet_prefix; + gid[i].global.interface_id = guid[i]; + } + } + + out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, + ci_umv_buf_t* const p_umv_buf) +{ + struct ibv_get_context_resp uresp; + struct mthca_ucontext *context; + int err; + + RtlZeroMemory(&uresp, sizeof uresp); + + uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; + if (mthca_is_memfree(to_mdev(ibdev))) + uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size; + else + uresp.uarc_size = 0; + + context = kzalloc(sizeof *context, GFP_KERNEL); + if (!context) { + err = -ENOMEM; + goto err_nomem; + } + + if (mthca_is_livefish(to_mdev(ibdev))) + goto done; + + err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); + if (err) + goto err_uar_alloc; + + /* + * map uar to user space + */ + + /* map UAR to kernel */ + context->kva = ioremap((io_addr_t)context->uar.pfn << PAGE_SHIFT, PAGE_SIZE,&context->uar_size); + if (!context->kva) { + HCA_PRINT( TRACE_LEVEL_ERROR, HCA_DBG_LOW ,("Couldn't map kernel access region, aborting.\n") ); + err = -ENOMEM; + goto err_ioremap; + } + + /* build MDL */ + context->mdl = IoAllocateMdl( context->kva, (ULONG)context->uar_size, + FALSE, TRUE, NULL ); + if( !context->mdl ) { + err = -ENOMEM; + goto err_alloc_mdl; + } + MmBuildMdlForNonPagedPool( context->mdl ); + + /* Map 
the memory into the calling process's address space. */ + __try { + context->ibucontext.user_uar = MmMapLockedPagesSpecifyCache( context->mdl, + UserMode, MmNonCached, NULL, FALSE, NormalPagePriority ); + } + __except(EXCEPTION_EXECUTE_HANDLER) { + err = -EACCES; + goto err_map; + } + + /* user_db_tab */ + context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); + if (IS_ERR(context->db_tab)) { + err = PTR_ERR(context->db_tab); + goto err_init_user; + } + +done: + err = ib_copy_to_umv_buf(p_umv_buf, &uresp, sizeof uresp); + if (err) + goto err_copy_to_umv_buf; + + context->ibucontext.device = ibdev; + + atomic_set(&context->ibucontext.usecnt, 0); + return &context->ibucontext; + +err_copy_to_umv_buf: + mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, + context->db_tab); +err_init_user: + MmUnmapLockedPages( context->ibucontext.user_uar, context->mdl ); +err_map: + IoFreeMdl(context->mdl); +err_alloc_mdl: + iounmap(context->kva, PAGE_SIZE); +err_ioremap: + mthca_uar_free(to_mdev(ibdev), &context->uar); +err_uar_alloc: + kfree(context); +err_nomem: + return ERR_PTR(err); +} + + int mthca_dealloc_ucontext(struct ib_ucontext *context) +{ + struct mthca_ucontext *mucontext = to_mucontext(context); + + if (mthca_is_livefish(to_mdev(context->device))) + goto done; + mthca_cleanup_user_db_tab(to_mdev(context->device), &mucontext->uar, + mucontext->db_tab); + MmUnmapLockedPages( mucontext->ibucontext.user_uar, mucontext->mdl ); + IoFreeMdl(mucontext->mdl); + iounmap(mucontext->kva, PAGE_SIZE); + mthca_uar_free(to_mdev(context->device), &mucontext->uar); +done: + kfree(mucontext); + + return 0; +} + +struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + ci_umv_buf_t* const p_umv_buf) +{ + int err; + struct mthca_pd *pd; + struct ibv_alloc_pd_resp resp; + + /* sanity check */ + if (p_umv_buf && p_umv_buf->command) { + if (p_umv_buf->output_size < sizeof(struct ibv_alloc_pd_resp)) { + err = -EINVAL; + goto err_param; + } + } + + pd = kmalloc(sizeof *pd, GFP_KERNEL); + if (!pd) { + err = -ENOMEM; + goto err_mem; + } + + if (mthca_is_livefish(to_mdev(ibdev))) + goto done; + + err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); + if (err) { + goto err_pd_alloc; + } + +done: + if (p_umv_buf && p_umv_buf->command) { + resp.pd_handle = (u64)(UINT_PTR)pd; + resp.pdn = pd->pd_num; + if (ib_copy_to_umv_buf(p_umv_buf, &resp, sizeof(struct ibv_alloc_pd_resp))) { + err = -EFAULT; + goto err_copy; + } + } + + return &pd->ibpd; + +err_copy: + mthca_pd_free(to_mdev(ibdev), pd); +err_pd_alloc: + kfree(pd); +err_mem: +err_param: + return ERR_PTR(err); +} + +int mthca_dealloc_pd(struct ib_pd *pd) +{ + if (mthca_is_livefish(to_mdev(pd->device))) + goto done; + + mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); + +done: + kfree(pd); + return 0; +} + +static struct ib_ah *mthca_ah_create(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + int err; + struct mthca_ah *ah; + + ah = kzalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah); + if (err) { + kfree(ah); + return ERR_PTR(err); + } + + return &ah->ibah; +} + +static int mthca_ah_destroy(struct ib_ah *ah) +{ + mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); + kfree(ah); + + return 0; +} + +static struct ib_srq *mthca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + ci_umv_buf_t* const p_umv_buf) +{ + struct ibv_create_srq ucmd = { 0 }; + struct mthca_ucontext *context = NULL; + struct mthca_srq *srq; + int err; + + srq 
= kzalloc(sizeof *srq, GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + if (pd->ucontext) { + context = to_mucontext(pd->ucontext); + + if (ib_copy_from_umv_buf(&ucmd, p_umv_buf, sizeof ucmd)) { + err = -EFAULT; + goto err_free; + } + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index, + ucmd.db_page, NULL); + + if (err) + goto err_free; + + srq->mr.ibmr.lkey = ucmd.lkey; + srq->db_index = ucmd.db_index; + } + + err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), + &init_attr->attr, srq); + + if (err && pd->ucontext) + mthca_unmap_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index); + + if (err) + goto err_free; + + if (context && ib_copy_to_umv_buf(p_umv_buf, &srq->srqn, sizeof (u32))) { + mthca_free_srq(to_mdev(pd->device), srq); + err = -EFAULT; + goto err_free; + } + + return &srq->ibsrq; + +err_free: + kfree(srq); + + return ERR_PTR(err); +} + +static int mthca_destroy_srq(struct ib_srq *srq) +{ + struct mthca_ucontext *context; + + if (srq->ucontext) { + context = to_mucontext(srq->ucontext); + + mthca_unmap_user_db(to_mdev(srq->device), &context->uar, + context->db_tab, to_msrq(srq)->db_index); + } + + mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); + kfree(srq); + + return 0; +} + +static struct ib_qp *mthca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + ci_umv_buf_t* const p_umv_buf) +{ + struct ibv_create_qp ucmd = {0}; + struct mthca_qp *qp = NULL; + struct mthca_ucontext *context = NULL; + int err; + + switch (init_attr->qp_type) { + case IB_QPT_RELIABLE_CONN: + case IB_QPT_UNRELIABLE_CONN: + case IB_QPT_UNRELIABLE_DGRM: + { + + qp = kmalloc(sizeof *qp, GFP_KERNEL); + if (!qp) { + err = -ENOMEM; + goto err_mem; + } + + if (pd->ucontext) { + context = to_mucontext(pd->ucontext); + + if (ib_copy_from_umv_buf(&ucmd, p_umv_buf, sizeof ucmd)) { + err = -EFAULT; + goto err_copy; + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.sq_db_index, ucmd.sq_db_page, NULL); + if (err) + goto err_map1; + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.rq_db_index, ucmd.rq_db_page, NULL); + if (err) + goto err_map2; + + qp->mr.ibmr.lkey = ucmd.lkey; + qp->sq.db_index = ucmd.sq_db_index; + qp->rq.db_index = ucmd.rq_db_index; + } + + err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd), + to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq), + init_attr->qp_type, init_attr->sq_sig_type, + &init_attr->cap, qp); + + if (err) + if (pd->ucontext) + goto err_alloc_qp_user; + else + goto err_copy; + + qp->ibqp.qp_num = qp->qpn; + break; + } + case IB_QPT_QP0: + case IB_QPT_QP1: + { + /* Don't allow userspace to create special QPs */ + if (pd->ucontext) { + err = -EINVAL; + goto err_inval; + } + + qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); + if (!qp) { + err = -ENOMEM; + goto err_mem; + } + + qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_QP0 ? 
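+			/* added note: QP0 is the subnet management (SMI) QP and
+			   QP1 the general services (GSI) QP, hence the fixed numbers */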
0 : 1; + + err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), + to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq), + init_attr->sq_sig_type, &init_attr->cap, + qp->ibqp.qp_num, init_attr->port_num, + to_msqp(qp)); + if (err) + goto err_alloc_sqp; + + break; + } + default: + /* Don't support raw QPs */ + err = -ENOSYS; + goto err_unsupported; + } + + init_attr->cap.max_send_wr = qp->sq.max; + init_attr->cap.max_recv_wr = qp->rq.max; + init_attr->cap.max_send_sge = qp->sq.max_gs; + init_attr->cap.max_recv_sge = qp->rq.max_gs; + init_attr->cap.max_inline_data = qp->max_inline_data; + + return &qp->ibqp; + + +err_alloc_qp_user: + if (pd->ucontext) + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, context->db_tab, ucmd.rq_db_index); +err_map2: + if (pd->ucontext) + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, context->db_tab, ucmd.sq_db_index); +err_map1: err_copy: err_alloc_sqp: + if (qp) + kfree(qp); +err_mem: err_inval: err_unsupported: + return ERR_PTR(err); +} + +static int mthca_destroy_qp(struct ib_qp *qp) +{ + if (qp->ucontext) { + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->ucontext)->uar, + to_mucontext(qp->ucontext)->db_tab, + to_mqp(qp)->sq.db_index); + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->ucontext)->uar, + to_mucontext(qp->ucontext)->db_tab, + to_mqp(qp)->rq.db_index); + } + mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); + kfree(qp); + return 0; +} + +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, + struct ib_ucontext *context, + ci_umv_buf_t* const p_umv_buf) +{ + struct ibv_create_cq ucmd = {0}; + struct mthca_cq *cq; + int nent; + int err; + void *u_arm_db_page = 0; + + if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes) + return ERR_PTR(-EINVAL); + + if (context) { + if (ib_copy_from_umv_buf(&ucmd, p_umv_buf, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.set_db_index, ucmd.set_db_page, NULL); + if (err) + return ERR_PTR(err); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.arm_db_index, ucmd.arm_db_page, NULL); + if (err) + goto err_unmap_set; + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.u_arm_db_index, + (u64)(ULONG_PTR)PAGE_ALIGN(ucmd.u_arm_db_page), + &u_arm_db_page); + if (err) + goto err_unmap_arm; + } + + cq = kmalloc(sizeof *cq, GFP_KERNEL); + if (!cq) { + err = -ENOMEM; + goto err_unmap_ev; + } + + if (context) { + cq->mr.ibmr.lkey = ucmd.lkey; + cq->set_ci_db_index = ucmd.set_db_index; + cq->arm_db_index = ucmd.arm_db_index; + cq->u_arm_db_index = ucmd.u_arm_db_index; + cq->p_u_arm_sn = (int*)((char*)u_arm_db_page + BYTE_OFFSET(ucmd.u_arm_db_page)); + } + + for (nent = 1; nent <= entries; nent <<= 1) + ; /* nothing */ + + err = mthca_init_cq(to_mdev(ibdev), nent, + context ? to_mucontext(context) : NULL, + context ? 
ucmd.mr.pdn : to_mdev(ibdev)->driver_pd.pd_num, + cq); + if (err) + goto err_free; + + if (context ) { + struct ibv_create_cq_resp *create_cq_resp = (struct ibv_create_cq_resp *)(void*)p_umv_buf->p_inout_buf; + create_cq_resp->cqn = cq->cqn; + } + + HCA_PRINT( TRACE_LEVEL_INFORMATION, HCA_DBG_LOW , + ("uctx %p, cq_hndl %p, cq_num %#x, cqe %#x\n", + context, &cq->ibcq, cq->cqn, cq->ibcq.cqe ) ); + + return &cq->ibcq; + +err_free: + kfree(cq); + +err_unmap_ev: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.u_arm_db_index); + +err_unmap_arm: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.arm_db_index); + +err_unmap_set: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.set_db_index); + + return ERR_PTR(err); +} + +static int mthca_destroy_cq(struct ib_cq *cq) +{ + if (cq->ucontext) { + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->ucontext)->uar, + to_mucontext(cq->ucontext)->db_tab, + to_mcq(cq)->u_arm_db_index); + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->ucontext)->uar, + to_mucontext(cq->ucontext)->db_tab, + to_mcq(cq)->arm_db_index); + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->ucontext)->uar, + to_mucontext(cq->ucontext)->db_tab, + to_mcq(cq)->set_ci_db_index); + } + mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); + kfree(cq); + + return 0; +} + +static +mthca_mpt_access_t +map_qp_mpt( + IN mthca_qp_access_t qp_acl) +{ +#define ACL_MTHCA(mfl,ifl) if (qp_acl & mfl) mpt_acl |= ifl + mthca_mpt_access_t mpt_acl = 0; + + ACL_MTHCA(MTHCA_ACCESS_REMOTE_READ,MTHCA_MPT_FLAG_REMOTE_READ); + ACL_MTHCA(MTHCA_ACCESS_REMOTE_WRITE,MTHCA_MPT_FLAG_REMOTE_WRITE); + ACL_MTHCA(MTHCA_ACCESS_REMOTE_ATOMIC,MTHCA_MPT_FLAG_ATOMIC); + ACL_MTHCA(MTHCA_ACCESS_LOCAL_WRITE,MTHCA_MPT_FLAG_LOCAL_WRITE); + + return (mpt_acl | MTHCA_MPT_FLAG_LOCAL_READ); +} + +struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t acc) +{ + struct mthca_mr *mr; + int err; + + mr = kzalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mthca_mr_alloc_notrans(to_mdev(pd->device), + to_mpd(pd)->pd_num, + map_qp_mpt(acc), mr); + + if (err) { + kfree(mr); + return ERR_PTR(err); + } + + return &mr->ibmr; +} + +static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + mthca_qp_access_t acc, + u64 *iova_start) +{ + struct mthca_mr *mr; + u64 *page_list; + u64 total_size; + u64 mask; + int shift; + int npages; + int err; + int i, j, n; + + /* First check that we have enough alignment */ + if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) + return ERR_PTR(-EINVAL); + + if (num_phys_buf > 1 && + ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) + return ERR_PTR(-EINVAL); + + mask = 0; + total_size = 0; + for (i = 0; i < num_phys_buf; ++i) { + if (i != 0) + mask |= buffer_list[i].addr; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + + total_size += buffer_list[i].size; + } + + if (mask & ~PAGE_MASK) + return ERR_PTR(-EINVAL); + + /* Find largest page shift we can use to cover buffers */ + for (shift = PAGE_SHIFT; shift < 31; ++shift) + if (num_phys_buf > 1) { + if ((1Ui64 << shift) & mask) + break; + } else { + if (1Ui64 << shift >= + buffer_list[0].size + + (buffer_list[0].addr & ((1Ui64 << shift) - 1))) + break; + } + + 
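+	/*
+	 * Worked example (added for illustration, not in the original
+	 * source): with PAGE_SHIFT = 12, a single buffer at address
+	 * 0x1000 with size 0x3000 fails the test for shift 12 and 13,
+	 * and the loop settles on shift = 14, i.e. one 16 KB "page"
+	 * covers the region together with its offset inside that page.
+	 */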
buffer_list[0].size += buffer_list[0].addr & ((1Ui64 << shift) - 1); + buffer_list[0].addr &= ~0Ui64 << shift; + + mr = kzalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + npages = 0; + for (i = 0; i < num_phys_buf; ++i) + npages += (int)((buffer_list[i].size + (1Ui64 << shift) - 1) >> shift); + + if (!npages) + return &mr->ibmr; + + page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL); + if (!page_list) { + kfree(mr); + return ERR_PTR(-ENOMEM); + } + + n = 0; + for (i = 0; i < num_phys_buf; ++i) + for (j = 0; + j < (buffer_list[i].size + (1Ui64 << shift) - 1) >> shift; + ++j) + page_list[n++] = buffer_list[i].addr + ((u64) j << shift); + + HCA_PRINT( TRACE_LEVEL_VERBOSE ,HCA_DBG_LOW ,("Registering memory at %I64x (iova %I64x) " + "in PD %x; shift %d, npages %d.\n", + (u64) buffer_list[0].addr, + (u64) *iova_start, + to_mpd(pd)->pd_num, + shift, npages)); + + err = mthca_mr_alloc_phys(to_mdev(pd->device), + to_mpd(pd)->pd_num, + page_list, shift, npages, + *iova_start, total_size, + map_qp_mpt(acc), mr); + + if (err) { + kfree(page_list); + kfree(mr); + return ERR_PTR(err); + } + + kfree(page_list); + return &mr->ibmr; +} + +static struct ib_mr *mthca_reg_virt_mr(struct ib_pd *pd, + void* __ptr64 vaddr, uint64_t length, uint64_t hca_va, + mthca_qp_access_t acc, boolean_t um_call) +{ + struct mthca_dev *dev = to_mdev(pd->device); + struct mthca_mr *mr; + u64 *pages; + int err = 0; + uint32_t i, n; + mt_iobuf_t *iobuf_p; + mt_iobuf_iter_t iobuf_iter; + ib_access_t ib_acc; + + /* + * Be friendly to WRITE_MTT command and leave two + * empty slots for the index and reserved fields of the mailbox. + */ + int max_buf_list_size = PAGE_SIZE / sizeof (u64) - 2; + + HCA_ENTER(HCA_DBG_MEMORY); + + mr = kzalloc(sizeof *mr, GFP_KERNEL); + if (!mr) { + err = -ENOMEM; + goto err_nomem; + } + + /* + * We ask for writable memory if any access flags other than + * "remote read" are set. "Local write" and "remote write" + * obviously require write access. "Remote atomic" can do + * things like fetch and add, which will modify memory, and + * "MW bind" can change permissions by binding a window. + */ + + // try register the buffer + iobuf_p = &mr->iobuf; + iobuf_init( (u64)vaddr, length, um_call, iobuf_p); + ib_acc = (acc & ~MTHCA_ACCESS_REMOTE_READ) ? 
IB_AC_LOCAL_WRITE : 0; + err = iobuf_register_with_cash( (u64)vaddr, length, um_call, + &ib_acc, iobuf_p ); + if (err) + goto err_reg_mem; + mr->iobuf_used = TRUE; + + // allocate MTT's + mr->mtt = mthca_alloc_mtt(dev, iobuf_p->nr_pages); + if (IS_ERR(mr->mtt)) { + err = PTR_ERR(mr->mtt); + goto err_alloc_mtt; + } + + // allocate buffer_list for writing MTT's + pages = (u64 *) kmalloc(PAGE_SIZE,GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_pages; + } + + // write MTT's + iobuf_iter_init( iobuf_p, &iobuf_iter ); + n = 0; + for (;;) { + // get up to max_buf_list_size page physical addresses + i = iobuf_get_tpt_seg( iobuf_p, &iobuf_iter, max_buf_list_size, pages ); + if (!i) + break; + + //TODO: convert physical adresses to dma one's + + // write 'i' dma addresses + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); + if (err) + goto err_write_mtt; + n += i; + if (n >= iobuf_p->nr_pages) + break; + } + + CL_ASSERT(n == iobuf_p->nr_pages); + + // write MPT + err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, PAGE_SHIFT, hca_va, + length, map_qp_mpt(acc), mr); + if (err) + goto err_mt_alloc; + + // secure memory + if (!pd->ucontext) + goto done; + __try { + mr->secure_handle = MmSecureVirtualMemory ( vaddr, (SIZE_T)length, + (ib_acc & IB_AC_LOCAL_WRITE) ? PAGE_READWRITE : PAGE_READONLY ); + if (mr->secure_handle == NULL) { + err = -EFAULT; + goto err_secure; + } + } + __except (EXCEPTION_EXECUTE_HANDLER) { + NTSTATUS Status = GetExceptionCode(); + UNUSED_PARAM_WOWPP(Status); + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_MEMORY , + ("Exception 0x%x on MmSecureVirtualMemory(), addr %p, size %I64d, access %#x\n", + Status, vaddr, length, acc )); + err = -EFAULT; + goto err_secure; + } + +done: + free_page((void*) pages); + + HCA_EXIT(HCA_DBG_MEMORY); + return &mr->ibmr; + +err_secure: +err_mt_alloc: +err_write_mtt: + free_page((void*) pages); +err_pages: + mthca_free_mtt(dev, mr->mtt); +err_alloc_mtt: + iobuf_deregister(iobuf_p); +err_reg_mem: + kfree(mr); +err_nomem: + + HCA_EXIT(HCA_DBG_MEMORY); + return ERR_PTR(err); +} + +int mthca_dereg_mr(struct ib_mr *mr) +{ + struct mthca_mr *mmr = to_mmr(mr); + struct mthca_dev* dev = to_mdev(mr->device); + + if (mmr->secure_handle) + MmUnsecureVirtualMemory ( mmr->secure_handle ); + mthca_free_mr(dev, mmr); + if (mmr->iobuf_used) + iobuf_deregister_with_cash(&mmr->iobuf); + kfree(mmr); + return 0; +} + +static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, mthca_qp_access_t acc, + struct ib_fmr_attr *fmr_attr) +{ + struct mthca_fmr *fmr; + int err; + + fmr = kzalloc(sizeof *fmr, GFP_KERNEL); + if (!fmr) + return ERR_PTR(-ENOMEM); + + RtlCopyMemory(&fmr->attr, fmr_attr, sizeof *fmr_attr); + err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num, + map_qp_mpt(acc), fmr); + + if (err) { + kfree(fmr); + return ERR_PTR(err); + } + + return &fmr->ibfmr; +} + +static int mthca_dealloc_fmr(struct ib_fmr *fmr) +{ + struct mthca_fmr *mfmr = to_mfmr(fmr); + int err; + + err = mthca_free_fmr(to_mdev(fmr->device), mfmr); + if (err) + return err; + + kfree(mfmr); + return 0; +} + +static int mthca_unmap_fmr(struct list_head *fmr_list) +{ + struct ib_fmr *fmr; + int err; + u8 status; + struct mthca_dev *mdev = NULL; + + list_for_each_entry(fmr, fmr_list, list,struct ib_fmr) { + if (mdev && to_mdev(fmr->device) != mdev) + return -EINVAL; + mdev = to_mdev(fmr->device); + } + + if (!mdev) + return 0; + + if (mthca_is_memfree(mdev)) { + list_for_each_entry(fmr, fmr_list, list,struct ib_fmr) + mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr)); + + wmb(); + } else + 
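+		/*
+		 * Note added for illustration (not in the original source):
+		 * the Tavor branch below only rewrites the MPT entries; for
+		 * both branches it is the SYNC_TPT command issued after this
+		 * if/else that makes the updated translation tables visible
+		 * to the HCA before the FMRs are remapped.
+		 */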
list_for_each_entry(fmr, fmr_list, list,struct ib_fmr) + mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr)); + + err = mthca_SYNC_TPT(mdev, &status); + if (err) + return err; + if (status) + return -EINVAL; + return 0; +} + +static int mthca_init_node_data(struct mthca_dev *dev) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u8 status; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mthca_MAD_IFC(dev, 1, 1, + 1, NULL, NULL, in_mad, out_mad, + &status); + if (err) + goto out; + if (status) { + err = -EINVAL; + goto out; + } + + memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +int mthca_register_device(struct mthca_dev *dev) +{ + int ret; + + ret = mthca_init_node_data(dev); + if (ret) + return ret; + + strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.node_type = IB_NODE_CA; + dev->ib_dev.phys_port_cnt = (u8)dev->limits.num_ports; + dev->ib_dev.mdev = dev; + dev->ib_dev.query_device = mthca_query_device; + dev->ib_dev.query_port = mthca_query_port; + dev->ib_dev.modify_port = mthca_modify_port; + dev->ib_dev.query_pkey_chunk = mthca_query_pkey_chunk; + dev->ib_dev.query_gid_chunk = mthca_query_gid_chunk; + dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; + dev->ib_dev.alloc_pd = mthca_alloc_pd; + dev->ib_dev.dealloc_pd = mthca_dealloc_pd; + dev->ib_dev.create_ah = mthca_ah_create; + dev->ib_dev.destroy_ah = mthca_ah_destroy; + + if (dev->mthca_flags & MTHCA_FLAG_SRQ) { + dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.modify_srq = mthca_modify_srq; + dev->ib_dev.query_srq = mthca_query_srq; + dev->ib_dev.destroy_srq = mthca_destroy_srq; + + if (mthca_is_memfree(dev)) + dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; + else + dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; + } + + dev->ib_dev.create_qp = mthca_create_qp; + dev->ib_dev.modify_qp = mthca_modify_qp; + dev->ib_dev.destroy_qp = mthca_destroy_qp; + dev->ib_dev.create_cq = mthca_create_cq; + dev->ib_dev.destroy_cq = mthca_destroy_cq; + dev->ib_dev.poll_cq = mthca_poll_cq; + dev->ib_dev.get_dma_mr = mthca_get_dma_mr; + dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; + dev->ib_dev.reg_virt_mr = mthca_reg_virt_mr; + dev->ib_dev.dereg_mr = mthca_dereg_mr; + + if (dev->mthca_flags & MTHCA_FLAG_FMR) { + dev->ib_dev.alloc_fmr = mthca_alloc_fmr; + dev->ib_dev.unmap_fmr = mthca_unmap_fmr; + dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr; + if (mthca_is_memfree(dev)) + dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr; + else + dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr; + } + + dev->ib_dev.attach_mcast = mthca_multicast_attach; + dev->ib_dev.detach_mcast = mthca_multicast_detach; + dev->ib_dev.process_mad = mthca_process_mad; + + if (mthca_is_memfree(dev)) { + dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq; + dev->ib_dev.post_send = mthca_arbel_post_send; + dev->ib_dev.post_recv = mthca_arbel_post_recv; + } else { + dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq; + dev->ib_dev.post_send = mthca_tavor_post_send; + dev->ib_dev.post_recv = mthca_tavor_post_recv; + } + + KeInitializeMutex(&dev->cap_mask_mutex, 0); + + ret = ib_register_device(&dev->ib_dev); + if (ret) + return ret; + + mthca_start_catas_poll(dev); + + return 0; +} + +void 
mthca_unregister_device(struct mthca_dev *dev) +{ + mthca_stop_catas_poll(dev); + ib_unregister_device(&dev->ib_dev); +} diff --git a/branches/IBFD/hw/mthca/kernel/mthca_provider.h b/branches/IBFD/hw/mthca/kernel/mthca_provider.h new file mode 100644 index 00000000..7395f0fe --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_provider.h @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef MTHCA_PROVIDER_H +#define MTHCA_PROVIDER_H + +#include +#include +#include + +typedef uint32_t mthca_mpt_access_t; +#define MTHCA_MPT_FLAG_ATOMIC (1 << 14) +#define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13) +#define MTHCA_MPT_FLAG_REMOTE_READ (1 << 12) +#define MTHCA_MPT_FLAG_LOCAL_WRITE (1 << 11) +#define MTHCA_MPT_FLAG_LOCAL_READ (1 << 10) + +union mthca_buf { + struct scatterlist direct; + struct scatterlist *page_list; +}; + +struct mthca_uar { + PFN_NUMBER pfn; + int index; +}; + +struct mthca_user_db_table; + +struct mthca_ucontext { + struct ib_ucontext ibucontext; + struct mthca_uar uar; + struct mthca_user_db_table *db_tab; + // for user UAR + PMDL mdl; + PVOID kva; + SIZE_T uar_size; +}; + +struct mthca_mtt; + +struct mthca_mr { + //NB: the start of this structure is to be equal to mlnx_mro_t ! 
+ //NB: the structure was not inserted here for not to mix driver and provider structures + struct ib_mr ibmr; + struct mthca_mtt *mtt; + int iobuf_used; + mt_iobuf_t iobuf; + void *secure_handle; +}; + +struct mthca_fmr { + struct ib_fmr ibfmr; + struct ib_fmr_attr attr; + struct mthca_mtt *mtt; + int maps; + union { + struct { + struct mthca_mpt_entry __iomem *mpt; + u64 __iomem *mtts; + } tavor; + struct { + struct mthca_mpt_entry *mpt; + __be64 *mtts; + } arbel; + } mem; +}; + +struct mthca_pd { + struct ib_pd ibpd; + u32 pd_num; + atomic_t sqp_count; + struct mthca_mr ntmr; + int privileged; +}; + +struct mthca_eq { + struct mthca_dev *dev; + int eqn; + int eq_num; + u32 eqn_mask; + u32 cons_index; + u16 msi_x_vector; + u16 msi_x_entry; + int have_irq; + int nent; + struct scatterlist *page_list; + struct mthca_mr mr; + KDPC dpc; /* DPC for MSI-X interrupts */ + spinlock_t lock; /* spinlock for simult DPCs */ +}; + +struct mthca_av; + +enum mthca_ah_type { + MTHCA_AH_ON_HCA, + MTHCA_AH_PCI_POOL, + MTHCA_AH_KMALLOC +}; + +struct mthca_ah { + struct ib_ah ibah; + enum mthca_ah_type type; + u32 key; + struct mthca_av *av; + dma_addr_t avdma; +}; + +/* + * Quick description of our CQ/QP locking scheme: + * + * We have one global lock that protects dev->cq/qp_table. Each + * struct mthca_cq/qp also has its own lock. An individual qp lock + * may be taken inside of an individual cq lock. Both cqs attached to + * a qp may be locked, with the send cq locked first. No other + * nesting should be done. + * + * Each struct mthca_cq/qp also has an atomic_t ref count. The + * pointer from the cq/qp_table to the struct counts as one reference. + * This reference also is good for access through the consumer API, so + * modifying the CQ/QP etc doesn't need to take another reference. + * Access because of a completion being polled does need a reference. + * + * Finally, each struct mthca_cq/qp has a wait_queue_head_t for the + * destroy function to sleep on. + * + * This means that access from the consumer API requires nothing but + * taking the struct's lock. + * + * Access because of a completion event should go as follows: + * - lock cq/qp_table and look up struct + * - increment ref count in struct + * - drop cq/qp_table lock + * - lock struct, do your thing, and unlock struct + * - decrement ref count; if zero, wake up waiters + * + * To destroy a CQ/QP, we can do the following: + * - lock cq/qp_table, remove pointer, unlock cq/qp_table lock + * - decrement ref count + * - wait_event until ref count is zero + * + * It is the consumer's responsibilty to make sure that no QP + * operations (WQE posting or state modification) are pending when the + * QP is destroyed. Also, the consumer must make sure that calls to + * qp_modify are serialized. 
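+ *
+ * A minimal sketch of the completion-event path described above
+ * (added for illustration; mthca_qp_event() in mthca_qp.c follows
+ * exactly this pattern for QPs):
+ *
+ *	spin_lock(&dev->qp_table.lock, &lh);
+ *	qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
+ *	if (qp)
+ *		atomic_inc(&qp->refcount);
+ *	spin_unlock(&lh);
+ *	... do the work while holding the struct's own lock ...
+ *	if (atomic_dec_and_test(&qp->refcount))
+ *		wake_up(&qp->wait);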
+ * + * Possible optimizations (wait for profile data to see if/where we + * have locks bouncing between CPUs): + * - split cq/qp table lock into n separate (cache-aligned) locks, + * indexed (say) by the page in the table + * - split QP struct lock into three (one for common info, one for the + * send queue and one for the receive queue) + */ +//TODO: check correctness of the above requirement: "It is the consumer's responsibilty to make sure that no QP +// operations (WQE posting or state modification) are pending when the QP is destroyed" + +struct mthca_cq { + struct ib_cq ibcq; + void *cq_context; // leo: for IBAL shim + spinlock_t lock; + atomic_t refcount; + int cqn; + u32 cons_index; + int is_direct; + int is_kernel; + + /* Next fields are Arbel only */ + int set_ci_db_index; + __be32 *set_ci_db; + int arm_db_index; + __be32 *arm_db; + int arm_sn; + int u_arm_db_index; + int *p_u_arm_sn; + + union mthca_buf queue; + struct mthca_mr mr; + wait_queue_head_t wait; + KMUTEX mutex; +}; + +struct mthca_srq { + struct ib_srq ibsrq; + spinlock_t lock; + atomic_t refcount; + int srqn; + int max; + int max_gs; + int wqe_shift; + int first_free; + int last_free; + u16 counter; /* Arbel only */ + int db_index; /* Arbel only */ + __be32 *db; /* Arbel only */ + void *last; + + int is_direct; + u64 *wrid; + union mthca_buf queue; + struct mthca_mr mr; + + wait_queue_head_t wait; + KMUTEX mutex; + void *srq_context; +}; + +struct mthca_wq { + spinlock_t lock; + int max; + unsigned next_ind; + unsigned last_comp; + unsigned head; + unsigned tail; + void *last; + int max_gs; + int wqe_shift; + + int db_index; /* Arbel only */ + __be32 *db; +}; + +struct mthca_qp { + struct ib_qp ibqp; + void *qp_context; // leo: for IBAL shim + //TODO: added just because absense of ibv_query_qp + // thereafter it may be worth to be replaced by struct ib_qp_attr qp_attr; + struct ib_qp_init_attr qp_init_attr; // leo: for query_qp + atomic_t refcount; + u32 qpn; + int is_direct; + u8 transport; + u8 state; + u8 atomic_rd_en; + u8 resp_depth; + + struct mthca_mr mr; + + struct mthca_wq rq; + struct mthca_wq sq; + enum ib_sig_type sq_policy; + int send_wqe_offset; + int max_inline_data; + + u64 *wrid; + union mthca_buf queue; + + wait_queue_head_t wait; + KMUTEX mutex; +}; + +struct mthca_sqp { + struct mthca_qp qp; + int port; + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + struct scatterlist sg; +}; + +static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mthca_ucontext, ibucontext); +} + +static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct mthca_fmr, ibfmr); +} + +static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct mthca_mr, ibmr); +} + +static inline struct mthca_pd *to_mpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct mthca_pd, ibpd); +} + +static inline struct mthca_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mthca_ah, ibah); +} + +static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct mthca_cq, ibcq); +} + +static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mthca_srq, ibsrq); +} + +static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct mthca_qp, ibqp); +} + +static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp) +{ + return container_of(qp, struct 
mthca_sqp, qp); +} + +static inline uint8_t start_port(struct ib_device *device) +{ + return device->node_type == IB_NODE_SWITCH ? 0 : 1; +} + +static inline uint8_t end_port(struct ib_device *device) +{ + return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt; +} + +static inline int ib_copy_from_umv_buf(void *dest, ci_umv_buf_t* const p_umv_buf, size_t len) +{ + RtlCopyMemory(dest, p_umv_buf->p_inout_buf, len); + return 0; +} + +static inline int ib_copy_to_umv_buf(ci_umv_buf_t* const p_umv_buf, void *src, size_t len) +{ + if (p_umv_buf->output_size < len) { + p_umv_buf->status = IB_INSUFFICIENT_MEMORY; + p_umv_buf->output_size = 0; + return -EFAULT; + } + RtlCopyMemory(p_umv_buf->p_inout_buf, src, len); + p_umv_buf->status = IB_SUCCESS; + p_umv_buf->output_size = (uint32_t)len; + return 0; +} + + + +// API +int mthca_query_device(struct ib_device *ibdev, + struct ib_device_attr *props); + +int mthca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props); + +int mthca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props); + +struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + ci_umv_buf_t* const p_umv_buf); + +int mthca_dealloc_pd(struct ib_pd *pd); + +int mthca_dereg_mr(struct ib_mr *mr); + +int mthca_query_srq(struct ib_srq *ibsrq, ib_srq_attr_t *srq_attr); + +struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, + ci_umv_buf_t* const p_umv_buf); + +int mthca_dealloc_ucontext(struct ib_ucontext *context); + +struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, mthca_qp_access_t acc); + +int mthca_poll_cq_list( + IN struct ib_cq *ibcq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist ); + + +#endif /* MTHCA_PROVIDER_H */ diff --git a/branches/IBFD/hw/mthca/kernel/mthca_qp.c b/branches/IBFD/hw/mthca/kernel/mthca_qp.c new file mode 100644 index 00000000..1f932a1f --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_qp.c @@ -0,0 +1,2369 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include +#include +#include + +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_qp.tmh" +#endif +#include "mthca_cmd.h" +#include "mthca_memfree.h" +#include "mthca_wqe.h" + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_qp_table) +#pragma alloc_text (PAGE, mthca_cleanup_qp_table) +#endif + +enum { + MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, + MTHCA_ACK_REQ_FREQ = 10, + MTHCA_FLIGHT_LIMIT = 9, + MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */ + MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */ + MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */ +}; + +enum { + MTHCA_QP_STATE_RST = 0, + MTHCA_QP_STATE_INIT = 1, + MTHCA_QP_STATE_RTR = 2, + MTHCA_QP_STATE_RTS = 3, + MTHCA_QP_STATE_SQE = 4, + MTHCA_QP_STATE_SQD = 5, + MTHCA_QP_STATE_ERR = 6, + MTHCA_QP_STATE_DRAINING = 7 +}; + +enum { + MTHCA_QP_ST_RC = 0x0, + MTHCA_QP_ST_UC = 0x1, + MTHCA_QP_ST_RD = 0x2, + MTHCA_QP_ST_UD = 0x3, + MTHCA_QP_ST_MLX = 0x7 +}; + +enum { + MTHCA_QP_PM_MIGRATED = 0x3, + MTHCA_QP_PM_ARMED = 0x0, + MTHCA_QP_PM_REARM = 0x1 +}; + +enum { + /* qp_context flags */ + MTHCA_QP_BIT_DE = 1 << 8, + /* params1 */ + MTHCA_QP_BIT_SRE = 1 << 15, + MTHCA_QP_BIT_SWE = 1 << 14, + MTHCA_QP_BIT_SAE = 1 << 13, + MTHCA_QP_BIT_SIC = 1 << 4, + MTHCA_QP_BIT_SSC = 1 << 3, + /* params2 */ + MTHCA_QP_BIT_RRE = 1 << 15, + MTHCA_QP_BIT_RWE = 1 << 14, + MTHCA_QP_BIT_RAE = 1 << 13, + MTHCA_QP_BIT_RIC = 1 << 4, + MTHCA_QP_BIT_RSC = 1 << 3 +}; + +#pragma pack(push,1) +struct mthca_qp_path { + __be32 port_pkey; + u8 rnr_retry; + u8 g_mylmc; + __be16 rlid; + u8 ackto; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 rgid[16]; +} ; + +struct mthca_qp_context { + __be32 flags; + __be32 tavor_sched_queue; /* Reserved on Arbel */ + u8 mtu_msgmax; + u8 rq_size_stride; /* Reserved on Tavor */ + u8 sq_size_stride; /* Reserved on Tavor */ + u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */ + __be32 usr_page; + __be32 local_qpn; + __be32 remote_qpn; + u32 reserved1[2]; + struct mthca_qp_path pri_path; + struct mthca_qp_path alt_path; + __be32 rdd; + __be32 pd; + __be32 wqe_base; + __be32 wqe_lkey; + __be32 params1; + __be32 reserved2; + __be32 next_send_psn; + __be32 cqn_snd; + __be32 snd_wqe_base_l; /* Next send WQE on Tavor */ + __be32 snd_db_index; /* (debugging only entries) */ + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 ra_buff_indx; + __be32 cqn_rcv; + __be32 rcv_wqe_base_l; /* Next recv WQE on Tavor */ + __be32 rcv_db_index; /* (debugging only entries) */ + __be32 qkey; + __be32 srqn; + __be32 rmsn; + __be16 rq_wqe_counter; /* reserved on Tavor */ + __be16 sq_wqe_counter; /* reserved on Tavor */ + u32 reserved3[18]; +} ; + +struct mthca_qp_param { + __be32 opt_param_mask; + u32 reserved1; + struct mthca_qp_context context; + u32 reserved2[62]; +} ; +#pragma pack(pop) + +enum { + MTHCA_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, + MTHCA_QP_OPTPAR_RRE = 1 << 1, + MTHCA_QP_OPTPAR_RAE = 1 << 2, + MTHCA_QP_OPTPAR_RWE = 1 << 3, + MTHCA_QP_OPTPAR_PKEY_INDEX = 1 << 4, + MTHCA_QP_OPTPAR_Q_KEY = 1 << 5, + MTHCA_QP_OPTPAR_RNR_TIMEOUT = 1 << 6, + MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 
<< 7, + MTHCA_QP_OPTPAR_SRA_MAX = 1 << 8, + MTHCA_QP_OPTPAR_RRA_MAX = 1 << 9, + MTHCA_QP_OPTPAR_PM_STATE = 1 << 10, + MTHCA_QP_OPTPAR_PORT_NUM = 1 << 11, + MTHCA_QP_OPTPAR_RETRY_COUNT = 1 << 12, + MTHCA_QP_OPTPAR_ALT_RNR_RETRY = 1 << 13, + MTHCA_QP_OPTPAR_ACK_TIMEOUT = 1 << 14, + MTHCA_QP_OPTPAR_RNR_RETRY = 1 << 15, + MTHCA_QP_OPTPAR_SCHED_QUEUE = 1 << 16 +}; + +static const u8 mthca_opcode[] = { + MTHCA_OPCODE_RDMA_WRITE, + MTHCA_OPCODE_RDMA_WRITE_IMM, + MTHCA_OPCODE_SEND, + MTHCA_OPCODE_SEND_IMM, + MTHCA_OPCODE_RDMA_READ, + MTHCA_OPCODE_ATOMIC_CS, + MTHCA_OPCODE_ATOMIC_FA +}; + + +enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS }; + +static struct _state_table { + int trans; + u32 req_param[NUM_TRANS]; + u32 opt_param[NUM_TRANS]; +} state_table[IBQPS_ERR + 1][IBQPS_ERR + 1]= {0}; + +static void fill_state_table() +{ + struct _state_table *t; + RtlZeroMemory( state_table, sizeof(state_table) ); + + /* IBQPS_RESET */ + t = &state_table[IBQPS_RESET][0]; + t[IBQPS_RESET].trans = MTHCA_TRANS_ANY2RST; + t[IBQPS_ERR].trans = MTHCA_TRANS_ANY2ERR; + + t[IBQPS_INIT].trans = MTHCA_TRANS_RST2INIT; + t[IBQPS_INIT].req_param[UD] = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_QKEY; + t[IBQPS_INIT].req_param[UC] = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS; + t[IBQPS_INIT].req_param[RC] = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS; + t[IBQPS_INIT].req_param[MLX] = IB_QP_PKEY_INDEX |IB_QP_QKEY; + t[IBQPS_INIT].opt_param[MLX] = IB_QP_PORT; + + /* IBQPS_INIT */ + t = &state_table[IBQPS_INIT][0]; + t[IBQPS_RESET].trans = MTHCA_TRANS_ANY2RST; + t[IBQPS_ERR].trans = MTHCA_TRANS_ANY2ERR; + + t[IBQPS_INIT].trans = MTHCA_TRANS_INIT2INIT; + t[IBQPS_INIT].opt_param[UD] = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_QKEY; + t[IBQPS_INIT].opt_param[UC] = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS; + t[IBQPS_INIT].opt_param[RC] = IB_QP_PKEY_INDEX |IB_QP_PORT |IB_QP_ACCESS_FLAGS; + t[IBQPS_INIT].opt_param[MLX] = IB_QP_PKEY_INDEX |IB_QP_QKEY; + + t[IBQPS_RTR].trans = MTHCA_TRANS_INIT2RTR; + t[IBQPS_RTR].req_param[UC] = + IB_QP_AV |IB_QP_PATH_MTU |IB_QP_DEST_QPN |IB_QP_RQ_PSN; + t[IBQPS_RTR].req_param[RC] = + IB_QP_AV |IB_QP_PATH_MTU |IB_QP_DEST_QPN |IB_QP_RQ_PSN |IB_QP_MAX_DEST_RD_ATOMIC |IB_QP_MIN_RNR_TIMER; + t[IBQPS_RTR].opt_param[UD] = IB_QP_PKEY_INDEX |IB_QP_QKEY; + t[IBQPS_RTR].opt_param[UC] = IB_QP_PKEY_INDEX |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS; + t[IBQPS_RTR].opt_param[RC] = IB_QP_PKEY_INDEX |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS; + t[IBQPS_RTR].opt_param[MLX] = IB_QP_PKEY_INDEX |IB_QP_QKEY; + +/* IBQPS_RTR */ + t = &state_table[IBQPS_RTR][0]; + t[IBQPS_RESET].trans = MTHCA_TRANS_ANY2RST; + t[IBQPS_ERR].trans = MTHCA_TRANS_ANY2ERR; + + t[IBQPS_RTS].trans = MTHCA_TRANS_RTR2RTS; + t[IBQPS_RTS].req_param[UD] = IB_QP_SQ_PSN; + t[IBQPS_RTS].req_param[UC] = IB_QP_SQ_PSN; + t[IBQPS_RTS].req_param[RC] = + IB_QP_TIMEOUT |IB_QP_RETRY_CNT |IB_QP_RNR_RETRY |IB_QP_SQ_PSN |IB_QP_MAX_QP_RD_ATOMIC; + t[IBQPS_RTS].req_param[MLX] = IB_QP_SQ_PSN; + t[IBQPS_RTS].opt_param[UD] = IB_QP_CUR_STATE |IB_QP_QKEY; + t[IBQPS_RTS].opt_param[UC] = + IB_QP_CUR_STATE |IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PATH_MIG_STATE; + t[IBQPS_RTS].opt_param[RC] = IB_QP_CUR_STATE |IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE; + t[IBQPS_RTS].opt_param[MLX] = IB_QP_CUR_STATE |IB_QP_QKEY; + + /* IBQPS_RTS */ + t = &state_table[IBQPS_RTS][0]; + t[IBQPS_RESET].trans = MTHCA_TRANS_ANY2RST; + t[IBQPS_ERR].trans = MTHCA_TRANS_ANY2ERR; + + t[IBQPS_RTS].trans = MTHCA_TRANS_RTS2RTS; + t[IBQPS_RTS].opt_param[UD] = IB_QP_CUR_STATE 
|IB_QP_QKEY; + t[IBQPS_RTS].opt_param[UC] = IB_QP_ACCESS_FLAGS |IB_QP_ALT_PATH |IB_QP_PATH_MIG_STATE; + t[IBQPS_RTS].opt_param[RC] = IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH |IB_QP_PATH_MIG_STATE |IB_QP_MIN_RNR_TIMER; + t[IBQPS_RTS].opt_param[MLX] = IB_QP_CUR_STATE |IB_QP_QKEY; + + t[IBQPS_SQD].trans = MTHCA_TRANS_RTS2SQD; + t[IBQPS_SQD].opt_param[UD] = IB_QP_EN_SQD_ASYNC_NOTIFY; + t[IBQPS_SQD].opt_param[UC] = IB_QP_EN_SQD_ASYNC_NOTIFY; + t[IBQPS_SQD].opt_param[RC] = IB_QP_EN_SQD_ASYNC_NOTIFY; + t[IBQPS_SQD].opt_param[MLX] = IB_QP_EN_SQD_ASYNC_NOTIFY; + + /* IBQPS_SQD */ + t = &state_table[IBQPS_SQD][0]; + t[IBQPS_RESET].trans = MTHCA_TRANS_ANY2RST; + t[IBQPS_ERR].trans = MTHCA_TRANS_ANY2ERR; + + t[IBQPS_RTS].trans = MTHCA_TRANS_SQD2RTS; + t[IBQPS_RTS].opt_param[UD] = IB_QP_CUR_STATE |IB_QP_QKEY; + t[IBQPS_RTS].opt_param[UC] = IB_QP_CUR_STATE | + IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PATH_MIG_STATE; + t[IBQPS_RTS].opt_param[RC] = IB_QP_CUR_STATE |IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE; + t[IBQPS_RTS].opt_param[MLX] = IB_QP_CUR_STATE |IB_QP_QKEY; + + t[IBQPS_SQD].trans = MTHCA_TRANS_SQD2SQD; + t[IBQPS_SQD].opt_param[UD] = IB_QP_PKEY_INDEX |IB_QP_QKEY; + t[IBQPS_SQD].opt_param[UC] = IB_QP_AV | IB_QP_CUR_STATE | + IB_QP_ALT_PATH |IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_PATH_MIG_STATE; + t[IBQPS_SQD].opt_param[RC] = IB_QP_AV |IB_QP_TIMEOUT |IB_QP_RETRY_CNT |IB_QP_RNR_RETRY | + IB_QP_MAX_QP_RD_ATOMIC |IB_QP_MAX_DEST_RD_ATOMIC |IB_QP_CUR_STATE |IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS |IB_QP_PKEY_INDEX |IB_QP_MIN_RNR_TIMER |IB_QP_PATH_MIG_STATE; + t[IBQPS_SQD].opt_param[MLX] = IB_QP_PKEY_INDEX |IB_QP_QKEY; + + /* IBQPS_SQE */ + t = &state_table[IBQPS_SQE][0]; + t[IBQPS_RESET].trans = MTHCA_TRANS_ANY2RST; + t[IBQPS_ERR].trans = MTHCA_TRANS_ANY2ERR; + + t[IBQPS_RTS].trans = MTHCA_TRANS_SQERR2RTS; + t[IBQPS_RTS].opt_param[UD] = IB_QP_CUR_STATE |IB_QP_QKEY; + t[IBQPS_RTS].opt_param[UC] = IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS; +// t[IBQPS_RTS].opt_param[RC] = IB_QP_CUR_STATE |IB_QP_MIN_RNR_TIMER; + t[IBQPS_RTS].opt_param[MLX] = IB_QP_CUR_STATE |IB_QP_QKEY; + + /* IBQPS_ERR */ + t = &state_table[IBQPS_ERR][0]; + t[IBQPS_RESET].trans = MTHCA_TRANS_ANY2RST; + t[IBQPS_ERR].trans = MTHCA_TRANS_ANY2ERR; + +}; + + +static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp) +{ + return qp->qpn >= (u32)dev->qp_table.sqp_start && + qp->qpn <= (u32)dev->qp_table.sqp_start + 3; +} + +static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp) +{ + return qp->qpn >= (u32)dev->qp_table.sqp_start && + qp->qpn <= (u32)(dev->qp_table.sqp_start + 1); +} + + +static void dump_wqe(u32 print_lvl, u32 *wqe_ptr , struct mthca_qp *qp_ptr) +{ + __be32 *wqe = wqe_ptr; + + UNUSED_PARAM_WOWPP(qp_ptr); + UNUSED_PARAM_WOWPP(print_lvl); + + (void) wqe; /* avoid warning if mthca_dbg compiled away... 
*/ + HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents QPN 0x%06x \n",qp_ptr->qpn)); + HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0 + , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3]))); + HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4 + , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7]))); + HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8 + , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11]))); + HCA_PRINT(print_lvl,HCA_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12 + , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15]))); + +} + + +static void *get_recv_wqe(struct mthca_qp *qp, int n) +{ + if (qp->is_direct) + return (u8*)qp->queue.direct.page + (n << qp->rq.wqe_shift); + else + return (u8*)qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].page + + ((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1)); +} + +static void *get_send_wqe(struct mthca_qp *qp, int n) +{ + if (qp->is_direct) + return (u8*)qp->queue.direct.page + qp->send_wqe_offset + + (n << qp->sq.wqe_shift); + else + return (u8*)qp->queue.page_list[(qp->send_wqe_offset + + (n << qp->sq.wqe_shift)) >> + PAGE_SHIFT].page + + ((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) & + (PAGE_SIZE - 1)); +} + +static void mthca_wq_init(struct mthca_wq *wq) +{ + spin_lock_init(&wq->lock); + wq->next_ind = 0; + wq->last_comp = wq->max - 1; + wq->head = 0; + wq->tail = 0; +} + +void mthca_qp_event(struct mthca_dev *dev, u32 qpn, + enum ib_event_type event_type, u8 vendor_code) +{ + struct mthca_qp *qp; + struct ib_event event; + SPIN_LOCK_PREP(lh); + + spin_lock(&dev->qp_table.lock, &lh); + qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1)); + if (qp) + atomic_inc(&qp->refcount); + spin_unlock(&lh); + + if (!qp) { + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_QP,("QP %06x Async event for bogus \n", qpn)); + return; + } + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.qp = &qp->ibqp; + event.vendor_specific = vendor_code; + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_QP,("QP %06x Async event event_type 0x%x vendor_code 0x%x\n", + qpn,event_type,vendor_code)); + if (qp->ibqp.event_handler) + qp->ibqp.event_handler(&event, qp->ibqp.qp_context); + + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +static int to_mthca_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IBQPS_RESET: return MTHCA_QP_STATE_RST; + case IBQPS_INIT: return MTHCA_QP_STATE_INIT; + case IBQPS_RTR: return MTHCA_QP_STATE_RTR; + case IBQPS_RTS: return MTHCA_QP_STATE_RTS; + case IBQPS_SQD: return MTHCA_QP_STATE_SQD; + case IBQPS_SQE: return MTHCA_QP_STATE_SQE; + case IBQPS_ERR: return MTHCA_QP_STATE_ERR; + default: return -1; + } +} + +static int to_mthca_st(int transport) +{ + switch (transport) { + case RC: return MTHCA_QP_ST_RC; + case UC: return MTHCA_QP_ST_UC; + case UD: return MTHCA_QP_ST_UD; + case RD: return MTHCA_QP_ST_RD; + case MLX: return MTHCA_QP_ST_MLX; + default: return -1; + } +} + +static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr, + int attr_mask) +{ + if (attr_mask & IB_QP_PKEY_INDEX) + sqp->pkey_index = attr->pkey_index; + if (attr_mask & IB_QP_QKEY) + sqp->qkey = attr->qkey; + if (attr_mask & IB_QP_SQ_PSN) + sqp->send_psn = attr->sq_psn; +} + +static void init_port(struct mthca_dev *dev, int port) +{ + int err; + u8 status; + struct mthca_init_ib_param param; 
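+ /* Fill the INIT_IB parameters from the device capability limits and bring the link up on this port (called from mthca_modify_qp when QP0 enters RTR). */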
+ + RtlZeroMemory(&param, sizeof param); + + param.port_width = dev->limits.port_width_cap; + param.vl_cap = dev->limits.vl_cap; + param.mtu_cap = dev->limits.mtu_cap; + param.gid_cap = (u16)dev->limits.gid_table_len; + param.pkey_cap = (u16)dev->limits.pkey_table_len; + + err = mthca_INIT_IB(dev, &param, port, &status); + if (err) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("INIT_IB failed, return code %d.\n", err)); + if (status) + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("INIT_IB returned status %02x.\n", status)); +} + + +static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + u8 dest_rd_atomic; + u32 access_flags; + u32 hw_access_flags = 0; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= MTHCA_ACCESS_REMOTE_WRITE; + + if (access_flags & MTHCA_ACCESS_REMOTE_READ) + hw_access_flags |= MTHCA_QP_BIT_RRE; + if (access_flags & MTHCA_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= MTHCA_QP_BIT_RAE; + if (access_flags & MTHCA_ACCESS_REMOTE_WRITE) + hw_access_flags |= MTHCA_QP_BIT_RWE; + + return cl_hton32(hw_access_flags); +} + +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + enum ib_qp_state cur_state, new_state; + struct mthca_mailbox *mailbox; + struct mthca_qp_param *qp_param; + struct mthca_qp_context *qp_context; + u32 req_param, opt_param; + u32 sqd_event = 0; + u8 status; + int err = -EINVAL; + SPIN_LOCK_PREP(lhs); + SPIN_LOCK_PREP(lhr); + + down( &qp->mutex ); + + if (attr_mask & IB_QP_CUR_STATE) { + if (attr->cur_qp_state != IBQPS_RTR && + attr->cur_qp_state != IBQPS_RTS && + attr->cur_qp_state != IBQPS_SQD && + attr->cur_qp_state != IBQPS_SQE) + goto out; + else + cur_state = attr->cur_qp_state; + } else { + spin_lock_irq(&qp->sq.lock, &lhs); + spin_lock(&qp->rq.lock, &lhr); + cur_state = qp->state; + spin_unlock(&lhr); + spin_unlock_irq(&lhs); + } + + if (attr_mask & IB_QP_STATE) { + if (attr->qp_state < 0 || attr->qp_state > IBQPS_ERR) + goto out; + new_state = attr->qp_state; + } else + new_state = cur_state; + + if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Illegal QP transition " + "%d->%d\n", cur_state, new_state)); + goto out; + } + + req_param = state_table[cur_state][new_state].req_param[qp->transport]; + opt_param = state_table[cur_state][new_state].opt_param[qp->transport]; + + if ((req_param & attr_mask) != req_param) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("QP transition " + "%d->%d missing req attr 0x%08x\n", + cur_state, new_state, + req_param & ~attr_mask)); + //NB: IBAL doesn't use all the fields, so we can miss some mandatory flags + goto out; + } + + if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("QP transition (transport %d) " + "%d->%d has extra attr 0x%08x\n", + qp->transport, + cur_state, new_state, + attr_mask & ~(req_param | opt_param | + IB_QP_STATE))); + //NB: The old code sometimes uses optional flags that are not so in this code + goto out; + } + + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->limits.pkey_table_len) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("PKey index (%u) too large. 
max is %d\n", + attr->pkey_index,dev->limits.pkey_table_len-1)); + goto out; + } + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Port number (%u) is invalid\n", attr->port_num)); + goto out; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->limits.max_qp_init_rdma) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Max rdma_atomic as initiator %u too large (max is %d)\n", + attr->max_rd_atomic, dev->limits.max_qp_init_rdma)); + goto out; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("Max rdma_atomic as responder %u too large (max %d)\n", + attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift)); + goto out; + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto out; + } + qp_param = mailbox->buf; + qp_context = &qp_param->context; + RtlZeroMemory(qp_param, sizeof *qp_param); + + qp_context->flags = cl_hton32((to_mthca_state(new_state) << 28) | + (to_mthca_st(qp->transport) << 16)); + qp_context->flags |= cl_hton32(MTHCA_QP_BIT_DE); + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) + qp_context->flags |= cl_hton32(MTHCA_QP_PM_MIGRATED << 11); + else { + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PM_STATE); + switch (attr->path_mig_state) { + case IB_APM_MIGRATED: + qp_context->flags |= cl_hton32(MTHCA_QP_PM_MIGRATED << 11); + break; + case IB_APM_REARM: + qp_context->flags |= cl_hton32(MTHCA_QP_PM_REARM << 11); + break; + case IB_APM_ARMED: + qp_context->flags |= cl_hton32(MTHCA_QP_PM_ARMED << 11); + break; + } + } + + /* leave tavor_sched_queue as 0 */ + + if (qp->transport == MLX || qp->transport == UD) + qp_context->mtu_msgmax = (IB_MTU_LEN_2048 << 5) | 11; + else if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_LEN_256 || attr->path_mtu > IB_MTU_LEN_2048) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP, + ("path MTU (%u) is invalid\n", attr->path_mtu)); + goto out_mailbox; + } + qp_context->mtu_msgmax = (u8)((attr->path_mtu << 5) | 31); + } + + if (mthca_is_memfree(dev)) { + if (qp->rq.max) + qp_context->rq_size_stride = (u8)(long_log2(qp->rq.max) << 3); + qp_context->rq_size_stride |= qp->rq.wqe_shift - 4; + + if (qp->sq.max) + qp_context->sq_size_stride = (u8)(long_log2(qp->sq.max) << 3); + qp_context->sq_size_stride |= qp->sq.wqe_shift - 4; + } + + /* leave arbel_sched_queue as 0 */ + + if (qp->ibqp.ucontext) + qp_context->usr_page = + cl_hton32(to_mucontext(qp->ibqp.ucontext)->uar.index); + else + qp_context->usr_page = cl_hton32(dev->driver_uar.index); + qp_context->local_qpn = cl_hton32(qp->qpn); + if (attr_mask & IB_QP_DEST_QPN) { + qp_context->remote_qpn = cl_hton32(attr->dest_qp_num); + } + + if (qp->transport == MLX) + qp_context->pri_path.port_pkey |= + cl_hton32(to_msqp(qp)->port << 24); + else { + if (attr_mask & IB_QP_PORT) { + qp_context->pri_path.port_pkey |= + cl_hton32(attr->port_num << 24); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PORT_NUM); + } + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + qp_context->pri_path.port_pkey |= + cl_hton32(attr->pkey_index); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PKEY_INDEX); + } + + if (attr_mask & IB_QP_RNR_RETRY) { + qp_context->pri_path.rnr_retry = attr->rnr_retry << 5; + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RNR_RETRY); + } + + if (attr_mask & IB_QP_AV) { + 
qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; + qp_context->pri_path.rlid = cl_hton16(attr->ah_attr.dlid); + //TODO: work around: set always full speed - really, it's much more complicate + qp_context->pri_path.static_rate = 0; + if (attr->ah_attr.ah_flags & IB_AH_GRH) { + qp_context->pri_path.g_mylmc |= 1 << 7; + qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index; + qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit; + qp_context->pri_path.sl_tclass_flowlabel = + cl_hton32((attr->ah_attr.sl << 28) | + (attr->ah_attr.grh.traffic_class << 20) | + (attr->ah_attr.grh.flow_label)); + memcpy(qp_context->pri_path.rgid, + attr->ah_attr.grh.dgid.raw, 16); + } else { + qp_context->pri_path.sl_tclass_flowlabel = + cl_hton32(attr->ah_attr.sl << 28); + } + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH); + } + + if (attr_mask & IB_QP_TIMEOUT) { + qp_context->pri_path.ackto = attr->timeout << 3; + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); + } + + /* XXX alt_path */ + + /* leave rdd as 0 */ + qp_context->pd = cl_hton32(to_mpd(ibqp->pd)->pd_num); + /* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */ + qp_context->wqe_lkey = cl_hton32(qp->mr.ibmr.lkey); + qp_context->params1 = cl_hton32((unsigned long)( + (MTHCA_ACK_REQ_FREQ << 28) | + (MTHCA_FLIGHT_LIMIT << 24) | + MTHCA_QP_BIT_SWE)); + if (qp->sq_policy == IB_SIGNAL_ALL_WR) + qp_context->params1 |= cl_hton32(MTHCA_QP_BIT_SSC); + if (attr_mask & IB_QP_RETRY_CNT) { + qp_context->params1 |= cl_hton32(attr->retry_cnt << 16); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RETRY_COUNT); + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic) { + qp_context->params1 |= + cl_hton32(MTHCA_QP_BIT_SRE | + MTHCA_QP_BIT_SAE); + qp_context->params1 |= + cl_hton32(fls(attr->max_rd_atomic - 1) << 21); + } + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_SRA_MAX); + } + + if (attr_mask & IB_QP_SQ_PSN) + qp_context->next_send_psn = cl_hton32(attr->sq_psn); + qp_context->cqn_snd = cl_hton32(to_mcq(ibqp->send_cq)->cqn); + + if (mthca_is_memfree(dev)) { + qp_context->snd_wqe_base_l = cl_hton32(qp->send_wqe_offset); + qp_context->snd_db_index = cl_hton32(qp->sq.db_index); + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + + if (attr->max_dest_rd_atomic) + qp_context->params2 |= + cl_hton32(fls(attr->max_dest_rd_atomic - 1) << 21); + + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RRA_MAX); + + } + + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { + qp_context->params2 |= get_hw_access_flags(qp, attr, attr_mask); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RWE | + MTHCA_QP_OPTPAR_RRE | + MTHCA_QP_OPTPAR_RAE); + } + + qp_context->params2 |= cl_hton32(MTHCA_QP_BIT_RSC); + + if (ibqp->srq) + qp_context->params2 |= cl_hton32(MTHCA_QP_BIT_RIC); + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + qp_context->rnr_nextrecvpsn |= cl_hton32(attr->min_rnr_timer << 24); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_RNR_TIMEOUT); + } + if (attr_mask & IB_QP_RQ_PSN) + qp_context->rnr_nextrecvpsn |= cl_hton32(attr->rq_psn); + + qp_context->ra_buff_indx = + cl_hton32(dev->qp_table.rdb_base + + ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE << + dev->qp_table.rdb_shift)); + + qp_context->cqn_rcv = cl_hton32(to_mcq(ibqp->recv_cq)->cqn); + + if (mthca_is_memfree(dev)) + qp_context->rcv_db_index = cl_hton32(qp->rq.db_index); + + if (attr_mask & IB_QP_QKEY) { + qp_context->qkey = 
cl_hton32(attr->qkey); + qp_param->opt_param_mask |= cl_hton32(MTHCA_QP_OPTPAR_Q_KEY); + } + + if (ibqp->srq) + qp_context->srqn = cl_hton32(1 << 24 | + to_msrq(ibqp->srq)->srqn); + + if (cur_state == IBQPS_RTS && new_state == IBQPS_SQD && + attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && + attr->en_sqd_async_notify) + sqd_event = (u32)(1 << 31); + + err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, + qp->qpn, 0, mailbox, sqd_event, &status); + if (err) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("mthca_MODIFY_QP returned error (qp-num = 0x%x) returned status %02x " + "cur_state = %d new_state = %d attr_mask = %d req_param = %d opt_param = %d\n", + ibqp->qp_num, status, cur_state, new_state, + attr_mask, req_param, opt_param)); + goto out_mailbox; + } + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("mthca_MODIFY_QP bad status(qp-num = 0x%x) returned status %02x " + "cur_state = %d new_state = %d attr_mask = %d req_param = %d opt_param = %d\n", + ibqp->qp_num, status, cur_state, new_state, + attr_mask, req_param, opt_param)); + err = -EINVAL; + goto out_mailbox; + } + + qp->state = new_state; + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = (u8)attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + + if (is_sqp(dev, qp)) + store_attrs(to_msqp(qp), attr, attr_mask); + + /* + * If we moved QP0 to RTR, bring the IB link up; if we moved + * QP0 to RESET or ERROR, bring the link back down. + */ + if (is_qp0(dev, qp)) { + if (cur_state != IBQPS_RTR && + new_state == IBQPS_RTR) + init_port(dev, to_msqp(qp)->port); + + if (cur_state != IBQPS_RESET && + cur_state != IBQPS_ERR && + (new_state == IBQPS_RESET || + new_state == IBQPS_ERR)) + mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status); + } + + /* + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ + if (new_state == IBQPS_RESET && !qp->ibqp.ucontext) { + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + + mthca_wq_init(&qp->sq); + qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); + mthca_wq_init(&qp->rq); + qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); + + if (mthca_is_memfree(dev)) { + *qp->sq.db = 0; + *qp->rq.db = 0; + } + } + +out_mailbox: + mthca_free_mailbox(dev, mailbox); + +out: + up( &qp->mutex ); + return err; +} + +static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz) +{ + + /* + * Calculate the maximum size of WQE s/g segments, excluding + * the next segment and other non-data segments. 
+ */ + int max_data_size = desc_sz - sizeof (struct mthca_next_seg); + + switch (qp->transport) { + case MLX: + max_data_size -= 2 * sizeof (struct mthca_data_seg); + break; + + case UD: + if (mthca_is_memfree(dev)) + max_data_size -= sizeof (struct mthca_arbel_ud_seg); + else + max_data_size -= sizeof (struct mthca_tavor_ud_seg); + break; + + default: + max_data_size -= sizeof (struct mthca_raddr_seg); + break; + } + return max_data_size; +} + +static inline int mthca_max_inline_data(int max_data_size) +{ + return max_data_size - MTHCA_INLINE_HEADER_SIZE ; +} + +static void mthca_adjust_qp_caps(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int max_data_size = mthca_max_data_size(dev, qp, + min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift)); + + qp->max_inline_data = mthca_max_inline_data( max_data_size); + + qp->sq.max_gs = min(dev->limits.max_sg, + (int)(max_data_size / sizeof (struct mthca_data_seg))); + qp->rq.max_gs = min(dev->limits.max_sg, + (int)((min(dev->limits.max_desc_sz, 1 << qp->rq.wqe_shift) - + sizeof (struct mthca_next_seg)) / sizeof (struct mthca_data_seg))); +} + +/* + * Allocate and register buffer for WQEs. qp->rq.max, sq.max, + * rq.max_gs and sq.max_gs must all be assigned. + * mthca_alloc_wqe_buf will calculate rq.wqe_shift and + * sq.wqe_shift (as well as send_wqe_offset, is_direct, and + * queue) + */ +static int mthca_alloc_wqe_buf(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_qp *qp) +{ + int size; + int err = -ENOMEM; + + HCA_ENTER(HCA_DBG_QP); + size = sizeof (struct mthca_next_seg) + + qp->rq.max_gs * sizeof (struct mthca_data_seg); + + if (size > dev->limits.max_desc_sz) + return -EINVAL; + + for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size; + qp->rq.wqe_shift++) + ; /* nothing */ + + size = qp->sq.max_gs * sizeof (struct mthca_data_seg); + switch (qp->transport) { + case MLX: + size += 2 * sizeof (struct mthca_data_seg); + break; + + case UD: + size += mthca_is_memfree(dev) ? + sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg); + break; + + case UC: + size += sizeof (struct mthca_raddr_seg); + break; + + case RC: + size += sizeof (struct mthca_raddr_seg); + /* + * An atomic op will require an atomic segment, a + * remote address segment and one scatter entry. + */ + size = max(size, + sizeof (struct mthca_atomic_seg) + + sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_data_seg)); + break; + + default: + break; + } + + /* Make sure that we have enough space for a bind request */ + size = max(size, sizeof (struct mthca_bind_seg)); + + size += sizeof (struct mthca_next_seg); + + if (size > dev->limits.max_desc_sz) + return -EINVAL; + + for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; + qp->sq.wqe_shift++) + ; /* nothing */ + + qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, + 1 << qp->sq.wqe_shift); + + /* + * If this is a userspace QP, we don't actually have to + * allocate anything. All we need is to calculate the WQE + * sizes and the send_wqe_offset, so we're done now. 
+ */ + if (pd->ibpd.ucontext) + return 0; + + size = (int)(LONG_PTR)NEXT_PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)); + + qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64), + GFP_KERNEL); + if (!qp->wrid) + goto err_out; + + err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE, + &qp->queue, &qp->is_direct, pd, 0, &qp->mr); + if (err) + goto err_out; + + HCA_EXIT(HCA_DBG_QP); + return 0; + +err_out: + kfree(qp->wrid); + return err; +} + +static void mthca_free_wqe_buf(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + mthca_buf_free(dev, (int)(LONG_PTR)NEXT_PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)), + &qp->queue, qp->is_direct, &qp->mr); + kfree(qp->wrid); +} + +static int mthca_map_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret; + + if (mthca_is_memfree(dev)) { + ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn); + if (ret) + return ret; + + ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn); + if (ret) + goto err_qpc; + + ret = mthca_table_get(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + if (ret) + goto err_eqpc; + + } + + return 0; + +err_eqpc: + mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); + +err_qpc: + mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); + + return ret; +} + +static void mthca_unmap_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + mthca_table_put(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); + mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); +} + +static int mthca_alloc_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret = 0; + + if (mthca_is_memfree(dev)) { + qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, + qp->qpn, &qp->rq.db); + if (qp->rq.db_index < 0) + return qp->rq.db_index; + + qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, + qp->qpn, &qp->sq.db); + if (qp->sq.db_index < 0){ + mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); + return qp->sq.db_index; + } + + } + + return ret; +} + +static void mthca_free_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); + } +} + +static int mthca_alloc_qp_common(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_sig_type send_policy, + struct mthca_qp *qp) +{ + int ret; + int i; + + atomic_set(&qp->refcount, 1); + init_waitqueue_head(&qp->wait); + KeInitializeMutex(&qp->mutex, 0); + + qp->state = IBQPS_RESET; + qp->atomic_rd_en = 0; + qp->resp_depth = 0; + qp->sq_policy = send_policy; + mthca_wq_init(&qp->sq); + mthca_wq_init(&qp->rq); + + UNREFERENCED_PARAMETER(send_cq); + UNREFERENCED_PARAMETER(recv_cq); + + ret = mthca_map_memfree(dev, qp); + if (ret) + return ret; + + ret = mthca_alloc_wqe_buf(dev, pd, qp); + if (ret) { + mthca_unmap_memfree(dev, qp); + return ret; + } + + mthca_adjust_qp_caps(dev, qp); + + /* + * If this is a userspace QP, we're done now. The doorbells + * will be allocated and buffers will be initialized in + * userspace. 
+ */ + if (pd->ibpd.ucontext) + return 0; + + ret = mthca_alloc_memfree(dev, qp); + if (ret) { + mthca_free_wqe_buf(dev, qp); + mthca_unmap_memfree(dev, qp); + return ret; + } + + if (mthca_is_memfree(dev)) { + struct mthca_next_seg *next; + struct mthca_data_seg *scatter; + int size = (sizeof (struct mthca_next_seg) + + qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16; + + for (i = 0; i < qp->rq.max; ++i) { + next = get_recv_wqe(qp, i); + next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) << + qp->rq.wqe_shift); + next->ee_nds = cl_hton32(size); + + for (scatter = (void *) (next + 1); + (void *) scatter < (void *) ((u8*)next + (1 << qp->rq.wqe_shift)); + ++scatter) + scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY); + } + + for (i = 0; i < qp->sq.max; ++i) { + next = get_send_wqe(qp, i); + next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) << + qp->sq.wqe_shift) + + qp->send_wqe_offset); + } + } + + qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); + qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); + + return 0; +} + +static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, + struct mthca_qp *qp) +{ + int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz); + + /* Sanity check QP size before proceeding */ + if (cap->max_send_wr > (u32)dev->limits.max_wqes || + cap->max_recv_wr > (u32)dev->limits.max_wqes || + cap->max_send_sge > (u32)dev->limits.max_sg || + cap->max_recv_sge > (u32)dev->limits.max_sg || + cap->max_inline_data > (u32)mthca_max_inline_data(max_data_size)) + return -EINVAL; + + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if (qp->transport == MLX && cap->max_recv_sge + 2 > (u32)dev->limits.max_sg) + return -EINVAL; + + if (mthca_is_memfree(dev)) { + qp->rq.max = cap->max_recv_wr ? + roundup_pow_of_two(cap->max_recv_wr) : 0; + qp->sq.max = cap->max_send_wr ? 
+ roundup_pow_of_two(cap->max_send_wr) : 0; + } else { + qp->rq.max = cap->max_recv_wr; + qp->sq.max = cap->max_send_wr; + } + + qp->rq.max_gs = cap->max_recv_sge; + qp->sq.max_gs = MAX(cap->max_send_sge, + ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE, + MTHCA_INLINE_CHUNK_SIZE) / + (int)sizeof (struct mthca_data_seg)); + + return 0; +} + +int mthca_alloc_qp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_qp_type_t type, + enum ib_sig_type send_policy, + struct ib_qp_cap *cap, + struct mthca_qp *qp) +{ + int err; + SPIN_LOCK_PREP(lh); + + switch (type) { + case IB_QPT_RELIABLE_CONN: qp->transport = RC; break; + case IB_QPT_UNRELIABLE_CONN: qp->transport = UC; break; + case IB_QPT_UNRELIABLE_DGRM: qp->transport = UD; break; + default: return -EINVAL; + } + + err = mthca_set_qp_size(dev, cap, qp); + if (err) + return err; + + qp->qpn = mthca_alloc(&dev->qp_table.alloc); + if (qp->qpn == -1) + return -ENOMEM; + + err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, + send_policy, qp); + if (err) { + mthca_free(&dev->qp_table.alloc, qp->qpn); + return err; + } + + spin_lock_irq(&dev->qp_table.lock, &lh); + mthca_array_set(&dev->qp_table.qp, + qp->qpn & (dev->limits.num_qps - 1), qp); + spin_unlock_irq(&lh); + + return 0; +} + +int mthca_alloc_sqp(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_cq *send_cq, + struct mthca_cq *recv_cq, + enum ib_sig_type send_policy, + struct ib_qp_cap *cap, + int qpn, + int port, + struct mthca_sqp *sqp) +{ + u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; + int err; + SPIN_LOCK_PREP(lhs); + SPIN_LOCK_PREP(lhr); + SPIN_LOCK_PREP(lht); + + err = mthca_set_qp_size(dev, cap, &sqp->qp); + if (err) + return err; + + alloc_dma_zmem_map(dev, + sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE, + PCI_DMA_BIDIRECTIONAL, + &sqp->sg); + if (!sqp->sg.page) + return -ENOMEM; + + spin_lock_irq(&dev->qp_table.lock, &lht); + if (mthca_array_get(&dev->qp_table.qp, mqpn)) + err = -EBUSY; + else + mthca_array_set(&dev->qp_table.qp, mqpn, sqp); + spin_unlock_irq(&lht); + + if (err) + goto err_out; + + sqp->port = port; + sqp->qp.qpn = mqpn; + sqp->qp.transport = MLX; + + err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, + send_policy, &sqp->qp); + if (err) + goto err_out_free; + + atomic_inc(&pd->sqp_count); + + return 0; + + err_out_free: + /* + * Lock CQs here, so that CQ polling code can do QP lookup + * without taking a lock. + */ + spin_lock_irq(&send_cq->lock, &lhs); + if (send_cq != recv_cq) + spin_lock(&recv_cq->lock, &lhr); + + spin_lock(&dev->qp_table.lock, &lht); + mthca_array_clear(&dev->qp_table.qp, mqpn); + spin_unlock(&lht); + + if (send_cq != recv_cq) + spin_unlock(&lhr); + spin_unlock_irq(&lhs); + + err_out: + free_dma_mem_map(dev, &sqp->sg, PCI_DMA_BIDIRECTIONAL); + + return err; +} + +void mthca_free_qp(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + u8 status; + struct mthca_cq *send_cq; + struct mthca_cq *recv_cq; + SPIN_LOCK_PREP(lhs); + SPIN_LOCK_PREP(lhr); + SPIN_LOCK_PREP(lht); + + send_cq = to_mcq(qp->ibqp.send_cq); + recv_cq = to_mcq(qp->ibqp.recv_cq); + + /* + * Lock CQs here, so that CQ polling code can do QP lookup + * without taking a lock. 
+ */ + spin_lock_irq(&send_cq->lock, &lhs); + if (send_cq != recv_cq) + spin_lock(&recv_cq->lock, &lhr); + + spin_lock(&dev->qp_table.lock, &lht); + mthca_array_clear(&dev->qp_table.qp, + qp->qpn & (dev->limits.num_qps - 1)); + spin_unlock(&lht); + + if (send_cq != recv_cq) + spin_unlock(&lhr); + spin_unlock_irq(&lhs); + + atomic_dec(&qp->refcount); + wait_event(&qp->wait, !atomic_read(&qp->refcount)); + + if (qp->state != IBQPS_RESET) { + mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); + } + + /* + * If this is a userspace QP, the buffers, MR, CQs and so on + * will be cleaned up in userspace, so all we have to do is + * unref the mem-free tables and free the QPN in our table. + */ + if (!qp->ibqp.ucontext) { + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + + mthca_free_memfree(dev, qp); + mthca_free_wqe_buf(dev, qp); + } + + mthca_unmap_memfree(dev, qp); + + if (is_sqp(dev, qp)) { + atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); + free_dma_mem_map(dev, &to_msqp(qp)->sg, PCI_DMA_BIDIRECTIONAL); + } else + mthca_free(&dev->qp_table.alloc, qp->qpn); +} + +static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr) +{ + + enum mthca_wr_opcode opcode = -1; //= wr->wr_type; + + switch (wr->wr_type) { + case WR_SEND: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND; + break; + case WR_RDMA_WRITE: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE; + break; + case WR_RDMA_READ: opcode = MTHCA_OPCODE_RDMA_READ; break; + case WR_COMPARE_SWAP: opcode = MTHCA_OPCODE_ATOMIC_CS; break; + case WR_FETCH_ADD: opcode = MTHCA_OPCODE_ATOMIC_FA; break; + default: opcode = MTHCA_OPCODE_INVALID;break; + } + return opcode; +} + +/* Create UD header for an MLX send and build a data segment for it */ +static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, + int ind, struct _ib_send_wr *wr, + struct mthca_mlx_seg *mlx, + struct mthca_data_seg *data) +{ + enum ib_wr_opcode opcode = conv_ibal_wr_opcode(wr); + int header_size; + int err; + u16 pkey; + CPU_2_BE64_PREP; + + if (!wr->dgrm.ud.h_av) { + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_AV, + ("absent AV in send wr %p\n", wr)); + return -EINVAL; + } + + ib_ud_header_init(256, /* assume a MAD */ + mthca_ah_grh_present(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)), + &sqp->ud_header); + + err = mthca_read_ah(dev, to_mah((struct ib_ah *)wr->dgrm.ud.h_av), &sqp->ud_header); + if (err){ + HCA_PRINT(TRACE_LEVEL_ERROR , HCA_DBG_AV, ("read av error%p\n", + to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av)); + return err; + } + mlx->flags &= ~cl_hton32(MTHCA_NEXT_SOLICIT | 1); + mlx->flags |= cl_hton32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? 
MTHCA_MLX_SLR : 0) | + (sqp->ud_header.lrh.service_level << 8)); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + mlx->vcrc = 0; + + switch (opcode) { + case MTHCA_OPCODE_SEND: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; + sqp->ud_header.immediate_present = 0; + break; + case MTHCA_OPCODE_SEND_IMM: + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; + sqp->ud_header.immediate_present = 1; + sqp->ud_header.immediate_data = wr->immediate_data; + break; + default: + return -EINVAL; + } + + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + sqp->ud_header.bth.solicited_event = (u8)!!(wr->send_opt & IB_SEND_OPT_SOLICITED); + if (!sqp->qp.ibqp.qp_num) + ib_get_cached_pkey(&dev->ib_dev, (u8)sqp->port, + sqp->pkey_index, &pkey); + else + ib_get_cached_pkey(&dev->ib_dev, (u8)sqp->port, + wr->dgrm.ud.pkey_index, &pkey); + sqp->ud_header.bth.pkey = cl_hton16(pkey); + sqp->ud_header.bth.destination_qpn = wr->dgrm.ud.remote_qp; + sqp->ud_header.bth.psn = cl_hton32((sqp->send_psn++) & ((1 << 24) - 1)); + sqp->ud_header.deth.qkey = wr->dgrm.ud.remote_qkey & 0x00000080 ? + cl_hton32(sqp->qkey) : wr->dgrm.ud.remote_qkey; + sqp->ud_header.deth.source_qpn = cl_hton32(sqp->qp.ibqp.qp_num); + + header_size = ib_ud_header_pack(&sqp->ud_header, + (u8*)sqp->sg.page + + ind * MTHCA_UD_HEADER_SIZE); + + data->byte_count = cl_hton32(header_size); + data->lkey = cl_hton32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey); + data->addr = CPU_2_BE64(sqp->sg.dma_address + + ind * MTHCA_UD_HEADER_SIZE); + + return 0; +} + +static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, + struct ib_cq *ib_cq) +{ + unsigned cur; + struct mthca_cq *cq; + SPIN_LOCK_PREP(lh); + + cur = wq->head - wq->tail; + if (likely((int)cur + nreq < wq->max)) + return 0; + + cq = to_mcq(ib_cq); + spin_lock_dpc(&cq->lock, &lh); + cur = wq->head - wq->tail; + spin_unlock_dpc(&lh); + + return (int)cur + nreq >= wq->max; +} + +int mthca_tavor_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + u8 *wqe; + u8 *prev_wqe; + int err = 0; + int nreq; + int i; + int size; + int size0 = 0; + u32 f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0; + int ind; + u8 op0 = 0; + enum ib_wr_opcode opcode; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&qp->sq.lock, &lh); + + /* XXX check that state is OK to post send */ + + ind = qp->sq.next_ind; + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("SQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", qp->qpn, + qp->sq.head, qp->sq.tail, + qp->sq.max, nreq)); + err = -ENOMEM; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind); + prev_wqe = qp->sq.last; + qp->sq.last = wqe; + opcode = conv_ibal_wr_opcode(wr); + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + ((struct mthca_next_seg *) wqe)->flags = + ((wr->send_opt & IB_SEND_OPT_SIGNALED) ? + cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) | + ((wr->send_opt & IB_SEND_OPT_SOLICITED) ? 
+ cl_hton32(MTHCA_NEXT_SOLICIT) : 0) | + cl_hton32(1); + if (opcode == MTHCA_OPCODE_SEND_IMM|| + opcode == MTHCA_OPCODE_RDMA_WRITE_IMM) + ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data; + + wqe += sizeof (struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + switch (qp->transport) { + case RC: + switch (opcode) { + case MTHCA_OPCODE_ATOMIC_CS: + case MTHCA_OPCODE_ATOMIC_FA: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + + wqe += sizeof (struct mthca_raddr_seg); + + if (opcode == MTHCA_OPCODE_ATOMIC_CS) { + ((struct mthca_atomic_seg *) wqe)->swap_add = + cl_hton64(wr->remote_ops.atomic2); + ((struct mthca_atomic_seg *) wqe)->compare = + cl_hton64(wr->remote_ops.atomic1); + } else { + ((struct mthca_atomic_seg *) wqe)->swap_add = + cl_hton64(wr->remote_ops.atomic1); + ((struct mthca_atomic_seg *) wqe)->compare = 0; + } + + wqe += sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16 ; + break; + + case MTHCA_OPCODE_RDMA_READ: + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UC: + switch (opcode) { + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UD: + ((struct mthca_tavor_ud_seg *) wqe)->lkey = + cl_hton32(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->key); + ((struct mthca_tavor_ud_seg *) wqe)->av_addr = + cl_hton64(to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->avdma); + ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp; + ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey; + + wqe += sizeof (struct mthca_tavor_ud_seg); + size += sizeof (struct mthca_tavor_ud_seg) / 16; + break; + + case MLX: + err = build_mlx_header(dev, to_msqp(qp), ind, wr, + (void*)(wqe - sizeof (struct mthca_next_seg)), + (void*)wqe); + if (err) { + if (bad_wr) + *bad_wr = wr; + goto out; + } + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + break; + } + + if ((int)(int)wr->num_ds > qp->sq.max_gs) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SQ %06x too many gathers\n",qp->qpn)); + err = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + if (wr->send_opt & IB_SEND_OPT_INLINE) { + if (wr->num_ds) { + struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe; + uint32_t s = 0; + + wqe += sizeof *seg; + for (i = 0; i < (int)wr->num_ds; ++i) { + struct _ib_local_ds *sge = &wr->ds_array[i]; + + s += sge->length; + + if (s > (uint32_t)qp->max_inline_data) { + err = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr, + sge->length); + wqe += sge->length; + } + + 
seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s); + size += align(s + sizeof *seg, 16) / 16; + } + } else { + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cl_hton64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + HCA_PRINT(TRACE_LEVEL_VERBOSE ,HCA_DBG_QP ,("SQ %06x [%02x] lkey 0x%08x vaddr 0x%I64x 0x%x\n",qp->qpn,i, + (wr->ds_array[i].lkey),(wr->ds_array[i].vaddr),wr->ds_array[i].length)); + } + } + + /* Add one more inline data segment for ICRC */ + if (qp->transport == MLX) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32((unsigned long)((1 << 31) | 4)); + ((u32 *) wqe)[1] = 0; + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + + qp->wrid[ind + qp->rq.max] = wr->wr_id; + + if (opcode == MTHCA_OPCODE_INVALID) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SQ %06x opcode invalid\n",qp->qpn)); + err = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cl_hton32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) |opcode); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size | + ((wr->send_opt & IB_SEND_OPT_FENCE) ? + MTHCA_NEXT_FENCE : 0)); + + if (!size0) { + size0 = size; + op0 = opcode; + } + + dump_wqe( TRACE_LEVEL_VERBOSE, (u32*)qp->sq.last,qp); + + ++ind; + if (unlikely(ind >= qp->sq.max)) + ind -= qp->sq.max; + } + +out: + if (likely(nreq)) { + __be32 doorbell[2]; + + doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) | f0 | op0); + doorbell[1] = cl_hton32((qp->qpn << 8) | size0); + + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_SEND_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + qp->sq.next_ind = ind; + qp->sq.head += nreq; + + spin_unlock_irqrestore(&lh); + return err; +} + +int mthca_tavor_post_recv(struct ib_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + __be32 doorbell[2]; + int err = 0; + int nreq; + int i; + int size; + int size0 = 0; + int ind; + u8 *wqe; + u8 *prev_wqe; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&qp->rq.lock, &lh); + + /* XXX check that state is OK to post receive */ + + ind = qp->rq.next_ind; + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); + doorbell[1] = cl_hton32(qp->qpn << 8); + + wmb(); + + mthca_write64(doorbell, dev->kar + MTHCA_RECV_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; + size0 = 0; + } + if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("RQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", qp->qpn, + qp->rq.head, qp->rq.tail, + qp->rq.max, nreq)); + err = -ENOMEM; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + wqe = get_recv_wqe(qp, ind); + prev_wqe = qp->rq.last; + qp->rq.last = wqe; + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = + cl_hton32(MTHCA_NEXT_DBD); + ((struct mthca_next_seg *) wqe)->flags = 0; + + wqe += sizeof 
(struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + if (unlikely((int)wr->num_ds > qp->rq.max_gs)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("RQ %06x too many gathers\n",qp->qpn)); + err = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cl_hton64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; +// HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("RQ %06x [%02x] lkey 0x%08x vaddr 0x%I64x 0x %x 0x%08x\n",i,qp->qpn, +// (wr->ds_array[i].lkey),(wr->ds_array[i].vaddr),wr->ds_array[i].length, wr->wr_id)); + } + + qp->wrid[ind] = wr->wr_id; + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cl_hton32((ind << qp->rq.wqe_shift) | 1); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cl_hton32(MTHCA_NEXT_DBD | size); + + if (!size0) + size0 = size; + + dump_wqe(TRACE_LEVEL_VERBOSE, (u32*)wqe ,qp); + + ++ind; + if (unlikely(ind >= qp->rq.max)) + ind -= qp->rq.max; + } + +out: + if (likely(nreq)) { + doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); + doorbell[1] = cl_hton32((qp->qpn << 8) | (nreq & 255)); + + wmb(); + + mthca_write64(doorbell, dev->kar + MTHCA_RECV_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + qp->rq.next_ind = ind; + qp->rq.head += nreq; + + spin_unlock_irqrestore(&lh); + return err; +} + +int mthca_arbel_post_send(struct ib_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibqp->device); + struct mthca_qp *qp = to_mqp(ibqp); + __be32 doorbell[2]; + u8 *wqe; + u8 *prev_wqe; + int err = 0; + int nreq; + int i; + int size; + int size0 = 0; + u32 f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0; + int ind; + u8 op0 = 0; + enum ib_wr_opcode opcode; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&qp->sq.lock, &lh); + + /* XXX check that state is OK to post send */ + + ind = qp->sq.head & (qp->sq.max - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) { + nreq = 0; + doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | + ((qp->sq.head & 0xffff) << 8) |f0 | op0); + doorbell[1] = cl_hton32((qp->qpn << 8) | size0); + qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; + size0 = 0; + f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + *qp->sq.db = cl_hton32(qp->sq.head & 0xffff); + + /* + * Make sure doorbell record is written before we + * write MMIO send doorbell. + */ + wmb(); + mthca_write64(doorbell, dev->kar + MTHCA_SEND_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("SQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", qp->qpn, + qp->sq.head, qp->sq.tail, + qp->sq.max, nreq)); + err = -ENOMEM; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind); + prev_wqe = qp->sq.last; + qp->sq.last = wqe; + opcode = conv_ibal_wr_opcode(wr); + + ((struct mthca_next_seg *) wqe)->flags = + ((wr->send_opt & IB_SEND_OPT_SIGNALED) ? 
+ cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) | + ((wr->send_opt & IB_SEND_OPT_SOLICITED) ? + cl_hton32(MTHCA_NEXT_SOLICIT) : 0) | + cl_hton32(1); + if (opcode == MTHCA_OPCODE_SEND_IMM|| + opcode == MTHCA_OPCODE_RDMA_WRITE_IMM) + ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data; + + wqe += sizeof (struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + switch (qp->transport) { + case RC: + switch (opcode) { + case MTHCA_OPCODE_ATOMIC_CS: + case MTHCA_OPCODE_ATOMIC_FA: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + + wqe += sizeof (struct mthca_raddr_seg); + + if (opcode == MTHCA_OPCODE_ATOMIC_CS) { + ((struct mthca_atomic_seg *) wqe)->swap_add = + cl_hton64(wr->remote_ops.atomic2); + ((struct mthca_atomic_seg *) wqe)->compare = + cl_hton64(wr->remote_ops.atomic1); + } else { + ((struct mthca_atomic_seg *) wqe)->swap_add = + cl_hton64(wr->remote_ops.atomic1); + ((struct mthca_atomic_seg *) wqe)->compare = 0; + } + + wqe += sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16 ; + break; + + case MTHCA_OPCODE_RDMA_READ: + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UC: + switch (opcode) { + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UD: + memcpy(((struct mthca_arbel_ud_seg *) wqe)->av, + to_mah((struct ib_ah *)wr->dgrm.ud.h_av)->av, MTHCA_AV_SIZE); + ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp; + ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey; + + wqe += sizeof (struct mthca_arbel_ud_seg); + size += sizeof (struct mthca_arbel_ud_seg) / 16; + break; + + case MLX: + err = build_mlx_header(dev, to_msqp(qp), ind, wr, + (void*)(wqe - sizeof (struct mthca_next_seg)), + (void*)wqe); + if (err) { + if (bad_wr) + *bad_wr = wr; + goto out; + } + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + break; + } + + if ((int)wr->num_ds > qp->sq.max_gs) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SQ %06x full too many gathers\n",qp->qpn)); + err = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + if (wr->send_opt & IB_SEND_OPT_INLINE) { + if (wr->num_ds) { + struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe; + uint32_t s = 0; + + wqe += sizeof *seg; + for (i = 0; i < (int)wr->num_ds; ++i) { + struct _ib_local_ds *sge = &wr->ds_array[i]; + + s += sge->length; + + if (s > (uint32_t)qp->max_inline_data) { + err = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + memcpy(wqe, (void *) (uintptr_t) sge->vaddr, + sge->length); + wqe += sge->length; + } + + seg->byte_count = 
cl_hton32(MTHCA_INLINE_SEG | s); + size += align(s + sizeof *seg, 16) / 16; + } + } else { + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cl_hton64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + } + + /* Add one more inline data segment for ICRC */ + if (qp->transport == MLX) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32((unsigned long)((1 << 31) | 4)); + ((u32 *) wqe)[1] = 0; + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + + qp->wrid[ind + qp->rq.max] = wr->wr_id; + + if (opcode == MTHCA_OPCODE_INVALID) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("SQ %06x opcode invalid\n",qp->qpn)); + err = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cl_hton32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) |opcode); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cl_hton32(MTHCA_NEXT_DBD | size | + ((wr->send_opt & IB_SEND_OPT_FENCE) ? + MTHCA_NEXT_FENCE : 0)); + + if (!size0) { + size0 = size; + op0 = opcode; + } + + ++ind; + if (unlikely(ind >= qp->sq.max)) + ind -= qp->sq.max; + } + +out: + if (likely(nreq)) { + doorbell[0] = cl_hton32((nreq << 24) | + ((qp->sq.head & 0xffff) << 8) |f0 | op0); + doorbell[1] = cl_hton32((qp->qpn << 8) | size0); + qp->sq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + *qp->sq.db = cl_hton32(qp->sq.head & 0xffff); + + /* + * Make sure doorbell record is written before we + * write MMIO send doorbell. 
+ */ + wmb(); + mthca_write64(doorbell, + dev->kar + MTHCA_SEND_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + spin_unlock_irqrestore(&lh); + return err; +} + +int mthca_arbel_post_recv(struct ib_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) +{ + struct mthca_qp *qp = to_mqp(ibqp); + int err = 0; + int nreq; + int ind; + int i; + u8 *wqe; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&qp->rq.lock, &lh); + + /* XXX check that state is OK to post receive */ + + ind = qp->rq.head & (qp->rq.max - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + HCA_PRINT(TRACE_LEVEL_ERROR,HCA_DBG_QP,("RQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", qp->qpn, + qp->rq.head, qp->rq.tail, + qp->rq.max, nreq)); + err = -ENOMEM; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + wqe = get_recv_wqe(qp, ind); + + ((struct mthca_next_seg *) wqe)->flags = 0; + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely((int)wr->num_ds > qp->rq.max_gs)) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP ,("RQ %06x full too many scatter\n",qp->qpn)); + err = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cl_hton64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < qp->rq.max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + qp->wrid[ind] = wr->wr_id; + + ++ind; + if (unlikely(ind >= qp->rq.max)) + ind -= qp->rq.max; + } +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + *qp->rq.db = cl_hton32(qp->rq.head & 0xffff); + } + + spin_unlock_irqrestore(&lh); + return err; +} + +void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, + int index, int *dbd, __be32 *new_wqe) +{ + struct mthca_next_seg *next; + + UNREFERENCED_PARAMETER(dev); + + /* + * For SRQs, all WQEs generate a CQE, so we're always at the + * end of the doorbell chain. + */ + if (qp->ibqp.srq) { + *new_wqe = 0; + return; + } + + if (is_send) + next = get_send_wqe(qp, index); + else + next = get_recv_wqe(qp, index); + + *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD)); + if (next->ee_nds & cl_hton32(0x3f)) + *new_wqe = (next->nda_op & cl_hton32((unsigned long)~0x3f)) | + (next->ee_nds & cl_hton32(0x3f)); + else + *new_wqe = 0; +} + +int mthca_init_qp_table(struct mthca_dev *dev) +{ + int err; + u8 status; + int i; + + spin_lock_init(&dev->qp_table.lock); + fill_state_table(); + + /* + * We reserve 2 extra QPs per port for the special QPs. The + * special QP for port 1 has to be even, so round up. + */ + dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL; + err = mthca_alloc_init(&dev->qp_table.alloc, + dev->limits.num_qps, + (1 << 24) - 1, + dev->qp_table.sqp_start + + MTHCA_MAX_PORTS * 2); + if (err) + return err; + + err = mthca_array_init(&dev->qp_table.qp, + dev->limits.num_qps); + if (err) { + mthca_alloc_cleanup(&dev->qp_table.alloc); + return err; + } + + for (i = 0; i < 2; ++i) { + err = mthca_CONF_SPECIAL_QP(dev, i ? 
IB_QPT_QP1 : IB_QPT_QP0, + dev->qp_table.sqp_start + i * 2, + &status); + if (err) + goto err_out; + if (status) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_QP,("CONF_SPECIAL_QP returned " + "status %02x, aborting.\n", + status)); + err = -EINVAL; + goto err_out; + } + } + return 0; + + err_out: + mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP1, 0, &status); + mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP0, 0, &status); + + mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps); + mthca_alloc_cleanup(&dev->qp_table.alloc); + + return err; +} + +void mthca_cleanup_qp_table(struct mthca_dev *dev) +{ + u8 status; + + mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP1, 0, &status); + mthca_CONF_SPECIAL_QP(dev, IB_QPT_QP0, 0, &status); + + mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps); + mthca_alloc_cleanup(&dev->qp_table.alloc); +} + + + diff --git a/branches/IBFD/hw/mthca/kernel/mthca_srq.c b/branches/IBFD/hw/mthca/kernel/mthca_srq.c new file mode 100644 index 00000000..784d5f49 --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_srq.c @@ -0,0 +1,751 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include "mt_l2w.h" +#include "mthca_dev.h" +#if defined(EVENT_TRACING) +#ifdef offsetof +#undef offsetof +#endif +#include "mthca_srq.tmh" +#endif +#include "mthca_cmd.h" +#include "mthca_memfree.h" +#include "mthca_wqe.h" + + +#ifdef ALLOC_PRAGMA +#pragma alloc_text (PAGE, mthca_init_srq_table) +#pragma alloc_text (PAGE, mthca_cleanup_srq_table) +#endif + + +enum { + MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE +}; + +struct mthca_tavor_srq_context { + __be64 wqe_base_ds; /* low 6 bits is descriptor size */ + __be32 state_pd; + __be32 lkey; + __be32 uar; + __be16 limit_watermark; + __be16 wqe_cnt; + u32 reserved[2]; +}; + +struct mthca_arbel_srq_context { + __be32 state_logsize_srqn; + __be32 lkey; + __be32 db_index; + __be32 logstride_usrpage; + __be64 wqe_base; + __be32 eq_pd; + __be16 limit_watermark; + __be16 wqe_cnt; + u16 reserved1; + __be16 wqe_counter; + u32 reserved2[3]; +}; + +static void *get_wqe(struct mthca_srq *srq, int n) +{ + if (srq->is_direct) + return (u8*)srq->queue.direct.page + (n << srq->wqe_shift); + else + return (u8*)srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].page + + ((n << srq->wqe_shift) & (PAGE_SIZE - 1)); +} + +/* + * Return a pointer to the location within a WQE that we're using as a + * link when the WQE is in the free list. We use the imm field + * because in the Tavor case, posting a WQE may overwrite the next + * segment of the previous WQE, but a receive WQE will never touch the + * imm field. This avoids corrupting our free list if the previous + * WQE has already completed and been put on the free list when we + * post the next WQE. + */ +static inline int *wqe_to_link(void *wqe) +{ + return (int *) ((u8*)wqe + offsetof(struct mthca_next_seg, imm)); +} + +static void mthca_tavor_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_tavor_srq_context *context) +{ + CPU_2_BE64_PREP; + + RtlZeroMemory(context, sizeof *context); + + context->wqe_base_ds = CPU_2_BE64(1Ui64 << (srq->wqe_shift - 4)); + context->state_pd = cl_hton32(pd->pd_num); + context->lkey = cl_hton32(srq->mr.ibmr.lkey); + + if (pd->ibpd.ucontext) + context->uar = + cl_hton32(to_mucontext(pd->ibpd.ucontext)->uar.index); + else + context->uar = cl_hton32(dev->driver_uar.index); +} + +static void mthca_arbel_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_arbel_srq_context *context) +{ + int logsize; + + RtlZeroMemory(context, sizeof *context); + + logsize = long_log2(srq->max); + context->state_logsize_srqn = cl_hton32(logsize << 24 | srq->srqn); + context->lkey = cl_hton32(srq->mr.ibmr.lkey); + context->db_index = cl_hton32(srq->db_index); + context->logstride_usrpage = cl_hton32((srq->wqe_shift - 4) << 29); + if (pd->ibpd.ucontext) + context->logstride_usrpage |= + cl_hton32(to_mucontext(pd->ibpd.ucontext)->uar.index); + else + context->logstride_usrpage |= cl_hton32(dev->driver_uar.index); + context->eq_pd = cl_hton32(MTHCA_EQ_ASYNC << 24 | pd->pd_num); +} + +static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) +{ + mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue, + srq->is_direct, &srq->mr); + kfree(srq->wrid); +} + +static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, + struct mthca_srq *srq) +{ + struct mthca_data_seg *scatter; + u8 *wqe; + int err; + int i; + + if (pd->ibpd.ucontext) + return 0; + + srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); + if (!srq->wrid) + return 
-ENOMEM; + + err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift, + MTHCA_MAX_DIRECT_SRQ_SIZE, + &srq->queue, &srq->is_direct, pd, 1, &srq->mr); + if (err) { + kfree(srq->wrid); + return err; + } + + /* + * Now initialize the SRQ buffer so that all of the WQEs are + * linked into the list of free WQEs. In addition, set the + * scatter list L_Keys to the sentry value of 0x100. + */ + for (i = 0; i < srq->max; ++i) { + wqe = get_wqe(srq, i); + + *wqe_to_link(wqe) = i < srq->max - 1 ? i + 1 : -1; + + for (scatter = (struct mthca_data_seg *)(wqe + sizeof (struct mthca_next_seg)); + (void *) scatter < (void*)(wqe + (1 << srq->wqe_shift)); + ++scatter) + scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY); + } + + srq->last = get_wqe(srq, srq->max - 1); + + return 0; +} + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + ib_srq_attr_t *attr, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + u8 status; + int ds; + int err; + SPIN_LOCK_PREP(lh); + + /* Sanity check SRQ size before proceeding */ + if ((int)attr->max_wr > dev->limits.max_srq_wqes || + (int)attr->max_sge > dev->limits.max_srq_sge) + return -EINVAL; + + srq->max = attr->max_wr; + srq->max_gs = attr->max_sge; + srq->counter = 0; + + if (mthca_is_memfree(dev)) + srq->max = roundup_pow_of_two(srq->max + 1); + + ds = max(64UL, + roundup_pow_of_two(sizeof (struct mthca_next_seg) + + srq->max_gs * sizeof (struct mthca_data_seg))); + + if (!mthca_is_memfree(dev) && (ds > dev->limits.max_desc_sz)) + return -EINVAL; + + srq->wqe_shift = long_log2(ds); + + srq->srqn = mthca_alloc(&dev->srq_table.alloc); + if (srq->srqn == -1) + return -ENOMEM; + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->srq_table.table, srq->srqn); + if (err) + goto err_out; + + if (!pd->ibpd.ucontext) { + srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, + srq->srqn, &srq->db); + if (srq->db_index < 0) { + err = -ENOMEM; + goto err_out_icm; + } + } + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_out_db; + } + + err = mthca_alloc_srq_buf(dev, pd, srq); + if (err) + goto err_out_mailbox; + + spin_lock_init(&srq->lock); + atomic_set(&srq->refcount, 1); + init_waitqueue_head(&srq->wait); + KeInitializeMutex(&srq->mutex, 0); + + if (mthca_is_memfree(dev)) + mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + else + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + + err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status); + + if (err) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SRQ ,( "SW2HW_SRQ failed (%d)\n", err)); + goto err_out_free_buf; + } + if (status) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SRQ ,( "SW2HW_SRQ returned status 0x%02x\n", + status)); + err = -EINVAL; + goto err_out_free_buf; + } + + spin_lock_irq(&dev->srq_table.lock, &lh); + if (mthca_array_set(&dev->srq_table.srq, + srq->srqn & (dev->limits.num_srqs - 1), + srq)) { + spin_unlock_irq(&lh); + goto err_out_free_srq; + } + spin_unlock_irq(&lh); + + mthca_free_mailbox(dev, mailbox); + + srq->first_free = 0; + srq->last_free = srq->max - 1; + + attr->max_wr = (mthca_is_memfree(dev)) ? 
srq->max - 1 : srq->max; + attr->max_sge = srq->max_gs; + + return 0; + +err_out_free_srq: + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + if (err) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SRQ ,( "HW2SW_SRQ failed (%d)\n", err)); + } else if (status) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SRQ ,( "HW2SW_SRQ returned status 0x%02x\n", status)); + } + +err_out_free_buf: + if (!pd->ibpd.ucontext) + mthca_free_srq_buf(dev, srq); + +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + +err_out_db: + if (!pd->ibpd.ucontext && mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); + +err_out_icm: + mthca_table_put(dev, dev->srq_table.table, srq->srqn); + +err_out: + mthca_free(&dev->srq_table.alloc, srq->srqn); + + return err; +} + +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + int err; + u8 status; + SPIN_LOCK_PREP(lh); + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SRQ ,( "No memory for mailbox to free SRQ.\n")); + return; + } + + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + if (err) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SRQ ,( "HW2SW_SRQ failed (%d)\n", err)); + } else if (status) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SRQ ,( "HW2SW_SRQ returned status 0x%02x\n", status)); + } + + spin_lock_irq(&dev->srq_table.lock, &lh); + mthca_array_clear(&dev->srq_table.srq, + srq->srqn & (dev->limits.num_srqs - 1)); + atomic_dec(&srq->refcount); + spin_unlock_irq(&lh); + + wait_event(&srq->wait, !atomic_read(&srq->refcount)); + + if (!srq->ibsrq.ucontext) { + mthca_free_srq_buf(dev, srq); + if (mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); + } + + mthca_table_put(dev, dev->srq_table.table, srq->srqn); + mthca_free(&dev->srq_table.alloc, srq->srqn); + mthca_free_mailbox(dev, mailbox); +} + +int mthca_modify_srq(struct ib_srq *ibsrq, ib_srq_attr_t *attr, + ib_srq_attr_mask_t attr_mask) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + int ret; + u8 status; + + /* We don't support resizing SRQs (yet?) */ + if (attr_mask & IB_SRQ_MAX_WR) + return -ENOSYS; + + if (attr_mask & IB_SRQ_LIMIT) { + u32 max_wr = mthca_is_memfree(dev) ? srq->max - 1 : srq->max; + if (attr->srq_limit > max_wr) + return -ERANGE; + + down(&srq->mutex); + ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); + up(&srq->mutex); + + if (ret) + return ret; + if (status) + return -EINVAL; + } + + return 0; +} + +int mthca_query_srq(struct ib_srq *ibsrq, ib_srq_attr_t *srq_attr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + struct mthca_mailbox *mailbox; + struct mthca_arbel_srq_context *arbel_ctx; + struct mthca_tavor_srq_context *tavor_ctx; + u8 status; + int err; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox, &status); + if (err) + goto out; + + if (mthca_is_memfree(dev)) { + arbel_ctx = mailbox->buf; + srq_attr->srq_limit = cl_ntoh16(arbel_ctx->limit_watermark); + } else { + tavor_ctx = mailbox->buf; + srq_attr->srq_limit = cl_ntoh16(tavor_ctx->limit_watermark); + } + + srq_attr->max_wr = (mthca_is_memfree(dev)) ? 
srq->max - 1 : srq->max; + srq_attr->max_sge = srq->max_gs; + +out: + mthca_free_mailbox(dev, mailbox); + + return err; +} + +void mthca_srq_event(struct mthca_dev *dev, u32 srqn, + enum ib_event_type event_type, u8 vendor_code) +{ + struct mthca_srq *srq; + struct ib_event event; + SPIN_LOCK_PREP(lh); + + spin_lock(&dev->srq_table.lock, &lh); + srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1)); + if (srq) + atomic_inc(&srq->refcount); + spin_unlock(&lh); + + if (!srq) { + HCA_PRINT(TRACE_LEVEL_WARNING ,HCA_DBG_SRQ ,( "Async event for bogus SRQ %08x\n", srqn)); + return; + } + + if (!srq->ibsrq.event_handler) + goto out; + + event.device = &dev->ib_dev; + event.event = event_type; + event.element.srq = &srq->ibsrq; + event.vendor_specific = vendor_code; + HCA_PRINT(TRACE_LEVEL_WARNING,HCA_DBG_SRQ, + ("SRQ %06x Async event event_type 0x%x vendor_code 0x%x\n", + srqn,event_type,vendor_code)); + if (srq->ibsrq.event_handler) + srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context); + +out: + if (atomic_dec_and_test(&srq->refcount)) + wake_up(&srq->wait); +} + +/* + * This function must be called with IRQs disabled. + */ +void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr) +{ + int ind; + SPIN_LOCK_PREP(lh); + + ind = wqe_addr >> srq->wqe_shift; + + spin_lock(&srq->lock, &lh); + + if (likely(srq->first_free >= 0)) + *wqe_to_link(get_wqe(srq, srq->last_free)) = ind; + else + srq->first_free = ind; + + *wqe_to_link(get_wqe(srq, ind)) = -1; + srq->last_free = ind; + + spin_unlock(&lh); +} + +int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) +{ + struct mthca_dev *dev = to_mdev(ibsrq->device); + struct mthca_srq *srq = to_msrq(ibsrq); + __be32 doorbell[2]; + int err = 0; + int first_ind; + int ind; + int next_ind; + int nreq; + int i; + u8 *wqe; + u8 *prev_wqe; + CPU_2_BE64_PREP; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&srq->lock, &lh); + + first_ind = srq->first_free; + + for (nreq = 0; wr; wr = wr->p_next) { + ind = srq->first_free; + + if (ind < 0) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SRQ ,( "SRQ %06x full\n", srq->srqn)); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + + if (next_ind < 0) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SRQ ,( "SRQ %06x full\n", srq->srqn)); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + prev_wqe = srq->last; + srq->last = wqe; + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely((int)wr->num_ds > srq->max_gs)) { + err = -EINVAL; + *bad_wr = wr; + srq->last = prev_wqe; + break; + } + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + CPU_2_BE64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cl_hton32((ind << srq->wqe_shift) | 1); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cl_hton32(MTHCA_NEXT_DBD); + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + + ++nreq; + if 
(unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cl_hton32(first_ind << srq->wqe_shift); + doorbell[1] = cl_hton32(srq->srqn << 8); + + /* + * Make sure that descriptors are written + * before doorbell is rung. + */ + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECV_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + + first_ind = srq->first_free; + } + } + + if (likely(nreq)) { + doorbell[0] = cl_hton32(first_ind << srq->wqe_shift); + doorbell[1] = cl_hton32((srq->srqn << 8) | nreq); + + /* + * Make sure that descriptors are written before + * doorbell is rung. + */ + wmb(); + + mthca_write64(doorbell, + dev->kar + MTHCA_RECV_DOORBELL, + MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + } + + spin_unlock_irqrestore(&lh); + return err; +} + +int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) +{ + struct mthca_srq *srq = to_msrq(ibsrq); + int err = 0; + int ind; + int next_ind; + int nreq; + int i; + u8 *wqe; + CPU_2_BE64_PREP; + SPIN_LOCK_PREP(lh); + + spin_lock_irqsave(&srq->lock, &lh); + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + ind = srq->first_free; + + if (ind < 0) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SRQ ,( "SRQ %06x full\n", srq->srqn)); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + + if (next_ind < 0) { + HCA_PRINT(TRACE_LEVEL_ERROR ,HCA_DBG_SRQ ,( "SRQ %06x full\n", srq->srqn)); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + ((struct mthca_next_seg *) wqe)->nda_op = + cl_hton32((next_ind << srq->wqe_shift) | 1); + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely((int)wr->num_ds > srq->max_gs)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + CPU_2_BE64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + } + + if (likely(nreq)) { + srq->counter = (u16)(srq->counter + nreq); + + /* + * Make sure that descriptors are written before + * we write doorbell record. + */ + wmb(); + *srq->db = cl_hton32(srq->counter); + } + + spin_unlock_irqrestore(&lh); + return err; +} + +int mthca_max_srq_sge(struct mthca_dev *dev) +{ + if (mthca_is_memfree(dev)) + return dev->limits.max_sg; + + /* + * SRQ allocations are based on powers of 2 for Tavor, + * (although they only need to be multiples of 16 bytes). + * + * Therefore, we need to base the max number of sg entries on + * the largest power of 2 descriptor size that is <= to the + * actual max WQE descriptor size, rather than return the + * max_sg value given by the firmware (which is based on WQE + * sizes as multiples of 16, not powers of 2). + * + * If SRQ implementation is changed for Tavor to be based on + * multiples of 16, the calculation below can be deleted and + * the FW max_sg value returned. 
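+ *
+ * Hedged worked example (illustrative numbers only, not taken from
+ * any particular HCA): if max_desc_sz were 1000, the largest power
+ * of 2 that fits is 1 << (fls(1000) - 1) == 512; assuming the next
+ * and data segments are 16 bytes each, this gives
+ * (512 - 16) / 16 = 31 scatter/gather entries.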
+ */ + return min( (uint32_t)dev->limits.max_sg, + ((1 << (fls(dev->limits.max_desc_sz) - 1)) - + sizeof (struct mthca_next_seg)) / + sizeof (struct mthca_data_seg)); +} + +int mthca_init_srq_table(struct mthca_dev *dev) +{ + int err; + + if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) + return 0; + + spin_lock_init(&dev->srq_table.lock); + + err = mthca_alloc_init(&dev->srq_table.alloc, + dev->limits.num_srqs, + dev->limits.num_srqs - 1, + dev->limits.reserved_srqs); + if (err) + return err; + + err = mthca_array_init(&dev->srq_table.srq, + dev->limits.num_srqs); + if (err) + mthca_alloc_cleanup(&dev->srq_table.alloc); + + return err; +} + +void mthca_cleanup_srq_table(struct mthca_dev *dev) +{ + if (!(dev->mthca_flags & MTHCA_FLAG_SRQ)) + return; + + mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs); + mthca_alloc_cleanup(&dev->srq_table.alloc); +} diff --git a/branches/IBFD/hw/mthca/kernel/mthca_uar.c b/branches/IBFD/hw/mthca/kernel/mthca_uar.c new file mode 100644 index 00000000..b5bb7b3b --- /dev/null +++ b/branches/IBFD/hw/mthca/kernel/mthca_uar.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mthca_dev.h" +#include "mthca_memfree.h" + +int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar) +{ + uar->index = mthca_alloc(&dev->uar_table.alloc); + if (uar->index == -1) + return -ENOMEM; + + uar->pfn = (unsigned long)(pci_resource_start(dev, HCA_BAR_TYPE_UAR) >> PAGE_SHIFT) + uar->index; + + return 0; +} + +void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar) +{ + mthca_free(&dev->uar_table.alloc, uar->index); +} + +int mthca_init_uar_table(struct mthca_dev *dev) +{ + int ret; + + ret = mthca_alloc_init(&dev->uar_table.alloc, + dev->limits.num_uars, + dev->limits.num_uars - 1, + dev->limits.reserved_uars); + if (ret) + return ret; + + ret = mthca_init_db_tab(dev); + if (ret) + mthca_alloc_cleanup(&dev->uar_table.alloc); + + return ret; +} + +void mthca_cleanup_uar_table(struct mthca_dev *dev) +{ + mthca_cleanup_db_tab(dev); + + /* XXX check if any UARs are still allocated? 
*/ + mthca_alloc_cleanup(&dev->uar_table.alloc); +} diff --git a/branches/IBFD/hw/mthca/mt_utils.c b/branches/IBFD/hw/mthca/mt_utils.c new file mode 100644 index 00000000..3d2124a8 --- /dev/null +++ b/branches/IBFD/hw/mthca/mt_utils.c @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include + +/* Nth element of the table contains the index of the first set bit of N; 8 - for N=0 */ +char g_set_bit_tbl[256]; + +/* Nth element of the table contains the index of the first 0 bit of N; 8 - for N=255 */ +char g_clr_bit_tbl[256]; + +void fill_bit_tbls() +{ + unsigned long i; + for (i=0; i<256; ++i) { + g_set_bit_tbl[i] = (char)(_ffs_raw(&i,0) - 1); + g_clr_bit_tbl[i] = (char)(_ffz_raw(&i,0) - 1); + } + g_set_bit_tbl[0] = g_clr_bit_tbl[255] = 8; +} + + diff --git a/branches/IBFD/hw/mthca/mt_utils.h b/branches/IBFD/hw/mthca/mt_utils.h new file mode 100644 index 00000000..ddbcf389 --- /dev/null +++ b/branches/IBFD/hw/mthca/mt_utils.h @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + + +#ifndef MT_UTILS_H +#define MT_UTILS_H + +// Nth element of the table contains the index of the first set bit of N; 8 - for N=0 +extern char g_set_bit_tbl[256]; +// Nth element of the table contains the index of the first cleared bit of N; 8 - for N=0 +extern char g_clr_bit_tbl[256]; + +// DECLARE_BITMAP +#define BITS_PER_LONG 32 +#define BITS_TO_LONGS(bits) \ + (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) + +/* +* fls: find last bit set. +* returns: 0 - if not found or N+1, if found Nth bit +*/ + +static __inline int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +/** +* _ffs_raw - find the first one bit in a word +* @addr: The address to start the search at +* @offset: The bitnumber to start searching at +* +* returns: 0 - if not found or N+1, if found Nth bit +*/ +static __inline int _ffs_raw(const unsigned long *addr, int offset) +{ + //TODO: not an effective code - is better in Assembler + int mask; + int rbc; + int ix; + if (!*addr) return 0; + mask = 1 << offset; + rbc = BITS_PER_LONG - offset; + for (ix=0; ix + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * Specifically: + * - Do not use pointer types -- pass pointers in uint64_t instead. + * - Make sure that any structure larger than 4 bytes is padded to a + * multiple of 8 bytes. Otherwise the structure size will be + * different between 32-bit and 64-bit architectures. 
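+ *
+ * Illustrative (hypothetical) example of the padding rule above:
+ *
+ * struct bad { uint64_t a; uint32_t b; }; // may be 12 bytes on some
+ * // 32-bit ABIs, 16 on 64-bit
+ * struct good { uint64_t a; uint32_t b; uint32_t reserved; }; // 16 bytes everywhere
+ *
+ * The trailing reserved fields in the structs below (e.g.
+ * ibv_alloc_pd_resp) follow this pattern.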
+ */ + +struct ibv_get_context_resp { + uint64_t uar_addr; + uint64_t pd_handle; + uint32_t pdn; + uint32_t qp_tab_size; + uint32_t uarc_size; + uint32_t vend_id; + uint16_t dev_id; + uint16_t reserved[3]; +}; + +struct ibv_alloc_pd_resp { + uint64_t pd_handle; + uint32_t pdn; + uint32_t reserved; +}; + +struct ibv_reg_mr { + uint64_t start; + uint64_t length; + uint64_t hca_va; + uint32_t access_flags; + uint32_t pdn; + uint64_t pd_handle; +}; + +struct ibv_reg_mr_resp { + uint64_t mr_handle; + uint32_t lkey; + uint32_t rkey; +}; + +struct ibv_create_cq { + struct ibv_reg_mr mr; + uint64_t arm_db_page; + uint64_t set_db_page; + uint64_t u_arm_db_page; + uint64_t user_handle; + uint32_t arm_db_index; + uint32_t set_db_index; + uint32_t u_arm_db_index; + uint32_t cqe; + uint32_t lkey; /* used only by kernel */ + uint32_t reserved; +}; + +struct ibv_create_cq_resp { + uint64_t user_handle; + uint64_t cq_handle; + struct ibv_reg_mr_resp mr; + uint32_t cqe; + uint32_t cqn; +}; + +struct ibv_create_srq { + uint64_t user_handle; + struct ibv_reg_mr mr; + uint32_t lkey; /* used only in kernel */ + uint32_t db_index; + uint64_t db_page; +}; + +struct ibv_create_srq_resp { + struct ibv_reg_mr_resp mr; + uint64_t srq_handle; + uint64_t user_handle; + uint32_t max_wr; + uint32_t max_sge; + uint32_t srqn; + uint32_t reserved; +}; + +struct ibv_create_qp { + uint64_t sq_db_page; + uint64_t rq_db_page; + uint32_t sq_db_index; + uint32_t rq_db_index; + struct ibv_reg_mr mr; + uint64_t user_handle; + uint64_t send_cq_handle; + uint64_t recv_cq_handle; + uint64_t srq_handle; + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; + uint32_t lkey; /* used only in kernel */ + uint8_t sq_sig_all; + uint8_t qp_type; + uint8_t is_srq; + uint8_t reserved[5]; +}; + +struct ibv_create_qp_resp { + struct ibv_reg_mr_resp mr; + uint64_t user_handle; + uint64_t qp_handle; + uint32_t qpn; + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; +}; + +struct ibv_modify_qp_resp { + enum ibv_qp_attr_mask attr_mask; + uint8_t qp_state; + uint8_t reserved[3]; +}; + +struct ibv_create_ah { + uint64_t user_handle; + struct ibv_reg_mr mr; +}; + +struct ibv_create_ah_resp { + uint64_t user_handle; + uint64_t start; + struct ibv_reg_mr_resp mr; +}; + + +#endif /* MX_ABI_H */ + diff --git a/branches/IBFD/hw/mthca/user/Makefile b/branches/IBFD/hw/mthca/user/Makefile new file mode 100644 index 00000000..bffacaa7 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/Makefile @@ -0,0 +1,7 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the driver components of the OpenIB Windows project. +# + +!INCLUDE ..\..\..\inc\openib.def diff --git a/branches/IBFD/hw/mthca/user/SOURCES b/branches/IBFD/hw/mthca/user/SOURCES new file mode 100644 index 00000000..92f6c653 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/SOURCES @@ -0,0 +1,80 @@ +TRUNK=..\..\.. 
+ +!if $(FREEBUILD) +TARGETNAME=mthcau +!else +TARGETNAME=mthcaud +!endif + +TARGETPATH=$(TRUNK)\bin\user\obj$(BUILD_ALT_DIR) +TARGETTYPE=DYNLINK +DLLDEF=$(O)\mlnx_uvp.def +USE_MSVCRT=1 +DLLENTRY=DllMain + +!if $(FREEBUILD) +ENABLE_EVENT_TRACING=1 +!else +#ENABLE_EVENT_TRACING=1 +!endif + +SOURCES= \ + \ + ..\mt_utils.c \ + \ + mlnx_uvp.rc \ + mlnx_ual_av.c \ + mlnx_ual_ca.c \ + mlnx_ual_cq.c \ + mlnx_ual_main.c \ + mlnx_ual_mcast.c \ + mlnx_ual_mrw.c \ + mlnx_ual_osbypass.c \ + mlnx_ual_pd.c \ + mlnx_ual_qp.c \ + mlnx_ual_srq.c \ + \ + mlnx_uvp_debug.c \ + mlnx_uvp.c \ + mlnx_uvp_ah.c \ + mlnx_uvp_cq.c \ + mlnx_uvp_memfree.c \ + mlnx_uvp_qp.c \ + mlnx_uvp_srq.c \ + mlnx_uvp_verbs.c + +INCLUDES= \ + ..; \ + $(TRUNK)\inc\user; \ + $(TRUNK)\inc\complib; \ + $(TRUNK)\inc\user\complib; \ + $(TRUNK)\inc; \ + +USER_C_FLAGS=$(USER_C_FLAGS) /DCL_NO_TRACK_MEM + +TARGETLIBS=\ + $(SDK_LIB_PATH)\user32.lib \ + $(SDK_LIB_PATH)\kernel32.lib \ + $(SDK_LIB_PATH)\Advapi32.lib \ +!if $(FREEBUILD) + $(TARGETPATH)\*\complib.lib \ + $(TARGETPATH)\*\ibal.lib +!else + $(TARGETPATH)\*\complibd.lib \ + $(TARGETPATH)\*\ibald.lib +!endif + +#LINKER_FLAGS=/MAP /MAPINFO:LINES + +!IFDEF ENABLE_EVENT_TRACING + +C_DEFINES = $(C_DEFINES) -DEVENT_TRACING -DWPP_OLDCC + + +RUN_WPP= $(SOURCES) -ext:.c.h -dll\ + -scan:mlnx_uvp_debug.h \ + -func:UVP_PRINT(LEVEL,FLAGS,(MSG,...)) \ + -func:UVP_PRINT_EXIT(LEVEL,FLAGS,(MSG,...)) +!ENDIF + +MSC_WARNING_LEVEL= /W3 diff --git a/branches/IBFD/hw/mthca/user/arch.h b/branches/IBFD/hw/mthca/user/arch.h new file mode 100644 index 00000000..9f23be4b --- /dev/null +++ b/branches/IBFD/hw/mthca/user/arch.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef INFINIBAND_ARCH_H +#define INFINIBAND_ARCH_H + +#define htonll cl_hton64 +#define ntohll cl_ntoh64 + +/* + * Architecture-specific defines. Currently, an architecture is + * required to implement the following operations: + * + * mb() - memory barrier. No loads or stores may be reordered across + * this macro by either the compiler or the CPU. 
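+ *
+ * rmb() - read memory barrier. No loads may be reordered across it.
+ *
+ * wmb() - write memory barrier. No stores may be reordered across it
+ * (used, for example, before ringing a doorbell so that descriptors
+ * are visible to the HCA first).
+ *
+ * On this Windows port all three simply map to MemoryBarrier(), as
+ * the defines below show.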
+ */ + +#define mb MemoryBarrier +#define wmb MemoryBarrier +#define rmb MemoryBarrier + +#endif /* INFINIBAND_ARCH_H */ diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_av.c b/branches/IBFD/hw/mthca/user/mlnx_ual_av.c new file mode 100644 index 00000000..abf25628 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_av.c @@ -0,0 +1,400 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mt_l2w.h" +#include "mlnx_uvp.h" +#include "mx_abi.h" + +#include "mlnx_ual_main.h" +#if defined(EVENT_TRACING) +#include "mlnx_ual_av.tmh" +#endif + + +uint8_t +gid_to_index_lookup ( + IN ib_ca_attr_t *p_ca_attr, + IN uint8_t port_num, + IN uint8_t *raw_gid) +{ + ib_gid_t *p_gid_table = NULL; + uint8_t i, index = 0; + uint16_t num_gids; + + p_gid_table = p_ca_attr->p_port_attr[port_num-1].p_gid_table; + CL_ASSERT (p_gid_table); + + num_gids = p_ca_attr->p_port_attr[port_num-1].num_gids; + UVP_PRINT(TRACE_LEVEL_INFORMATION, UVP_DBG_AV, + ("Port %d has %d gids\n", port_num, num_gids)); + + for (i = 0; i < num_gids; i++) + { + if (cl_memcmp (raw_gid, p_gid_table[i].raw, sizeof (ib_gid_t))) + { + UVP_PRINT(TRACE_LEVEL_INFORMATION ,UVP_DBG_AV , + ("found GID at index %d\n", i)); + index = i; + break; + } + } + return index; +} + +ib_api_status_t +map_itom_av_attr ( + IN ib_ca_attr_t *p_ca_attr, + IN const ib_av_attr_t *p_av_attr, + OUT struct ibv_ah_attr *p_attr) +{ + + + ib_api_status_t status = IB_SUCCESS; + if (p_av_attr->port_num == 0 || + p_av_attr->port_num > p_ca_attr->num_ports) { + UVP_PRINT(TRACE_LEVEL_WARNING ,UVP_DBG_AV , + (" invalid port number specified (%d)\n",p_av_attr->port_num)); + return IB_INVALID_PORT; + } + + p_attr->sl = p_av_attr->sl; + p_attr->port_num = p_av_attr->port_num; + p_attr->dlid = CL_NTOH16 (p_av_attr->dlid); + p_attr->src_path_bits = p_av_attr->path_bits; // PATH: + + //TODO: how static_rate is coded ? + p_attr->static_rate = + (p_av_attr->static_rate == IB_PATH_RECORD_RATE_10_GBS ? 
0 : 3); + + /* For global destination or Multicast address:*/ + if (p_av_attr->grh_valid) { + p_attr->is_global = TRUE; + p_attr->grh.hop_limit = p_av_attr->grh.hop_limit; + ib_grh_get_ver_class_flow( p_av_attr->grh.ver_class_flow, NULL, + &p_attr->grh.traffic_class, &p_attr->grh.flow_label ); + p_attr->grh.sgid_index = gid_to_index_lookup (p_ca_attr, + p_av_attr->port_num, (uint8_t *) p_av_attr->grh.src_gid.raw); + cl_memcpy (p_attr->grh.dgid.raw, p_av_attr->grh.dest_gid.raw, + sizeof (IB_gid_t)); + }else{ + p_attr->is_global = FALSE; + } + + return status; +} + +static ib_api_status_t +__pre_create_av ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_av_attr_t *p_av_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_av_handle_t *ph_uvp_av) +{ + int err; + struct mthca_ah *ah; + struct ibv_ah_attr attr; + struct ibv_create_ah *p_create_av; + ib_api_status_t status = IB_SUCCESS; + size_t size = max( sizeof(struct ibv_create_ah), sizeof(struct ibv_create_ah_resp) ); + mlnx_ual_pd_info_t *p_pd = (mlnx_ual_pd_info_t *)h_uvp_pd; + mlnx_ual_hobul_t *p_hobul = p_pd->p_hobul; + + UNREFERENCED_PARAMETER(ph_uvp_av); + + UVP_ENTER(UVP_DBG_AV); + + CL_ASSERT(p_umv_buf); + + // sanity check + if (p_av_attr->port_num == 0 || p_av_attr->port_num > p_hobul->p_hca_attr->num_ports) { + UVP_PRINT(TRACE_LEVEL_WARNING ,UVP_DBG_AV , + (" invalid port number specified (%d)\n",p_av_attr->port_num)); + status = IB_INVALID_PORT; + goto end; + } + + // convert parameters + cl_memset( &attr, 0, sizeof(attr)); + status = map_itom_av_attr (p_hobul->p_hca_attr, p_av_attr, &attr); + if(status != IB_SUCCESS ) + goto end; + + // allocate Ah object + ah = cl_zalloc( sizeof *ah ); + if( !ah ) { + status = IB_INSUFFICIENT_MEMORY; + goto end; + } + + // fill AH partly + ah->h_uvp_pd = h_uvp_pd; + cl_memcpy( &ah->av_attr, p_av_attr, sizeof(ah->av_attr) ); + + // try to create AV + err = mthca_alloc_av(to_mpd(p_pd->ibv_pd), &attr, ah, NULL); + if (err) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_AV , ("mthca_alloc_av failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_av; + } + + // allocate parameters + if( !p_umv_buf->p_inout_buf ) { + p_umv_buf->p_inout_buf = cl_zalloc( size ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_mem; + } + } + + // fill the parameters + p_umv_buf->input_size = sizeof(struct ibv_create_ah); + p_umv_buf->output_size = sizeof(struct ibv_create_ah_resp); + p_umv_buf->command = TRUE; + p_create_av = (struct ibv_create_ah *)p_umv_buf->p_inout_buf; + p_create_av->user_handle = (uint64_t)(ULONG_PTR)ah; + if (ah->in_kernel) { + struct mthca_ah_page *page = ah->page; + p_create_av->mr.start = (uint64_t)(ULONG_PTR)page->buf; + p_create_av->mr.length = g_page_size; + p_create_av->mr.hca_va = (uint64_t)(ULONG_PTR)page->buf; + p_create_av->mr.pd_handle = p_pd->ibv_pd->handle; + p_create_av->mr.pdn = to_mpd(p_pd->ibv_pd)->pdn; + p_create_av->mr.access_flags = 0; //local read + status = IB_SUCCESS; + } + else + status = IB_VERBS_PROCESSING_DONE; + + goto end; + +err_mem: + mthca_free_av(ah); +err_alloc_av: + cl_free(ah); +end: + UVP_EXIT(UVP_DBG_AV); + return status; +} + + +static void +__post_create_av ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN OUT ib_av_handle_t *ph_uvp_av, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + int err; + struct mthca_ah *ah; + struct mthca_ah_page *page; + struct ibv_create_ah_resp *p_resp; + ib_api_status_t status = IB_SUCCESS; + mlnx_ual_pd_info_t *p_pd = (mlnx_ual_pd_info_t *)h_uvp_pd; + + 
UVP_ENTER(UVP_DBG_AV); + + CL_ASSERT(p_umv_buf); + + p_resp = (struct ibv_create_ah_resp *)p_umv_buf->p_inout_buf; + ah = (struct mthca_ah *)(ULONG_PTR)p_resp->user_handle; + + if (IB_SUCCESS == ioctl_status) { + + if (!mthca_is_memfree(p_pd->ibv_pd->context)) { + page = ah->page; + if (ah->in_kernel) { + // fill mr parameters + page->mr.handle = p_resp->mr.mr_handle; + page->mr.lkey = p_resp->mr.lkey; + page->mr.rkey = p_resp->mr.rkey; + page->mr.pd = p_pd->ibv_pd; + page->mr.context = p_pd->ibv_pd->context; + } + ah->key = page->mr.lkey; + } + *ph_uvp_av = (ib_av_handle_t)ah; + } + else { + mthca_free_av(ah); + cl_free(ah); + } + goto end; + +end: + if (p_resp) + cl_free( p_resp ); + UVP_EXIT(UVP_DBG_AV); +} + +static ib_api_status_t +__pre_query_av ( + IN const ib_av_handle_t h_uvp_av, + IN OUT ci_umv_buf_t *p_umv_buf ) +{ + UNREFERENCED_PARAMETER(h_uvp_av); + UNREFERENCED_PARAMETER(p_umv_buf); + UVP_ENTER(UVP_DBG_AV); + UVP_EXIT(UVP_DBG_AV); + return IB_VERBS_PROCESSING_DONE; +} + + +static void +__post_query_av ( + IN const ib_av_handle_t h_uvp_av, + IN ib_api_status_t ioctl_status, + IN OUT ib_av_attr_t *p_addr_vector, + IN OUT ib_pd_handle_t *ph_pd, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + struct mthca_ah *ah = (struct mthca_ah *)h_uvp_av; + UNREFERENCED_PARAMETER(p_umv_buf); + + UVP_ENTER(UVP_DBG_AV); + CL_ASSERT(p_umv_buf); + CL_ASSERT(p_addr_vector); + + if (ioctl_status == IB_SUCCESS) + { + cl_memcpy (p_addr_vector, &ah->av_attr, sizeof (ib_av_attr_t)); + if (ph_pd) + *ph_pd = (ib_pd_handle_t)ah->h_uvp_pd; + } + + UVP_EXIT(UVP_DBG_AV); +} + +void mthca_set_av_params( struct mthca_ah *ah_p, struct ibv_ah_attr *ah_attr ); + +static ib_api_status_t +__pre_modify_av ( + IN const ib_av_handle_t h_uvp_av, + IN const ib_av_attr_t *p_addr_vector, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + ib_api_status_t status ; + struct mthca_ah *mthca_ah = (struct mthca_ah *)h_uvp_av; + mlnx_ual_pd_info_t *p_pd_info; + mlnx_ual_hobul_t *p_hobul; + struct ibv_ah_attr attr; + + UNREFERENCED_PARAMETER(p_umv_buf); + + UVP_ENTER(UVP_DBG_AV); + + CL_ASSERT(p_umv_buf); + + p_pd_info = mthca_ah->h_uvp_pd; + CL_ASSERT (p_pd_info); + + p_hobul = p_pd_info->p_hobul; + CL_ASSERT (p_hobul); + + status = map_itom_av_attr (p_hobul->p_hca_attr, p_addr_vector, &attr); + if(status != IB_SUCCESS) return status; + + mthca_set_av_params( mthca_ah, &attr); + cl_memcpy (&mthca_ah->av_attr, p_addr_vector, sizeof(ib_av_attr_t)); + + UVP_EXIT(UVP_DBG_AV); + + return IB_VERBS_PROCESSING_DONE; +} + +static void +__post_modify_av ( + IN const ib_av_handle_t h_uvp_av, + IN ib_api_status_t ioctl_status, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_AV); + UVP_EXIT(UVP_DBG_AV); +} + + +static ib_api_status_t +__pre_destroy_av ( + IN const ib_av_handle_t h_uvp_av) +{ + ib_api_status_t status ; + struct mthca_ah *mthca_ah = (struct mthca_ah *)h_uvp_av; + UVP_ENTER(UVP_DBG_AV); + if (mthca_ah->in_kernel) + status = IB_SUCCESS; + else + status = IB_VERBS_PROCESSING_DONE; + UVP_EXIT(UVP_DBG_AV); + return status; +} + +static void +__post_destroy_av ( + IN const ib_av_handle_t h_uvp_av, + IN ib_api_status_t ioctl_status) +{ + struct mthca_ah *mthca_ah = (struct mthca_ah *)h_uvp_av; + + UVP_ENTER(UVP_DBG_AV); + CL_ASSERT (h_uvp_av); + + if (IB_SUCCESS == ioctl_status) { + mthca_free_av(mthca_ah); + cl_free(mthca_ah); + } + + UVP_EXIT(UVP_DBG_AV); + return; +} + +void +mlnx_get_av_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + + CL_ASSERT(p_uvp); + + /* + * Address Vector Management Verbs + */ + p_uvp->pre_create_av = 
__pre_create_av; + p_uvp->post_create_av = __post_create_av; + p_uvp->pre_query_av = __pre_query_av; + p_uvp->post_query_av = __post_query_av; + p_uvp->pre_modify_av = __pre_modify_av; + p_uvp->post_modify_av = __post_modify_av; + p_uvp->pre_destroy_av = __pre_destroy_av; + p_uvp->post_destroy_av = __post_destroy_av; + +} + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_ca.c b/branches/IBFD/hw/mthca/user/mlnx_ual_ca.c new file mode 100644 index 00000000..c66d7f77 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_ca.c @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include "mlnx_ual_main.h" +#include "mt_l2w.h" +#include "mlnx_uvp.h" +#include "mlnx_uvp_verbs.h" +#include "mx_abi.h" + +#if defined(EVENT_TRACING) +#include "mlnx_ual_ca.tmh" +#endif + +extern uint32_t mlnx_dbg_lvl; + +static ib_api_status_t +__pre_open_ca ( + IN const ib_net64_t ca_guid, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_ca_handle_t *ph_uvp_ca) +{ + ib_api_status_t status = IB_SUCCESS; + + UNREFERENCED_PARAMETER(ph_uvp_ca); + + UVP_ENTER(UVP_DBG_SHIM); + if( p_umv_buf ) + { + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_zalloc( sizeof(struct ibv_get_context_resp) ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = p_umv_buf->output_size = sizeof(struct ibv_get_context_resp); + p_umv_buf->command = TRUE; + } +err_memory: + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static ib_api_status_t +__post_open_ca ( + IN const ib_net64_t ca_guid, + IN ib_api_status_t ioctl_status, + IN OUT ib_ca_handle_t *ph_uvp_ca, + IN ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status = ioctl_status; + mlnx_ual_hobul_t *new_ca; + struct ibv_get_context_resp *p_resp; + struct ibv_context * ibvcontext; + int err; + + UVP_ENTER(UVP_DBG_SHIM); + + p_resp = (struct ibv_get_context_resp *)p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == status) { + /* allocate ibv context */ + ibvcontext = mthca_alloc_context(p_resp); + if (IS_ERR(ibvcontext)) { + err = PTR_ERR(ibvcontext); + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_SHIM ,("mthca_alloc_context failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_context; + } + + /* allocate mthca context */ + new_ca = (mlnx_ual_hobul_t *)cl_zalloc( sizeof(mlnx_ual_hobul_t) ); + if( !new_ca ) { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + + /* return results */ + new_ca->ibv_ctx = ibvcontext; + new_ca->p_hca_attr = NULL; + *ph_uvp_ca = (ib_ca_handle_t)new_ca; + } + +err_memory: +err_alloc_context: + if (p_resp) + cl_free( p_resp ); + UVP_EXIT(UVP_DBG_SHIM); + return status; +} + +static ib_api_status_t +__pre_query_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_ca_attr_t *p_ca_attr, + IN size_t byte_count, + IN ci_umv_buf_t *p_umv_buf ) +{ + ib_api_status_t status = IB_SUCCESS; + + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(h_uvp_ca); + + /* + * First time call query_ca - populate our internal cached attributes + * so we can access the GID table. Note that query_ca calls *always* + * get their attributes from the kernel. + */ + if ( !h_uvp_ca->p_hca_attr ) + { + /* + * Assume if user buffer is valid then byte_cnt is valid too + * so we can preallocate ca attr buffer for post ioctl data saving + * + * Note that we squirel the buffer away into the umv_buf and only + * set it into the HCA if the query is successful. 
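+ *
+ * (As a reading aid: __post_query_ca below either installs this
+ * buffer as h_uvp_ca->p_hca_attr on a successful query or frees it,
+ * so the allocation made here is not leaked.)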
+ */ + if ( p_ca_attr != NULL ) + { + p_umv_buf->p_inout_buf = cl_zalloc(byte_count); + if ( !p_umv_buf->p_inout_buf ) + { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_SHIM , + ("Failed to alloc new_ca\n")); + status = IB_INSUFFICIENT_RESOURCES; + return status; + } + } + p_umv_buf->input_size = p_umv_buf->output_size = 0; + } + + UVP_EXIT(UVP_DBG_SHIM); + return status; +} + + +static void +__post_query_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN ib_ca_attr_t *p_ca_attr, + IN size_t byte_count, + IN ci_umv_buf_t *p_umv_buf ) +{ + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(h_uvp_ca); + CL_ASSERT(p_umv_buf); + + if ( ioctl_status == IB_SUCCESS && p_ca_attr && + byte_count && !h_uvp_ca->p_hca_attr ) + { + CL_ASSERT( byte_count >= p_ca_attr->size ); + h_uvp_ca->p_hca_attr = p_umv_buf->p_inout_buf; + ib_copy_ca_attr( h_uvp_ca->p_hca_attr, p_ca_attr ); + } + else if (p_umv_buf->p_inout_buf) + { + cl_free (p_umv_buf->p_inout_buf); + } + + UVP_EXIT(UVP_DBG_SHIM); + return; +} + + +static ib_api_status_t +__pre_modify_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN uint8_t port_num, + IN ib_ca_mod_t ca_mod, + IN const ib_port_attr_mod_t* p_port_attr_mod) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static void +__post_modify_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); +} + + +static ib_api_status_t +__pre_close_ca ( + IN ib_ca_handle_t h_uvp_ca) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static ib_api_status_t +__post_close_ca ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status ) +{ + mlnx_ual_hobul_t *p_hobul = (mlnx_ual_hobul_t *)((void*)h_uvp_ca); + + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_hobul); + + if (IB_SUCCESS == ioctl_status) { + if (p_hobul->ibv_ctx) { + mthca_free_context(p_hobul->ibv_ctx); + p_hobul->ibv_ctx = NULL; + } + + if (p_hobul->p_hca_attr) { + cl_free( p_hobul->p_hca_attr); + p_hobul->p_hca_attr = NULL; + } + + cl_free(p_hobul); + } + + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + +void +mlnx_get_ca_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + CL_ASSERT(p_uvp); + + /* + * HCA Access Verbs + */ + p_uvp->pre_open_ca = __pre_open_ca; + p_uvp->post_open_ca = __post_open_ca; + + + p_uvp->pre_query_ca = __pre_query_ca; + p_uvp->post_query_ca = __post_query_ca; + + p_uvp->pre_modify_ca = NULL; + p_uvp->post_modify_ca = NULL; + + p_uvp->pre_close_ca = __pre_close_ca; + p_uvp->post_close_ca = __post_close_ca; + +} + + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_cq.c b/branches/IBFD/hw/mthca/user/mlnx_ual_cq.c new file mode 100644 index 00000000..22d36285 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_cq.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mt_l2w.h" +#include "mlnx_ual_main.h" +#include "mlnx_uvp.h" +#include "mx_abi.h" + +#if defined(EVENT_TRACING) +#include "mlnx_ual_cq.tmh" +#endif + + +extern uint32_t mlnx_dbg_lvl; + +static ib_api_status_t +__pre_create_cq ( + IN const ib_ca_handle_t h_uvp_ca, + IN OUT uint32_t* const p_size, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_cq_handle_t *ph_uvp_cq) +{ + struct ibv_cq *ibv_cq; + ib_api_status_t status = IB_SUCCESS; + size_t size = max( sizeof(struct ibv_create_cq), sizeof(struct ibv_create_cq_resp) ); + mlnx_ual_hobul_t *p_hobul = (mlnx_ual_hobul_t *)((void *)h_uvp_ca); + struct ibv_create_cq *p_create_cq; + int err; + + UNREFERENCED_PARAMETER(ph_uvp_cq); + + UVP_ENTER(UVP_DBG_CQ); + + CL_ASSERT(p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_zalloc( size ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = sizeof(struct ibv_create_cq); + p_umv_buf->output_size = sizeof(struct ibv_create_cq_resp); + p_umv_buf->command = TRUE; + + /* allocate ibv_cq */ + p_create_cq = (struct ibv_create_cq *)p_umv_buf->p_inout_buf; + ibv_cq = p_hobul->ibv_ctx->ops.create_cq_pre(p_hobul->ibv_ctx, p_size, p_create_cq); + if (IS_ERR(ibv_cq)) { + err = PTR_ERR(ibv_cq); + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_CQ , ("mthca_alloc_cq_pre failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_cq; + } + + goto end; + +err_alloc_cq: + cl_free(p_umv_buf->p_inout_buf); +err_memory: +end: + UVP_EXIT(UVP_DBG_CQ); + return status; +} + + +static void +__post_create_cq ( + IN const ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN const uint32_t size, + IN OUT ib_cq_handle_t *ph_uvp_cq, + IN ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ibv_create_cq_resp *p_resp; + struct ibv_cq *ibv_cq; + mlnx_ual_hobul_t *p_hobul = (mlnx_ual_hobul_t *)((void *)h_uvp_ca); + + + UVP_ENTER(UVP_DBG_CQ); + + CL_ASSERT(p_hobul); + CL_ASSERT(p_umv_buf); + p_resp = (struct ibv_create_cq_resp *)p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) { + + /* allocate ibv_cq */ + ibv_cq = p_hobul->ibv_ctx->ops.create_cq_post(p_hobul->ibv_ctx, p_resp); + if (IS_ERR(ibv_cq)) { + err = PTR_ERR(ibv_cq); + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_CQ , ("mthca_create_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_create_cq; + } + + *ph_uvp_cq = (ib_cq_handle_t)ibv_cq; + } + goto end; + + p_hobul->ibv_ctx->ops.destroy_cq(ibv_cq); +err_create_cq: +end: + if (p_resp) + cl_free( p_resp ); + UVP_EXIT(UVP_DBG_CQ); + return; +} + + +static ib_api_status_t +__pre_query_cq ( + IN const ib_cq_handle_t h_uvp_cq, + OUT uint32_t* const p_size, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + struct ibv_cq *ibv_cq = (struct 
ibv_cq *)h_uvp_cq; + + UVP_ENTER(UVP_DBG_CQ); + + *p_size = ibv_cq->cqe; + + UVP_EXIT(UVP_DBG_CQ); + return IB_VERBS_PROCESSING_DONE; +} + + +static ib_api_status_t +__pre_destroy_cq ( + IN const ib_cq_handle_t h_uvp_cq) +{ + UVP_ENTER(UVP_DBG_CQ); + UVP_EXIT(UVP_DBG_CQ); + return IB_SUCCESS; +} + +static void +__post_destroy_cq ( + IN const ib_cq_handle_t h_uvp_cq, + IN ib_api_status_t ioctl_status) +{ + int err; + struct ibv_cq *ibv_cq = (struct ibv_cq *)h_uvp_cq; + UNREFERENCED_PARAMETER(ioctl_status); + + UVP_ENTER(UVP_DBG_CQ); + + CL_ASSERT(ibv_cq); + + if (IB_SUCCESS == ioctl_status) { + err = ibv_cq->context->ops.destroy_cq( ibv_cq ); + if (err) + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_CQ, ("mthca_destroy_cq failed (%d)\n", err)); + //cl_free (p_cq_info); + } + + UVP_EXIT(UVP_DBG_CQ); +} + +void +mlnx_get_cq_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + UVP_ENTER(UVP_DBG_DEV); + + CL_ASSERT(p_uvp); + + /* + * Completion Queue Management Verbs + */ + p_uvp->pre_create_cq = __pre_create_cq; + p_uvp->post_create_cq = __post_create_cq; + + p_uvp->pre_query_cq = __pre_query_cq; + p_uvp->post_query_cq = NULL; + + p_uvp->pre_resize_cq = NULL; /* __pre_resize_cq: not supported in kernel */ + p_uvp->post_resize_cq = NULL; /* __post_resize_cq:not supported in kernel */ + + p_uvp->pre_destroy_cq = __pre_destroy_cq; + p_uvp->post_destroy_cq = __post_destroy_cq; + + UVP_EXIT(UVP_DBG_DEV); +} + + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_data.h b/branches/IBFD/hw/mthca/user/mlnx_ual_data.h new file mode 100644 index 00000000..fa201d97 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_data.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ +#include +#include + +// taken from ib_defs.h +typedef uint32_t IB_wqpn_t; /* Work QP number: Only 24 LSbits */ +typedef uint8_t IB_port_t; +typedef uint8_t IB_gid_t[16]; /* GID (aka IPv6) H-to-L (big) (network) endianess */ +typedef uint32_t IB_ts_t; + +typedef struct _ib_ca +{ + struct ibv_context *ibv_ctx; + ib_ca_attr_t *p_hca_attr; +} mlnx_ual_hobul_t; + + +typedef struct _ib_pd +{ + struct ibv_pd *ibv_pd; + mlnx_ual_hobul_t *p_hobul; +} mlnx_ual_pd_info_t; + +typedef struct _ib_mw +{ + ib_pd_handle_t h_uvp_pd; + uint32_t rkey; +} mlnx_ual_mw_info_t; + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_main.c b/branches/IBFD/hw/mthca/user/mlnx_ual_main.c new file mode 100644 index 00000000..172fd341 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_main.c @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include +#include +#include +#include "mlnx_ual_main.h" + +#if defined(EVENT_TRACING) +#include "mlnx_ual_main.tmh" +#endif + + +uint32_t mlnx_dbg_lvl = 0; // MLNX_TRACE_LVL_8; + +static void uvp_init(); + +extern BOOL APIENTRY +_DllMainCRTStartupForGS( + IN HINSTANCE h_module, + IN DWORD ul_reason_for_call, + IN LPVOID lp_reserved ); + + +BOOL APIENTRY +DllMain( + IN HINSTANCE h_module, + IN DWORD ul_reason_for_call, + IN LPVOID lp_reserved ) +{ + switch( ul_reason_for_call ) + { + case DLL_PROCESS_ATTACH: +#if defined(EVENT_TRACING) +#if DBG + WPP_INIT_TRACING(L"mthcaud.dll"); +#else + WPP_INIT_TRACING(L"mthcau.dll"); +#endif +#endif + if( !_DllMainCRTStartupForGS( + h_module, ul_reason_for_call, lp_reserved ) ) + { + return FALSE; + } + + fill_bit_tbls(); + uvp_init(); + break; + + case DLL_PROCESS_DETACH: + // The calling process is detaching + // the DLL from its address space. + // + // Note that lpvReserved will be NULL if the detach is due to + // a FreeLibrary() call, and non-NULL if the detach is due to + // process cleanup. 
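+ // Also note: this case has no break, so after WPP_CLEANUP() it
+ // appears to fall through into the default case and calls
+ // _DllMainCRTStartupForGS() for the detach as well.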
+ // +#if defined(EVENT_TRACING) + WPP_CLEANUP(); +#endif + + default: + return _DllMainCRTStartupForGS( + h_module, ul_reason_for_call, lp_reserved ); + } + return TRUE; +} + + +/* + * UVP Shared Library Init routine +*/ + +static void +uvp_init() +{ + +#if !defined(EVENT_TRACING) +#if DBG +#define ENV_BUFSIZE 16 + TCHAR dbg_lvl_str[ENV_BUFSIZE]; + DWORD i; + + + i = GetEnvironmentVariable( "UVP_DBG_LEVEL", dbg_lvl_str, ENV_BUFSIZE ); + if( i && i <= 16 ) + { + g_mlnx_dbg_level = _tcstoul( dbg_lvl_str, NULL, ENV_BUFSIZE ); + } + + i = GetEnvironmentVariable( "UVP_DBG_FLAGS", dbg_lvl_str, ENV_BUFSIZE ); + if( i && i <= 16 ) + { + g_mlnx_dbg_flags = _tcstoul( dbg_lvl_str, NULL, ENV_BUFSIZE ); + } + + + UVP_PRINT(TRACE_LEVEL_INFORMATION ,UVP_DBG_DEV , + ("Given UVP_DBG debug level:%d debug flags 0x%x\n", + g_mlnx_dbg_level ,g_mlnx_dbg_flags) ); + +#endif +#endif +} + +__declspec(dllexport) ib_api_status_t +uvp_get_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_uvp); + /* + * Version of the header file this interface export can handle + */ + p_uvp->version = 0x101; + p_uvp->guid = 0x12345678; + + /* + * CA Management + */ + mlnx_get_ca_interface (p_uvp); + + /* + * Protection Domain + */ + mlnx_get_pd_interface (p_uvp); + + /* + * SRQ Management Verbs + */ + mlnx_get_srq_interface (p_uvp); + + /* + * QP Management Verbs + */ + mlnx_get_qp_interface (p_uvp); + + /* + * Completion Queue Management Verbs + */ + mlnx_get_cq_interface (p_uvp); + + /* + * AV Management + */ + mlnx_get_av_interface(p_uvp); + + /* + * Memory Region / Window Management Verbs + */ + mlnx_get_mrw_interface (p_uvp); + + /* + * Multicast Support Verbs + */ + mlnx_get_mcast_interface (p_uvp); + + /* + * OS bypass (send, receive, poll/notify cq) + */ + mlnx_get_osbypass_interface(p_uvp); + + + /* + * Local MAD support, for HCA's that do not support + * Agents in the HW. + * ??? Do we need this for user-mode ??? + */ + + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_main.h b/branches/IBFD/hw/mthca/user/mlnx_ual_main.h new file mode 100644 index 00000000..9d717d8f --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_main.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#ifndef __UAL_MAIN_H__ +#define __UAL_MAIN_H__ + +#include +#include +#include + +//#include +#include "mlnx_ual_data.h" +#include "mlnx_uvp_debug.h" +#include +#include +//#include + + +#define MAX_WRS_PER_CHAIN 16 +#define MAX_NUM_SGE 32 + +#define MLNX_SGE_SIZE 16 +#define MLNX_UAL_ALLOC_HCA_UL_RES 1 +#define MLNX_UAL_FREE_HCA_UL_RES 2 + +typedef unsigned __int3264 cl_dev_handle_t; + +extern uint32_t mlnx_dbg_lvl; +static inline errno_to_iberr(int err) +{ +#define MAP_ERR(err,ibstatus) case err: ib_status = ibstatus; break + ib_api_status_t ib_status = IB_UNKNOWN_ERROR; + if (err < 0) + err = -err; + switch (err) { + MAP_ERR( ENOENT, IB_NOT_FOUND ); + MAP_ERR( EINTR, IB_INTERRUPTED ); + MAP_ERR( EAGAIN, IB_RESOURCE_BUSY ); + MAP_ERR( ENOMEM, IB_INSUFFICIENT_MEMORY ); + MAP_ERR( EACCES, IB_INVALID_PERMISSION ); + MAP_ERR( EFAULT, IB_ERROR ); + MAP_ERR( EBUSY, IB_RESOURCE_BUSY ); + MAP_ERR( ENODEV, IB_UNSUPPORTED ); + MAP_ERR( EINVAL, IB_INVALID_PARAMETER ); + MAP_ERR( ENOSYS, IB_UNSUPPORTED ); + default: + CL_TRACE (CL_DBG_ERROR, mlnx_dbg_lvl, ("Unmapped errno (%d)\n", err)); + break; + } + return ib_status; +} + + + + +/* + * PROTOTYPES + */ + +/************* CA operations *************************/ +void +mlnx_get_ca_interface ( + IN OUT uvp_interface_t *p_uvp ); + +/************* PD Management *************************/ +void +mlnx_get_pd_interface ( + IN OUT uvp_interface_t *p_uvp ); + +/************* AV Management *************************/ +void +mlnx_get_av_interface ( + IN OUT uvp_interface_t *p_uvp ); + +/************* CQ Management *************************/ +void +mlnx_get_cq_interface ( + IN OUT uvp_interface_t *p_uvp ); + +/************* SRQ Management *************************/ +void +mlnx_get_srq_interface ( + IN OUT uvp_interface_t *p_uvp ); + +/************* QP Management *************************/ +void +mlnx_get_qp_interface ( + IN OUT uvp_interface_t *p_uvp ); + +/************* MR/MW Management *************************/ +void +mlnx_get_mrw_interface ( + IN OUT uvp_interface_t *p_uvp ); + +/************* MCAST Management *************************/ +void +mlnx_get_mcast_interface ( + IN OUT uvp_interface_t *p_uvp ); + +/************* OS BYPASS Management *************************/ +void +mlnx_get_osbypass_interface ( + IN OUT uvp_interface_t *p_uvp ); + +#endif diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_mcast.c b/branches/IBFD/hw/mthca/user/mlnx_ual_mcast.c new file mode 100644 index 00000000..b72a210c --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_mcast.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mlnx_ual_main.h" + +#if defined(EVENT_TRACING) +#include "mlnx_ual_mcast.tmh" +#endif + +static ib_api_status_t +__pre_attach_mcast ( + IN const ib_qp_handle_t h_uvp_qp, + IN const ib_gid_t *p_mcast_gid, + IN const uint16_t mcast_lid, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_mcast_handle_t *ph_mcast) +{ + UNREFERENCED_PARAMETER(ph_mcast); + + UVP_ENTER(UVP_DBG_SHIM); + CL_ASSERT(p_umv_buf); + p_umv_buf->p_inout_buf = NULL;; + p_umv_buf->input_size = 0; + p_umv_buf->output_size = 0; + p_umv_buf->command = TRUE; + + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + + +static void +__post_attach_mcast ( + IN const ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status, + IN OUT ib_mcast_handle_t *ph_mcast, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); +} + + + +static ib_api_status_t +__pre_detach_mcast ( + IN ib_mcast_handle_t h_uvp_mcast, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_umv_buf); + p_umv_buf->p_inout_buf = NULL;; + p_umv_buf->input_size = 0; + p_umv_buf->output_size = 0; + + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static void +__post_detach_mcast ( + IN ib_mcast_handle_t h_uvp_mcast, + IN ib_api_status_t ioctl_status, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); +} + +void +mlnx_get_mcast_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_uvp); + + /* + * Multicast Support Verbs + */ + p_uvp->pre_attach_mcast = NULL; + p_uvp->post_attach_mcast = NULL; + p_uvp->pre_detach_mcast = NULL; + p_uvp->post_detach_mcast = NULL; + + UVP_EXIT(UVP_DBG_SHIM); +} + + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_mrw.c b/branches/IBFD/hw/mthca/user/mlnx_ual_mrw.c new file mode 100644 index 00000000..6577aadc --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_mrw.c @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mlnx_ual_main.h" +#if defined(EVENT_TRACING) +#include "mlnx_ual_mrw.tmh" +#endif + + + +static ib_api_status_t +__pre_register_mr ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_mr_create_t *p_mr_create, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_mr_handle_t *ph_uvp_mr) +{ + UNREFERENCED_PARAMETER(ph_uvp_mr); + + UVP_ENTER(UVP_DBG_SHIM); + CL_ASSERT(p_umv_buf); + p_umv_buf->p_inout_buf = NULL;; + p_umv_buf->input_size = 0; + p_umv_buf->output_size = 0; + + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static void +__post_register_mr ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN const uint32_t *p_lkey, + IN const uint32_t *p_rkey, + IN OUT const ib_mr_handle_t *ph_uvp_mr, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); + return; +} + + +static ib_api_status_t +__pre_query_mr ( + IN const ib_mr_handle_t h_uvp_mr, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + CL_ASSERT(p_umv_buf); + p_umv_buf->p_inout_buf = NULL;; + p_umv_buf->input_size = 0; + p_umv_buf->output_size = 0; + + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static void +__post_query_mr ( + IN const ib_mr_handle_t h_uvp_mr, + IN ib_api_status_t ioctl_status, + IN const ib_mr_attr_t *p_mr_query, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); + return; +} + + +static ib_api_status_t +__pre_modify_mr ( + IN const ib_mr_handle_t h_uvp_mr, + IN const ib_pd_handle_t h_uvp_pd OPTIONAL, + IN const ib_mr_mod_t mr_mod_mask, + IN const ib_mr_create_t *p_mr_create OPTIONAL, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + CL_ASSERT(p_umv_buf); + p_umv_buf->p_inout_buf = NULL;; + p_umv_buf->input_size = 0; + p_umv_buf->output_size = 0; + + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static void +__post_modify_mr ( + IN const ib_mr_handle_t h_uvp_mr, + IN const ib_pd_handle_t h_uvp_pd OPTIONAL, + IN ib_api_status_t ioctl_status, + IN const uint32_t *p_lkey, + IN const uint32_t *p_rkey, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); + return; +} + + +static ib_api_status_t +__pre_register_smr ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_mr_handle_t h_uvp_mr, + IN const ib_access_t access_ctrl, + IN void *p_vaddr, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + CL_ASSERT(p_umv_buf); + p_umv_buf->p_inout_buf = NULL;; + p_umv_buf->input_size = 0; + p_umv_buf->output_size = 0; + + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static void +__post_register_smr ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_mr_handle_t h_uvp_mr, + IN ib_api_status_t ioctl_status, + IN const void *p_vaddr, + IN const uint32_t *p_lkey, + IN const uint32_t *p_rkey, + OUT const ib_mr_handle_t *ph_uvp_smr, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); + return; +} + + +static ib_api_status_t +__pre_deregister_mr ( + IN const ib_mr_handle_t h_uvp_mr, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static void +__post_deregister_mr ( + IN const ib_mr_handle_t h_uvp_mr, + IN OUT ci_umv_buf_t *p_umv_buf) +{ 
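	/*
	 * Nothing to undo in user space: MR deregistration for mthca is
	 * handled entirely by the kernel driver, so the pre/post hooks in
	 * this file only trace entry and exit (and the whole MR group is
	 * left NULL in mlnx_get_mrw_interface() below).
	 */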
+ UVP_ENTER(UVP_DBG_SHIM); + UVP_EXIT(UVP_DBG_SHIM); + return; +} + +void +mlnx_get_mrw_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_uvp); + + /* + * Memory Management Verbs + */ +// p_uvp->pre_register_mr = NULL; +// p_uvp->post_register_mr = NULL; +// p_uvp->pre_query_mr = NULL; +// p_uvp->post_query_mr = NULL; +// p_uvp->pre_deregister_mr = NULL; +// p_uvp->post_deregister_mr = NULL; +// p_uvp->pre_modify_mr = NULL; +// p_uvp->post_modify_mr = NULL; +// p_uvp->pre_register_smr = NULL; +// p_uvp->post_register_smr = NULL; + + /* + * Memory Window Verbs + */ + p_uvp->pre_create_mw = NULL; // __pre_create_mw + p_uvp->post_create_mw = NULL; // __post_create_mw + p_uvp->pre_query_mw = NULL; // __pre_query_mw + p_uvp->post_query_mw = NULL; // __post_query_mw + p_uvp->pre_destroy_mw = NULL; // __pre_destroy_mw + p_uvp->post_destroy_mw = NULL; // __post_destroy_mw + + /* register_pmr is not supported in user-mode */ + + UVP_EXIT(UVP_DBG_SHIM); +} + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_osbypass.c b/branches/IBFD/hw/mthca/user/mlnx_ual_osbypass.c new file mode 100644 index 00000000..0c61f747 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_osbypass.c @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include "mt_l2w.h" +#include "mlnx_uvp.h" +#include "mx_abi.h" + +#include "mlnx_ual_main.h" +#if defined(EVENT_TRACING) +#include "mlnx_ual_osbypass.tmh" +#endif + +static ib_api_status_t __to_status(int err) +{ + ib_api_status_t status; + + switch (err) { + case -ENOMEM: status = IB_INSUFFICIENT_RESOURCES; break; + case -EINVAL: status = IB_INVALID_WR_TYPE; break; + case -ERANGE: status = IB_INVALID_MAX_SGE; break; + case -EBUSY: status = IB_INVALID_QP_STATE; break; + case -E2BIG: status = IB_INVALID_PARAMETER; break; + default: status = errno_to_iberr(err); + } + return status; +} + +static ib_api_status_t +__post_send ( + IN const void* __ptr64 h_qp, + IN ib_send_wr_t* const p_send_wr, + OUT ib_send_wr_t** pp_send_failure ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct mthca_qp *qp = (struct mthca_qp *) ((void*)h_qp); + + UVP_ENTER(UVP_DBG_QP); + + CL_ASSERT (qp); + + CL_ASSERT( p_send_wr ); + + err = qp->ibv_qp.context->ops.post_send(&qp->ibv_qp, p_send_wr, pp_send_failure ); + + if (err) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP , ("mthca_post_send failed (%d)\n", err)); + status = __to_status(err); + } + + + UVP_EXIT(UVP_DBG_QP); + return status; +} + +static ib_api_status_t +__post_recv ( + IN const void* __ptr64 h_qp, + IN ib_recv_wr_t* const p_recv_wr, + OUT ib_recv_wr_t** pp_recv_failure ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct mthca_qp *qp = (struct mthca_qp *) ((void*)h_qp); + + UVP_ENTER(UVP_DBG_QP); + + CL_ASSERT (qp); + + CL_ASSERT( p_recv_wr ); + + err = qp->ibv_qp.context->ops.post_recv(&qp->ibv_qp, p_recv_wr, pp_recv_failure ); + + if (err) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP, ("mthca_post_recv failed (%d)\n", err)); + status = __to_status(err); + } + + UVP_EXIT(UVP_DBG_QP); + return status; +} + + +static ib_api_status_t +__post_srq_recv ( + IN const void* __ptr64 h_srq, + IN ib_recv_wr_t* const p_recv_wr, + OUT ib_recv_wr_t** pp_recv_failure ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct mthca_srq *srq = (struct mthca_srq *) ((void*)h_srq); + + UVP_ENTER(UVP_DBG_QP); + + CL_ASSERT (srq); + + CL_ASSERT( p_recv_wr ); + + err = srq->ibv_srq.context->ops.post_srq_recv(&srq->ibv_srq, p_recv_wr, pp_recv_failure ); + if (err) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP, ("mthca_post_recv failed (%d)\n", err)); + status = __to_status(err); + } + + UVP_EXIT(UVP_DBG_QP); + return status; +} + + +static ib_api_status_t +__poll_cq ( + IN const void* __ptr64 h_cq, + IN OUT ib_wc_t** const pp_free_wclist, + OUT ib_wc_t** const pp_done_wclist ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct mthca_cq *cq = (struct mthca_cq *) ((void*)h_cq); + + UVP_ENTER(UVP_DBG_CQ); + CL_ASSERT (cq); + + if (!pp_free_wclist || !*pp_free_wclist || !pp_done_wclist) + { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_CQ ,("Passed in bad params\n")); + status = IB_INVALID_PARAMETER; + goto err_invalid_params; + } + + err = cq->ibv_cq.context->ops.poll_cq_list(&cq->ibv_cq, pp_free_wclist, pp_done_wclist ); + if (err) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_CQ , ("mthca_poll_cq failed (%d)\n", err)); + status = errno_to_iberr(err); + }else if (!*pp_done_wclist) + status = IB_NOT_FOUND; + + +err_invalid_params: + + if (status != IB_NOT_FOUND){ + UVP_PRINT_EXIT(TRACE_LEVEL_ERROR ,UVP_DBG_CQ ,("completes with ERROR status %s\n", ib_get_err_str(status))); + }else + UVP_EXIT(UVP_DBG_CQ); + + return status; +} + + +static ib_api_status_t +__enable_cq_notify ( + IN const void* __ptr64 h_cq, + IN const boolean_t 
solicited ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct mthca_cq *cq = (struct mthca_cq *) ((void*)h_cq); + + UVP_ENTER(UVP_DBG_CQ); + CL_ASSERT (cq); + + err = cq->ibv_cq.context->ops.req_notify_cq(&cq->ibv_cq, (solicited) ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP ); + if (err) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_SHIM , ("mthca_enable_cq_notify failed (%d)\n", err)); + status = errno_to_iberr(err); + goto exit; + } + +exit: + UVP_EXIT(UVP_DBG_CQ); + return status; +} + + +static ib_api_status_t +__enable_ncomp_cq_notify ( + IN const void* __ptr64 h_cq, + IN const uint32_t n_cqes ) +{ + // Not yet implemented + ib_api_status_t status = IB_UNSUPPORTED; + UVP_ENTER(UVP_DBG_SHIM); + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_SHIM , ("__enable_ncomp_cq_notify is not implemented yet\n")); + UVP_EXIT(UVP_DBG_SHIM); + return status; +} + + +void +mlnx_get_osbypass_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + + CL_ASSERT(p_uvp); + + /* + * Work Request Processing Verbs + * Should the types be same as Verbs? + */ + p_uvp->post_send = __post_send; + p_uvp->post_recv = __post_recv; + p_uvp->post_srq_recv = __post_srq_recv; + + /* + * Completion Processing and + * Completion Notification Request Verbs. + * Should the types be same as Verbs? + */ + p_uvp->poll_cq = __poll_cq; + p_uvp->rearm_cq = __enable_cq_notify; + p_uvp->rearm_n_cq = NULL; /* __enable_ncomp_cq_notify: Not implemented */; + p_uvp->peek_cq = NULL; /* __peek_cq: Not implemented */ + + /* Memory window bind */ + p_uvp->bind_mw = NULL; /* __bind_mw: Not implemented */ +} + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_pd.c b/branches/IBFD/hw/mthca/user/mlnx_ual_pd.c new file mode 100644 index 00000000..0bf043ca --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_pd.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + + +#include "mt_l2w.h" +#include "mlnx_ual_main.h" +#include "mlnx_uvp.h" +#include "mx_abi.h" + +#if defined(EVENT_TRACING) +#include "mlnx_ual_pd.tmh" +#endif + +static ib_api_status_t +__pre_allocate_pd ( + IN const ib_ca_handle_t h_uvp_ca, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_pd_handle_t *ph_uvp_pd) +{ + ib_api_status_t status = IB_SUCCESS; + + UNREFERENCED_PARAMETER(ph_uvp_pd); + + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_zalloc( sizeof(struct ibv_alloc_pd_resp) ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = p_umv_buf->output_size = sizeof(struct ibv_alloc_pd_resp); + p_umv_buf->command = TRUE; + +err_memory: + UVP_EXIT(UVP_DBG_SHIM); + return status; +} + + +static void +__post_allocate_pd ( + IN ib_ca_handle_t h_uvp_ca, + IN ib_api_status_t ioctl_status, + IN OUT ib_pd_handle_t *ph_uvp_pd, + IN ci_umv_buf_t *p_umv_buf ) +{ + int err; + ib_api_status_t status = IB_SUCCESS; + struct ibv_alloc_pd_resp *p_resp; + struct ibv_pd *ibv_pd; + mlnx_ual_hobul_t *p_hobul = (mlnx_ual_hobul_t *)((void *)h_uvp_ca); + mlnx_ual_pd_info_t *p_new_pd; + + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_hobul); + CL_ASSERT(p_umv_buf); + p_resp = (struct ibv_alloc_pd_resp *)p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) { + + /* allocate ibv_pd */ + ibv_pd = p_hobul->ibv_ctx->ops.alloc_pd(p_hobul->ibv_ctx, p_resp); + if (IS_ERR(ibv_pd)) { + err = PTR_ERR(ibv_pd); + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_SHIM , ("mthca_alloc_pd failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_alloc_pd; + } + + /* allocate pd */ + p_new_pd = (mlnx_ual_pd_info_t *)cl_zalloc( sizeof(mlnx_ual_pd_info_t) ); + if( !p_new_pd ) { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + + /* return results */ + p_new_pd->ibv_pd = ibv_pd; + p_new_pd->p_hobul = p_hobul; + *ph_uvp_pd = (ib_pd_handle_t)p_new_pd; + } + goto end; + +err_memory: + p_hobul->ibv_ctx->ops.dealloc_pd(ibv_pd); +err_alloc_pd: +end: + if (p_resp) + cl_free( p_resp ); + UVP_EXIT(UVP_DBG_SHIM); + return; +} + + +static ib_api_status_t +__pre_deallocate_pd ( + IN const ib_pd_handle_t h_uvp_pd) +{ + mlnx_ual_pd_info_t *p_pd_info = (mlnx_ual_pd_info_t *)((void *)h_uvp_pd); + UVP_ENTER(UVP_DBG_SHIM); + CL_ASSERT(p_pd_info); + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static void +__post_deallocate_pd ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status ) +{ + int err; + mlnx_ual_pd_info_t *p_pd_info = (mlnx_ual_pd_info_t *)((void *)h_uvp_pd); + + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_pd_info || p_pd_info->ibv_pd); + + if (IB_SUCCESS == ioctl_status) { + err = p_pd_info->p_hobul->ibv_ctx->ops.dealloc_pd( p_pd_info->ibv_pd ); + if (err) + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_SHIM , ("mthca_alloc_pd failed (%d)\n", err)); + + cl_free (p_pd_info); + } + UVP_EXIT(UVP_DBG_SHIM); +} + +void +mlnx_get_pd_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_uvp); + + /* + * Protection Domain + */ + p_uvp->pre_allocate_pd = __pre_allocate_pd; + p_uvp->post_allocate_pd = __post_allocate_pd; + p_uvp->pre_deallocate_pd = __pre_deallocate_pd; + p_uvp->post_deallocate_pd = __post_deallocate_pd; + + UVP_EXIT(UVP_DBG_SHIM); +} + + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_qp.c b/branches/IBFD/hw/mthca/user/mlnx_ual_qp.c new file mode 100644 index 00000000..91834e81 --- /dev/null +++ 
b/branches/IBFD/hw/mthca/user/mlnx_ual_qp.c @@ -0,0 +1,372 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mt_l2w.h" +#include "mlnx_uvp.h" +#include "mx_abi.h" + +#include "mlnx_ual_main.h" +#if defined(EVENT_TRACING) +#include "mlnx_ual_qp.tmh" +#endif + +static void +__nd_modify_qp( + IN const ib_qp_handle_t h_uvp_qp, + OUT void** pp_outbuf, + OUT DWORD* p_size + ) +{ + struct ibv_qp *ibv_qp = (struct ibv_qp *)h_uvp_qp; + UVP_ENTER(UVP_DBG_QP); + *(uint32_t**)pp_outbuf = (uint32_t*)&ibv_qp->state; + *p_size = sizeof(ibv_qp->state); + UVP_EXIT(UVP_DBG_QP); +} + +static ib_qp_state_t __qp_state_to_ibal(enum ibv_qp_state state) +{ + switch ( state ) { + case IBV_QPS_RESET: return IB_QPS_RESET; + case IBV_QPS_INIT: return IB_QPS_INIT; + case IBV_QPS_RTR: return IB_QPS_RTR; + case IBV_QPS_RTS: return IB_QPS_RTS; + case IBV_QPS_SQD: return IB_QPS_SQD; + case IBV_QPS_SQE: return IB_QPS_SQERR; + case IBV_QPS_ERR: return IB_QPS_ERROR; + default: return IB_QPS_TIME_WAIT; + }; +} + +static uint32_t +__nd_get_qp_state( + IN const ib_qp_handle_t h_uvp_qp + ) +{ + struct ibv_qp *ibv_qp = (struct ibv_qp *)h_uvp_qp; + UVP_ENTER(UVP_DBG_QP); + return __qp_state_to_ibal( ibv_qp->state ); + UVP_EXIT(UVP_DBG_QP); +} + +static ib_api_status_t +__pre_create_qp ( + IN const ib_pd_handle_t h_uvp_pd, + IN const ib_qp_create_t *p_create_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_qp_handle_t *ph_uvp_qp) +{ + int err; + struct ibv_qp *ibv_qp; + struct ibv_qp_init_attr attr; + struct ibv_create_qp *p_create_qp; + ib_api_status_t status = IB_SUCCESS; + size_t size = max( sizeof(struct ibv_create_qp), sizeof(struct ibv_create_qp_resp) ); + struct ibv_pd *ibv_pd = h_uvp_pd->ibv_pd; + + UNREFERENCED_PARAMETER(ph_uvp_qp); + + UVP_ENTER(UVP_DBG_QP); + + CL_ASSERT(p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_zalloc( size ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = sizeof(struct ibv_create_qp); + p_umv_buf->output_size = sizeof(struct ibv_create_qp_resp); + p_umv_buf->command = TRUE; + + /* convert attributes */ + attr.send_cq = (struct ibv_cq *)p_create_attr->h_sq_cq; + attr.recv_cq = (struct ibv_cq *)p_create_attr->h_rq_cq; + attr.srq = (struct ibv_srq*)p_create_attr->h_srq; + 
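	/*
	 * Translate the IBAL creation attributes into the libibverbs-style
	 * ibv_qp_init_attr: CQ/SRQ handles above, queue depths and SGE
	 * limits below. max_inline_data stays 0 because IBAL's
	 * ib_qp_create_t carries no inline-data field.
	 */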
attr.cap.max_send_wr = p_create_attr->sq_depth; + attr.cap.max_recv_wr = p_create_attr->rq_depth; + attr.cap.max_send_sge = p_create_attr->sq_sge; + attr.cap.max_recv_sge = p_create_attr->rq_sge; + attr.cap.max_inline_data = 0; /* absent in IBAL */ + attr.qp_type = p_create_attr->qp_type; + attr.sq_sig_all = p_create_attr->sq_signaled; + + /* allocate ibv_qp */ + p_create_qp = (struct ibv_create_qp *)p_umv_buf->p_inout_buf; + ibv_qp = ibv_pd->context->ops.create_qp_pre(ibv_pd, &attr, p_create_qp); + if (IS_ERR(ibv_qp)) { + err = PTR_ERR(ibv_qp); + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("mthca_create_qp_pre failed (%d)\n", err)); + if(err == -ENOMEM && (attr.cap.max_send_sge == 0 ||attr.cap.max_recv_sge == 0|| + attr.cap.max_send_wr == 0 || attr.cap.max_recv_wr == 0)) + status = IB_INVALID_SETTING; + else + status = errno_to_iberr(err); + goto err_alloc_qp; + } + + goto end; + +err_alloc_qp: + cl_free(p_umv_buf->p_inout_buf); +err_memory: +end: + UVP_EXIT(UVP_DBG_QP); + return status; +} + +static ib_api_status_t +__post_create_qp ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN OUT ib_qp_handle_t *ph_uvp_qp, + IN ci_umv_buf_t *p_umv_buf ) +{ + int err; + struct ibv_qp *ibv_qp; + struct ibv_create_qp_resp *p_resp; + struct ibv_create_qp *p_create_qp; + ib_api_status_t status = IB_SUCCESS; + struct ibv_pd *ibv_pd = h_uvp_pd->ibv_pd; + + UVP_ENTER(UVP_DBG_QP); + + + CL_ASSERT(p_umv_buf); + p_resp = (struct ibv_create_qp_resp *)p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) { + + /* allocate ibv_qp */ + ibv_qp = ibv_pd->context->ops.create_qp_post(ibv_pd, p_resp); + if (IS_ERR(ibv_qp)) { + err = PTR_ERR(ibv_qp); + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP , ("mthca_create_qp_post failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_create_cq; + } + + *ph_uvp_qp = (ib_qp_handle_t)ibv_qp; + } + goto end; + + ibv_pd->context->ops.destroy_qp(ibv_qp); +err_create_cq: +end: + if (p_resp) + cl_free( p_resp ); + UVP_EXIT(UVP_DBG_QP); + return status; +} + +static ib_api_status_t +__pre_modify_qp ( + IN const ib_qp_handle_t h_uvp_qp, + IN const ib_qp_mod_t *p_modify_attr, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + ib_api_status_t status = IB_SUCCESS; + UNREFERENCED_PARAMETER(h_uvp_qp); + UNREFERENCED_PARAMETER(p_modify_attr); + + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_umv_buf); + + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_zalloc( sizeof(struct ibv_modify_qp_resp) ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = 0; + p_umv_buf->output_size = sizeof(struct ibv_modify_qp_resp); + p_umv_buf->command = TRUE; + +err_memory: + UVP_EXIT(UVP_DBG_SHIM); + return status; +} + + +static void +__post_modify_qp ( + IN const ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + int err; + ib_api_status_t status; + struct ibv_modify_qp_resp *p_resp; + struct ibv_qp_attr attr; + struct ibv_qp *ibv_qp = (struct ibv_qp *)h_uvp_qp; + + UVP_ENTER(UVP_DBG_SHIM); + CL_ASSERT(p_umv_buf); + + p_resp = (struct ibv_modify_qp_resp *)p_umv_buf->p_inout_buf; + + if (IB_SUCCESS == ioctl_status) + { + memset( &attr, 0, sizeof(attr)); + attr.qp_state = p_resp->qp_state; + if (ibv_qp) { + err = ibv_qp->context->ops.modify_qp( ibv_qp, + &attr, p_resp->attr_mask); + if (err) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_SHIM , ("mthca_modify_qp failed (%d)\n", err)); + status = errno_to_iberr(err); + goto err_modify_qp; + } + } + 
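	/*
	 * The kernel has already applied the modify; the call above
	 * presumably just mirrors the new state into the user-mode QP
	 * object so that __nd_get_qp_state() can report it without
	 * another kernel transition.
	 */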
UVP_PRINT(TRACE_LEVEL_INFORMATION ,UVP_DBG_SHIM , + ("Committed to modify QP to state %d\n", p_resp->qp_state)); + } + + +err_modify_qp: + if (p_resp) + cl_free (p_resp); + UVP_EXIT(UVP_DBG_SHIM); + return; + } + + +static ib_api_status_t +__pre_query_qp ( + IN ib_qp_handle_t h_uvp_qp, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + UNREFERENCED_PARAMETER(h_uvp_qp); + UVP_ENTER(UVP_DBG_SHIM); + p_umv_buf->input_size = p_umv_buf->output_size = 0; + p_umv_buf->command = FALSE; + p_umv_buf->status = IB_SUCCESS; + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + + +static void +__post_query_qp ( + IN ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status, + IN OUT ib_qp_attr_t *p_query_attr, + IN OUT ci_umv_buf_t *p_umv_buf) +{ + struct mthca_qp *p_mthca_qp = (struct mthca_qp *)h_uvp_qp; + UVP_ENTER(UVP_DBG_SHIM); + + UNREFERENCED_PARAMETER(p_umv_buf); + if(IB_SUCCESS == ioctl_status) + { + p_query_attr->sq_max_inline = p_mthca_qp->max_inline_data; + p_query_attr->sq_sge = p_mthca_qp->sq.max_gs; + p_query_attr->sq_depth = p_mthca_qp->sq.max; + p_query_attr->rq_sge = p_mthca_qp->rq.max_gs; + p_query_attr->rq_depth = p_mthca_qp->rq.max; + } + UVP_EXIT(UVP_DBG_SHIM); +} + + +static ib_api_status_t +__pre_destroy_qp ( + IN const ib_qp_handle_t h_uvp_qp) +{ + int err; + + + UVP_ENTER(UVP_DBG_SHIM); + + mthca_destroy_qp_pre((struct ibv_qp*)h_uvp_qp); + + UVP_EXIT(UVP_DBG_SHIM); + return IB_SUCCESS; +} + +static void +__post_destroy_qp ( + IN const ib_qp_handle_t h_uvp_qp, + IN ib_api_status_t ioctl_status) +{ + int err; + + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(h_uvp_qp); + + mthca_destroy_qp_post((struct ibv_qp*)h_uvp_qp, (int)ioctl_status); + if (ioctl_status != IB_SUCCESS) + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_SHIM , ("mthca_destroy_qp_post failed (%d)\n", ioctl_status)); + + UVP_EXIT(UVP_DBG_SHIM); + return; +} + + +void +mlnx_get_qp_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + UVP_ENTER(UVP_DBG_SHIM); + + CL_ASSERT(p_uvp); + + /* + * QP Management Verbs + */ + p_uvp->pre_create_qp = __pre_create_qp; + p_uvp->post_create_qp = __post_create_qp; + + // !!! none for create_spl_qp, UAL will return error !!! + + p_uvp->pre_modify_qp = __pre_modify_qp; + p_uvp->post_modify_qp = __post_modify_qp; + p_uvp->pre_query_qp = NULL; + p_uvp->post_query_qp = __post_query_qp; + p_uvp->pre_destroy_qp = __pre_destroy_qp; + p_uvp->post_destroy_qp = __post_destroy_qp; + p_uvp->nd_modify_qp = __nd_modify_qp; + p_uvp->nd_get_qp_state = __nd_get_qp_state; + + UVP_EXIT(UVP_DBG_SHIM); +} + + diff --git a/branches/IBFD/hw/mthca/user/mlnx_ual_srq.c b/branches/IBFD/hw/mthca/user/mlnx_ual_srq.c new file mode 100644 index 00000000..7ececbd8 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_ual_srq.c @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * Copyright (c) 2004-2005 Mellanox Technologies, Inc. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mlnx_ual_srq.c 1611 2006-08-20 14:48:55Z leonid $ + */ + +#include "mt_l2w.h" +#include "mlnx_ual_main.h" +#include "mlnx_uvp.h" +#include "mx_abi.h" + +#if defined(EVENT_TRACING) +#include "mlnx_ual_srq.tmh" +#endif + + +extern uint32_t mlnx_dbg_lvl; + +static void __free_srq(struct mthca_srq *srq) +{ + /* srq may be NULL, when ioctl returned with some kind of error, e.g. IB_INVALID_PARAM */ + if (!srq) + return; + + if (mthca_is_memfree(srq->ibv_srq.context)) { + mthca_free_db(to_mctx(srq->ibv_srq.context)->db_tab, MTHCA_DB_TYPE_SRQ, + srq->db_index); + } + + if (srq->buf) { +#ifdef NOT_USE_VIRTUAL_ALLOC + cl_free(srq->buf); +#else + VirtualFree( srq->buf, 0, MEM_RELEASE); +#endif + } + + if (srq->wrid) + cl_free(srq->wrid); + + cl_spinlock_destroy(&srq->lock); + cl_free (srq); +} + +static ib_api_status_t +__pre_create_srq ( + IN const ib_pd_handle_t h_uvp_pd,// Fix me: if needed + IN const ib_srq_attr_t *p_srq_attr, + IN OUT ci_umv_buf_t *p_umv_buf, + OUT ib_srq_handle_t *ph_uvp_srq) +{ + struct mthca_srq *srq; + ib_api_status_t status = IB_SUCCESS; + size_t size = max( sizeof(struct ibv_create_srq), sizeof(struct ibv_create_srq_resp) ); + mlnx_ual_pd_info_t *p_pd = (mlnx_ual_pd_info_t *)h_uvp_pd; + struct ibv_pd *ibv_pd = p_pd->ibv_pd; + struct ibv_create_srq *p_create_srq; + int err; + + UNREFERENCED_PARAMETER(ph_uvp_srq); + + UVP_ENTER(UVP_DBG_SRQ); + + CL_ASSERT(p_umv_buf); + + /* Sanity check SRQ size before proceeding */ + if (p_srq_attr->max_wr > 1 << 16 || p_srq_attr->max_sge > 64) + { + status = IB_INVALID_PARAMETER; + goto err_params; + } + + if( !p_umv_buf->p_inout_buf ) + { + p_umv_buf->p_inout_buf = cl_zalloc( size ); + if( !p_umv_buf->p_inout_buf ) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_memory; + } + } + p_umv_buf->input_size = sizeof(struct ibv_create_srq); + p_umv_buf->output_size = sizeof(struct ibv_create_srq_resp); + p_umv_buf->command = TRUE; + + /* allocate srq */ + srq = cl_zalloc(sizeof *srq); + if (!srq) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_srq; + } + + /* init fields */ + cl_spinlock_construct(&srq->lock); + if (cl_spinlock_init(&srq->lock)) + goto err_lock; + + srq->ibv_srq.pd = ibv_pd; + srq->ibv_srq.context = ibv_pd->context; + srq->max = align_queue_size(ibv_pd->context, p_srq_attr->max_wr, 1); + srq->max_gs = p_srq_attr->max_sge; + srq->counter = 0; + + if (mthca_alloc_srq_buf(ibv_pd, (void*)p_srq_attr, srq)) + { + status = IB_INSUFFICIENT_MEMORY; + goto err_alloc_buf; + } + + // fill the parameters for ioctl + p_create_srq = (struct ibv_create_srq *)p_umv_buf->p_inout_buf; + p_create_srq->user_handle = (uint64_t)(ULONG_PTR)srq; + p_create_srq->mr.start = (uint64_t)(ULONG_PTR)srq->buf; + p_create_srq->mr.length = srq->buf_size; + p_create_srq->mr.hca_va = 0; + p_create_srq->mr.pd_handle = p_pd->ibv_pd->handle; + p_create_srq->mr.pdn = to_mpd(p_pd->ibv_pd)->pdn; + p_create_srq->mr.access_flags = 0; //local read + + if (mthca_is_memfree(ibv_pd->context)) { + srq->db_index = mthca_alloc_db(to_mctx(ibv_pd->context)->db_tab, + 
MTHCA_DB_TYPE_SRQ, &srq->db); + if (srq->db_index < 0) + goto err_alloc_db; + + p_create_srq->db_page = db_align(srq->db); + p_create_srq->db_index = srq->db_index; + } + + status = IB_SUCCESS; + goto end; + +err_alloc_db: +#ifdef NOT_USE_VIRTUAL_ALLOC + cl_free(srq->buf); +#else + VirtualFree( srq->buf, 0, MEM_RELEASE); +#endif + cl_free(srq->wrid); +err_alloc_buf: + cl_spinlock_destroy(&srq->lock); +err_lock: + cl_free(srq); +err_alloc_srq: + cl_free(p_umv_buf->p_inout_buf); +err_memory: +err_params: +end: + UVP_EXIT(UVP_DBG_SRQ); + return status; +} + + +static void +__post_create_srq ( + IN const ib_pd_handle_t h_uvp_pd, + IN ib_api_status_t ioctl_status, + IN OUT ib_srq_handle_t *ph_uvp_srq, + IN ci_umv_buf_t *p_umv_buf ) +{ + int err; + struct mthca_srq *srq; + struct ibv_create_srq_resp *p_resp; + mlnx_ual_pd_info_t *p_pd = (mlnx_ual_pd_info_t *)h_uvp_pd; + struct ibv_pd *ibv_pd = p_pd->ibv_pd; + ib_api_status_t status = IB_SUCCESS; + + UVP_ENTER(UVP_DBG_SRQ); + + CL_ASSERT(p_umv_buf); + p_resp = (struct ibv_create_srq_resp *)p_umv_buf->p_inout_buf; + srq = (struct mthca_srq *)(ULONG_PTR)p_resp->user_handle; + + if (IB_SUCCESS == ioctl_status) { + + /* complete filling SRQ object */ + srq->ibv_srq.handle = p_resp->srq_handle; + srq->srqn = p_resp->srqn; + srq->max = p_resp->max_wr; + srq->max_gs = p_resp->max_sge; + srq->mr.handle = p_resp->mr.mr_handle; + srq->mr.lkey = p_resp->mr.lkey; + srq->mr.rkey = p_resp->mr.rkey; + srq->mr.pd = ibv_pd; + srq->mr.context = ibv_pd->context; + + if (mthca_is_memfree(ibv_pd->context)) + mthca_set_db_qn(srq->db, MTHCA_DB_TYPE_SRQ, srq->srqn); + + *ph_uvp_srq = (ib_srq_handle_t)srq; + } + else + __free_srq(srq); + + if (p_resp) + cl_free( p_resp ); + UVP_EXIT(UVP_DBG_SRQ); + return; +} + +static void +__post_destroy_srq ( + IN const ib_srq_handle_t h_uvp_srq, + IN ib_api_status_t ioctl_status) +{ + int err; + struct mthca_srq *srq = (struct mthca_srq *) ((void*)h_uvp_srq); + + UVP_ENTER(UVP_DBG_CQ); + + CL_ASSERT(srq); + + if (IB_SUCCESS == ioctl_status) + __free_srq(srq); + + UVP_EXIT(UVP_DBG_CQ); +} + +void +mlnx_get_srq_interface ( + IN OUT uvp_interface_t *p_uvp ) +{ + UVP_ENTER(UVP_DBG_DEV); + + CL_ASSERT(p_uvp); + + /* + * Completion Queue Management Verbs + */ + p_uvp->pre_create_srq = __pre_create_srq; + p_uvp->post_create_srq = __post_create_srq; + + p_uvp->pre_query_srq = NULL; /* __pre_query_srq; */ + p_uvp->post_query_srq = NULL; /*__post_query_srq;*/ + + p_uvp->pre_modify_srq = NULL; /* __modify_srq;*/ + p_uvp->post_modify_srq = NULL; /*__post_modify_srq;*/ + + p_uvp->pre_destroy_srq = NULL; /* __pre_destroy_srq; */ + p_uvp->post_destroy_srq = __post_destroy_srq; + + UVP_EXIT(UVP_DBG_DEV); +} + + diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp.c b/branches/IBFD/hw/mthca/user/mlnx_uvp.c new file mode 100644 index 00000000..2e36bb62 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include "mt_l2w.h" +#include "mlnx_uvp.h" + +#if defined(EVENT_TRACING) +#include "mlnx_uvp.tmh" +#endif + +#include "mx_abi.h" + +size_t g_page_size = 0; + +#ifndef PCI_VENDOR_ID_MELLANOX +#define PCI_VENDOR_ID_MELLANOX 0x15b3 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_TAVOR +#define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT +#define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_ARBEL +#define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI_OLD +#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI +#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#endif + +#ifndef PCI_VENDOR_ID_TOPSPIN +#define PCI_VENDOR_ID_TOPSPIN 0x1867 +#endif + +/* live fishes */ +#ifndef PCI_DEVICE_ID_MELLANOX_TAVOR_BD +#define PCI_DEVICE_ID_MELLANOX_TAVOR_BD 0x5a45 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_ARBEL_BD +#define PCI_DEVICE_ID_MELLANOX_ARBEL_BD 0x6279 +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI_OLD_BD +#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD_BD 0x5e8d +#endif + +#ifndef PCI_DEVICE_ID_MELLANOX_SINAI_BD +#define PCI_DEVICE_ID_MELLANOX_SINAI_BD 0x6275 +#endif + + +#define HCA(v, d, t) \ + { PCI_VENDOR_ID_##v, PCI_DEVICE_ID_MELLANOX_##d, MTHCA_##t } + +static struct pci_device_id { + unsigned vendor; + unsigned device; + enum mthca_hca_type type; +} mthca_pci_table[] = { + HCA( MELLANOX, TAVOR, TAVOR), + HCA( MELLANOX, ARBEL_COMPAT, TAVOR), + HCA( MELLANOX, ARBEL, ARBEL), + HCA( MELLANOX, SINAI_OLD, ARBEL), + HCA( MELLANOX, SINAI, ARBEL), + HCA( TOPSPIN, TAVOR, TAVOR), + HCA( TOPSPIN, ARBEL_COMPAT, TAVOR), + HCA( TOPSPIN, ARBEL, ARBEL), + HCA( TOPSPIN, SINAI_OLD, ARBEL), + HCA( TOPSPIN, SINAI, ARBEL), + // live fishes + HCA(MELLANOX, TAVOR_BD, LIVEFISH), + HCA(MELLANOX, ARBEL_BD, LIVEFISH), + HCA(MELLANOX, SINAI_OLD_BD, LIVEFISH), + HCA(MELLANOX, SINAI_BD, LIVEFISH), + HCA(TOPSPIN, TAVOR_BD, LIVEFISH), + HCA(TOPSPIN, ARBEL_BD, LIVEFISH), + HCA(TOPSPIN, SINAI_OLD_BD, LIVEFISH), + HCA(TOPSPIN, SINAI_BD, LIVEFISH), +}; + +static struct ibv_context_ops mthca_ctx_ops = { + NULL, // mthca_query_device, + NULL, // mthca_query_port, + mthca_alloc_pd, + mthca_free_pd, + NULL, // mthca_reg_mr, + NULL, // 
mthca_dereg_mr, + mthca_create_cq_pre, + mthca_create_cq_post, + mthca_poll_cq, + mthca_poll_cq_list, + NULL, /* req_notify_cq */ + mthca_destroy_cq, + NULL, // mthca_create_srq, + NULL, // mthca_modify_srq, + NULL, // mthca_destroy_srq, + NULL, /* post_srq_recv */ + mthca_create_qp_pre, + mthca_create_qp_post, + mthca_modify_qp, + NULL, + NULL, /* post_send */ + NULL, /* post_recv */ + mthca_attach_mcast, + mthca_detach_mcast +}; + +struct ibv_context *mthca_alloc_context(struct ibv_get_context_resp *resp_p) +{ + struct mthca_context * context; + struct ibv_alloc_pd_resp pd_resp; + int i; + + /* allocate context */ + context = cl_zalloc(sizeof *context); + if (!context) + return NULL; + + /* find page size */ + if (!g_page_size) { + SYSTEM_INFO sys_info; + GetSystemInfo(&sys_info); + g_page_size = sys_info.dwPageSize; + } + + /* calculate device type */ + for (i = 0; i < sizeof mthca_pci_table / sizeof mthca_pci_table[0]; ++i) + if (resp_p->vend_id == mthca_pci_table[i].vendor && + resp_p->dev_id == mthca_pci_table[i].device) + goto found; + goto err_dev_type; + +found: + context->hca_type = mthca_pci_table[i].type; + context->uar = (void*)(UINT_PTR)resp_p->uar_addr; + context->num_qps = resp_p->qp_tab_size; + context->qp_table_shift = ffs(context->num_qps) - 1 - MTHCA_QP_TABLE_BITS; + context->qp_table_mask = (1 << context->qp_table_shift) - 1; + + if (mthca_is_memfree(&context->ibv_ctx)) { + context->db_tab = mthca_alloc_db_tab(resp_p->uarc_size); + if (!context->db_tab) + goto err_alloc_db_tab; + } else + context->db_tab = NULL; + + context->qp_table_mutex = CreateMutex( NULL, FALSE, NULL ); + if (!context->qp_table_mutex) + goto err_mutex; + for (i = 0; i < MTHCA_QP_TABLE_SIZE; ++i) + context->qp_table[i].refcnt = 0; + + cl_spinlock_construct(&context->uar_lock); + if (cl_spinlock_init(&context->uar_lock)) + goto err_spinlock; + + pd_resp.pd_handle = resp_p->pd_handle; + pd_resp.pdn = resp_p->pdn; + context->pd = mthca_alloc_pd(&context->ibv_ctx, &pd_resp); + if (!context->pd) + goto err_unmap; + + context->ibv_ctx.ops = mthca_ctx_ops; + + if (mthca_is_memfree(&context->ibv_ctx)) { + context->ibv_ctx.ops.req_notify_cq = mthca_arbel_arm_cq; + context->ibv_ctx.ops.post_send = mthca_arbel_post_send; + context->ibv_ctx.ops.post_recv = mthca_arbel_post_recv; + context->ibv_ctx.ops.post_srq_recv = mthca_arbel_post_srq_recv; + } else { + context->ibv_ctx.ops.req_notify_cq = mthca_tavor_arm_cq; + context->ibv_ctx.ops.post_send = mthca_tavor_post_send; + context->ibv_ctx.ops.post_recv = mthca_tavor_post_recv; + context->ibv_ctx.ops.post_srq_recv = mthca_tavor_post_srq_recv; + } + + return &context->ibv_ctx; + +err_unmap: +err_spinlock: +err_mutex: + mthca_free_db_tab(context->db_tab); + +err_alloc_db_tab: +err_dev_type: + cl_free(context); + return NULL; +} + +void mthca_free_context(struct ibv_context *ibctx) +{ + struct mthca_context *context = to_mctx(ibctx); + + cl_spinlock_destroy(&context->uar_lock); + mthca_free_pd(context->pd); + mthca_free_db_tab(context->db_tab); + cl_free(context); +} diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp.def b/branches/IBFD/hw/mthca/user/mlnx_uvp.def new file mode 100644 index 00000000..55f97537 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp.def @@ -0,0 +1,10 @@ +#if DBG +LIBRARY mthcaud.dll +#else +LIBRARY mthcau.dll +#endif + +#ifndef _WIN64 +EXPORTS +uvp_get_interface +#endif diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp.h b/branches/IBFD/hw/mthca/user/mlnx_uvp.h new file mode 100644 index 00000000..b5d4702f --- /dev/null +++ 
b/branches/IBFD/hw/mthca/user/mlnx_uvp.h @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef MTHCA_H +#define MTHCA_H + +#include +#include +#include +#include "mlnx_uvp_debug.h" + +#define PFX "mthca: " + +enum mthca_hca_type { + MTHCA_TAVOR, + MTHCA_ARBEL, + MTHCA_LIVEFISH +}; + +enum { + MTHCA_CQ_ENTRY_SIZE = 0x20, + MTHCA_BYTES_PER_ATOMIC_COMPL = 0x8 +}; + +enum { + MTHCA_QP_TABLE_BITS = 8, + MTHCA_QP_TABLE_SIZE = 1 << MTHCA_QP_TABLE_BITS, + MTHCA_QP_TABLE_MASK = MTHCA_QP_TABLE_SIZE - 1 +}; + +enum { + MTHCA_DB_REC_PAGE_SIZE = 4096, + MTHCA_DB_REC_PER_PAGE = MTHCA_DB_REC_PAGE_SIZE / 8 +}; + +enum mthca_db_type { + MTHCA_DB_TYPE_INVALID = 0x0, + MTHCA_DB_TYPE_CQ_SET_CI = 0x1, + MTHCA_DB_TYPE_CQ_ARM = 0x2, + MTHCA_DB_TYPE_SQ = 0x3, + MTHCA_DB_TYPE_RQ = 0x4, + MTHCA_DB_TYPE_SRQ = 0x5, + MTHCA_DB_TYPE_GROUP_SEP = 0x7 +}; + +enum mthca_wr_opcode { + MTHCA_OPCODE_NOP = 0x00, + MTHCA_OPCODE_RDMA_WRITE = 0x08, + MTHCA_OPCODE_RDMA_WRITE_IMM = 0x09, + MTHCA_OPCODE_SEND = 0x0a, + MTHCA_OPCODE_SEND_IMM = 0x0b, + MTHCA_OPCODE_RDMA_READ = 0x10, + MTHCA_OPCODE_ATOMIC_CS = 0x11, + MTHCA_OPCODE_ATOMIC_FA = 0x12, + MTHCA_OPCODE_BIND_MW = 0x18, + MTHCA_OPCODE_INVALID = 0xff +}; + +struct mthca_ah_page; + +struct mthca_db_table; + +struct mthca_context { + struct ibv_context ibv_ctx; + void *uar; + cl_spinlock_t uar_lock; + struct mthca_db_table *db_tab; + struct ibv_pd *pd; + struct { + struct mthca_qp **table; + int refcnt; + } qp_table[MTHCA_QP_TABLE_SIZE]; + HANDLE qp_table_mutex; + int num_qps; + int qp_table_shift; + int qp_table_mask; + enum mthca_hca_type hca_type; +}; + +struct mthca_pd { + struct ibv_pd ibv_pd; + struct mthca_ah_page *ah_list; + HANDLE ah_mutex; + uint32_t pdn; +}; + +struct mthca_cq { + struct ibv_cq ibv_cq; + void *buf; + cl_spinlock_t lock; + struct ibv_mr mr; + uint32_t cqn; + uint32_t cons_index; + + /* Next fields are mem-free only */ + int set_ci_db_index; + uint32_t *set_ci_db; + int arm_db_index; + uint32_t *arm_db; + int u_arm_db_index; + uint32_t *p_u_arm_sn; +}; + +struct mthca_srq { + struct ibv_srq ibv_srq; + void *buf; + void 
*last; + cl_spinlock_t lock; + struct ibv_mr mr; + uint64_t *wrid; + uint32_t srqn; + int max; + int max_gs; + int wqe_shift; + int first_free; + int last_free; + int buf_size; + + /* Next fields are mem-free only */ + int db_index; + uint32_t *db; + uint16_t counter; +}; + +struct mthca_wq { + cl_spinlock_t lock; + int max; + unsigned next_ind; + unsigned last_comp; + unsigned head; + unsigned tail; + void *last; + int max_gs; + int wqe_shift; + + /* Next fields are mem-free only */ + int db_index; + uint32_t *db; +}; + +struct mthca_qp { + struct ibv_qp ibv_qp; + uint8_t *buf; + uint64_t *wrid; + int send_wqe_offset; + int max_inline_data; + int buf_size; + struct mthca_wq sq; + struct mthca_wq rq; + struct ibv_mr mr; + int sq_sig_all; +}; + +struct mthca_av { + uint32_t port_pd; + uint8_t reserved1; + uint8_t g_slid; + uint16_t dlid; + uint8_t reserved2; + uint8_t gid_index; + uint8_t msg_sr; + uint8_t hop_limit; + uint32_t sl_tclass_flowlabel; + uint32_t dgid[4]; +}; + +struct mthca_ah { + struct mthca_av *av; + ib_av_attr_t av_attr; + ib_pd_handle_t h_uvp_pd; + struct mthca_ah_page *page; + uint32_t key; + int in_kernel; +}; + +#pragma warning( disable : 4200) +struct mthca_ah_page { + struct mthca_ah_page *prev, *next; + void *buf; + struct ibv_mr mr; + int use_cnt; + unsigned free[0]; +}; +#pragma warning( default : 4200) + + +static inline uintptr_t db_align(uint32_t *db) +{ + return (uintptr_t) db & ~((uintptr_t) MTHCA_DB_REC_PAGE_SIZE - 1); +} + +#define to_mxxx(xxx, type) \ + ((struct mthca_##type *) \ + ((uint8_t *) ib##xxx - offsetof(struct mthca_##type, ibv_##xxx))) + +static inline struct mthca_context *to_mctx(struct ibv_context *ibctx) +{ + return to_mxxx(ctx, context); +} + +static inline struct mthca_pd *to_mpd(struct ibv_pd *ibpd) +{ + return to_mxxx(pd, pd); +} + +static inline struct mthca_cq *to_mcq(struct ibv_cq *ibcq) +{ + return to_mxxx(cq, cq); +} + +static inline struct mthca_srq *to_msrq(struct ibv_srq *ibsrq) +{ + return to_mxxx(srq, srq); +} + +static inline struct mthca_qp *to_mqp(struct ibv_qp *ibqp) +{ + return to_mxxx(qp, qp); +} + +static inline int mthca_is_memfree(struct ibv_context *ibctx) +{ + return to_mctx(ibctx)->hca_type == MTHCA_ARBEL; +} + +int mthca_alloc_db(struct mthca_db_table *db_tab, enum mthca_db_type type, + uint32_t **db); +void mthca_set_db_qn(uint32_t *db, enum mthca_db_type type, uint32_t qn); +void mthca_free_db(struct mthca_db_table *db_tab, enum mthca_db_type type, int db_index); +struct mthca_db_table *mthca_alloc_db_tab(int uarc_size); +void mthca_free_db_tab(struct mthca_db_table *db_tab); + +int mthca_query_device(struct ibv_context *context, + struct ibv_device_attr *attr); +int mthca_query_port(struct ibv_context *context, uint8_t port, + struct ibv_port_attr *attr); + + struct ibv_pd *mthca_alloc_pd(struct ibv_context *context, + struct ibv_alloc_pd_resp *resp_p); + +int mthca_free_pd(struct ibv_pd *pd); + +struct ibv_cq *mthca_create_cq_pre(struct ibv_context *context, int *cqe, + struct ibv_create_cq *req); +struct ibv_cq *mthca_create_cq_post(struct ibv_context *context, + struct ibv_create_cq_resp *resp); +int mthca_destroy_cq(struct ibv_cq *cq); +int mthca_poll_cq(struct ibv_cq *cq, int ne, struct _ib_wc *wc); +int mthca_poll_cq_list(struct ibv_cq *ibcq, + struct _ib_wc** const pp_free_wclist, + struct _ib_wc** const pp_done_wclist ); +int mthca_tavor_arm_cq(struct ibv_cq *cq, int solicited); +int mthca_arbel_arm_cq(struct ibv_cq *cq, int solicited); +void mthca_cq_clean(struct mthca_cq *cq, uint32_t qpn, + struct 
mthca_srq *srq); +void mthca_init_cq_buf(struct mthca_cq *cq, int nent); + +struct ibv_srq *mthca_create_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *attr); +int mthca_modify_srq(struct ibv_srq *srq, + struct ibv_srq_attr *attr, + enum ibv_srq_attr_mask mask); +int mthca_destroy_srq(struct ibv_srq *srq); +int mthca_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr, + struct mthca_srq *srq); +void mthca_free_srq_wqe(struct mthca_srq *srq, int ind); +int mthca_tavor_post_srq_recv(struct ibv_srq *ibsrq, + struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ibv_srq *ibsrq, + struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); +struct ibv_qp *mthca_create_qp_pre(struct ibv_pd *pd, + struct ibv_qp_init_attr *attr, struct ibv_create_qp *req); +struct ibv_qp *mthca_create_qp_post(struct ibv_pd *pd, + struct ibv_create_qp_resp *resp); +int mthca_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask); +void mthca_destroy_qp_pre(struct ibv_qp *qp); +void mthca_destroy_qp_post(struct ibv_qp *qp, int ret); +void mthca_init_qp_indices(struct mthca_qp *qp); +int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr); +int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); +int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr); +int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); +int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap, + ib_qp_type_t type, struct mthca_qp *qp); +struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn); +int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp); +void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn); +int mthca_free_err_wqe(struct mthca_qp *qp, int is_send, + int index, int *dbd, uint32_t *new_wqe); +int mthca_alloc_av(struct mthca_pd *pd, struct ibv_ah_attr *attr, + struct mthca_ah *ah, struct ibv_create_ah_resp *resp); +void mthca_free_av(struct mthca_ah *ah); +int mthca_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid); +int mthca_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid); +struct ibv_context *mthca_alloc_context(struct ibv_get_context_resp *resp_p); +void mthca_free_context(struct ibv_context *ibctx); + +#endif /* MTHCA_H */ diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp.rc b/branches/IBFD/hw/mthca/user/mlnx_uvp.rc new file mode 100644 index 00000000..f3d2e34a --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp.rc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2005 SilverStorm Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + + +#include + +#define VER_FILETYPE VFT_DLL +#define VER_FILESUBTYPE VFT2_UNKNOWN + +#ifdef DBG +#define VER_FILEDESCRIPTION_STR "HCA User Mode Verb Provider (checked)" +#define VER_INTERNALNAME_STR "mthcaud.dll" +#define VER_ORIGINALFILENAME_STR "mthcaud.dll" +#else +#define VER_FILEDESCRIPTION_STR "HCA User Mode Verb Provider" +#define VER_INTERNALNAME_STR "mthcau.dll" +#define VER_ORIGINALFILENAME_STR "mthcau.dll" +#endif + +#include diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp_ah.c b/branches/IBFD/hw/mthca/user/mlnx_uvp_ah.c new file mode 100644 index 00000000..be1eb898 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_ah.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include "mt_l2w.h" +#include "mlnx_uvp.h" +#include "mlnx_ual_data.h" +#include "mx_abi.h" + +static struct mthca_ah_page *__add_page( + struct mthca_pd *pd, int page_size, int per_page) +{ + struct mthca_ah_page *page; + int i; + + page = cl_malloc(sizeof *page + per_page * sizeof (int)); + if (!page) + return NULL; + + if (posix_memalign(&page->buf, page_size, page_size)) { + cl_free(page); + return NULL; + } + + page->use_cnt = 0; + for (i = 0; i < per_page; ++i) + page->free[i] = ~0; + + page->prev = NULL; + page->next = pd->ah_list; + pd->ah_list = page; + if (page->next) + page->next->prev = page; + + return page; +} + +int mthca_alloc_av(struct mthca_pd *pd, struct ibv_ah_attr *attr, + struct mthca_ah *ah, struct ibv_create_ah_resp *resp) +{ + if (mthca_is_memfree(pd->ibv_pd.context)) { + ah->av = cl_malloc(sizeof *ah->av); + if (!ah->av) + return -ENOMEM; + } else { + struct mthca_ah_page *page; + int ps; + int pp; + int i, j; + + ps = g_page_size; + pp = ps / (sizeof *ah->av * 8 * sizeof (int)); + + WaitForSingleObject( pd->ah_mutex, INFINITE ); + for (page = pd->ah_list; page; page = page->next) + if (page->use_cnt < ps / (int)(sizeof *ah->av)) + for (i = 0; i < pp; ++i) + if (page->free[i]) + goto found; + + page = __add_page(pd, ps, pp); + if (!page) { + ReleaseMutex( pd->ah_mutex ); + return -ENOMEM; + } + ah->in_kernel = TRUE; + + found: + ++page->use_cnt; + + for (i = 0, j = -1; i < pp; ++i) + if (page->free[i]) { + j = ffs(page->free[i]); + page->free[i] &= ~(1 << (j - 1)); + ah->av = (struct mthca_av *)((uint8_t*)page->buf + + (i * 8 * sizeof (int) + (j - 1)) * sizeof *ah->av); + break; + } + + ah->page = page; + + ReleaseMutex( pd->ah_mutex ); + } + + memset(ah->av, 0, sizeof *ah->av); + + ah->av->port_pd = cl_hton32(pd->pdn | (attr->port_num << 24)); + ah->av->g_slid = attr->src_path_bits; + ah->av->dlid = cl_hton16(attr->dlid); + ah->av->msg_sr = (3 << 4) | /* 2K message */ + attr->static_rate; + ah->av->sl_tclass_flowlabel = cl_hton32(attr->sl << 28); + if (attr->is_global) { + ah->av->g_slid |= 0x80; + /* XXX get gid_table length */ + ah->av->gid_index = (attr->port_num - 1) * 32 + + attr->grh.sgid_index; + ah->av->hop_limit = attr->grh.hop_limit; + ah->av->sl_tclass_flowlabel |= + cl_hton32((attr->grh.traffic_class << 20) | + attr->grh.flow_label); + memcpy(ah->av->dgid, attr->grh.dgid.raw, 16); + } else { + /* Arbel workaround -- low byte of GID must be 2 */ + ah->av->dgid[3] = cl_hton32(2); + } + return 0; +} + +void mthca_free_av(struct mthca_ah *ah) +{ + mlnx_ual_pd_info_t *p_pd = (mlnx_ual_pd_info_t *)ah->h_uvp_pd; + if (mthca_is_memfree(p_pd->ibv_pd->context)) { + cl_free(ah->av); + } else { + struct mthca_pd *pd = to_mpd(p_pd->ibv_pd); + struct mthca_ah_page *page; + int i; + + WaitForSingleObject( pd->ah_mutex, INFINITE ); + page = ah->page; + i = ((uint8_t *)ah->av - (uint8_t *)page->buf) / sizeof *ah->av; + page->free[i / (8 * sizeof (int))] |= 1 << (i % (8 * sizeof (int))); + --page->use_cnt; + ReleaseMutex( pd->ah_mutex ); + } +} + +//NB: temporary, for support of modify_qp +void mthca_set_av_params( struct mthca_ah *ah_p, struct ibv_ah_attr *ah_attr ) +{ + struct mthca_av *av = ah_p->av; + mlnx_ual_pd_info_t *p_pd = (mlnx_ual_pd_info_t *)ah_p->h_uvp_pd; + struct mthca_pd *pd =to_mpd(p_pd->ibv_pd); + + // taken from mthca_alloc_av + //TODO: why cl_hton32 ? + av->port_pd = cl_hton32(pd->pdn | (ah_attr->port_num << 24)); + av->g_slid = ah_attr->src_path_bits; + //TODO: why cl_hton16 ? 
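/* [Editorial note -- not part of the original commit] The cl_hton16()/cl_hton32()
 * conversions questioned in the TODOs above and used below are needed because
 * the mthca_av structure is consumed directly by the HCA, which expects its
 * multi-byte fields in big-endian (network) byte order; on little-endian
 * x86/x64 hosts these calls perform the byte swap, and on big-endian hosts
 * they compile to no-ops. */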
+ av->dlid = cl_hton16(ah_attr->dlid); + av->msg_sr = (3 << 4) | /* 2K message */ + ah_attr->static_rate; + //TODO: why cl_hton32 ? + av->sl_tclass_flowlabel = cl_hton32(ah_attr->sl << 28); + if (ah_attr->is_global) { + av->g_slid |= 0x80; + av->gid_index = (ah_attr->port_num - 1) * 32 + + ah_attr->grh.sgid_index; + av->hop_limit = ah_attr->grh.hop_limit; + av->sl_tclass_flowlabel |= cl_hton32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); + memcpy(av->dgid, ah_attr->grh.dgid.raw, 16); + } else { + /* Arbel workaround -- low byte of GID must be 2 */ + //TODO: why cl_hton32 ? + av->dgid[3] = cl_hton32(2); + } +} + diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp_cq.c b/branches/IBFD/hw/mthca/user/mlnx_uvp_cq.c new file mode 100644 index 00000000..3d592c0d --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_cq.c @@ -0,0 +1,626 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include +#include +#include "mlnx_uvp.h" +#include "mlnx_uvp_doorbell.h" + +#if defined(EVENT_TRACING) +#include "mlnx_uvp_cq.tmh" +#endif + + +enum { + MTHCA_CQ_DOORBELL = 0x20 +}; + +enum { + CQ_OK = 0, + CQ_EMPTY = -1, + CQ_POLL_ERR = -2 +}; + +#define MTHCA_TAVOR_CQ_DB_INC_CI (1 << 24) +#define MTHCA_TAVOR_CQ_DB_REQ_NOT (2 << 24) +#define MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL (3 << 24) +#define MTHCA_TAVOR_CQ_DB_SET_CI (4 << 24) +#define MTHCA_TAVOR_CQ_DB_REQ_NOT_MULT (5 << 24) + +#define MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL (1 << 24) +#define MTHCA_ARBEL_CQ_DB_REQ_NOT (2 << 24) +#define MTHCA_ARBEL_CQ_DB_REQ_NOT_MULT (3 << 24) + +enum { + MTHCA_CQ_ENTRY_OWNER_SW = 0x00, + MTHCA_CQ_ENTRY_OWNER_HW = 0x80, + MTHCA_ERROR_CQE_OPCODE_MASK = 0xfe +}; + +enum { + SYNDROME_LOCAL_LENGTH_ERR = 0x01, + SYNDROME_LOCAL_QP_OP_ERR = 0x02, + SYNDROME_LOCAL_EEC_OP_ERR = 0x03, + SYNDROME_LOCAL_PROT_ERR = 0x04, + SYNDROME_WR_FLUSH_ERR = 0x05, + SYNDROME_MW_BIND_ERR = 0x06, + SYNDROME_BAD_RESP_ERR = 0x10, + SYNDROME_LOCAL_ACCESS_ERR = 0x11, + SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12, + SYNDROME_REMOTE_ACCESS_ERR = 0x13, + SYNDROME_REMOTE_OP_ERR = 0x14, + SYNDROME_RETRY_EXC_ERR = 0x15, + SYNDROME_RNR_RETRY_EXC_ERR = 0x16, + SYNDROME_LOCAL_RDD_VIOL_ERR = 0x20, + SYNDROME_REMOTE_INVAL_RD_REQ_ERR = 0x21, + SYNDROME_REMOTE_ABORTED_ERR = 0x22, + SYNDROME_INVAL_EECN_ERR = 0x23, + SYNDROME_INVAL_EEC_STATE_ERR = 0x24 +}; + +struct mthca_cqe { + uint32_t my_qpn; + uint32_t my_ee; + uint32_t rqpn; + uint16_t sl_g_mlpath; + uint16_t rlid; + uint32_t imm_etype_pkey_eec; + uint32_t byte_cnt; + uint32_t wqe; + uint8_t opcode; + uint8_t is_send; + uint8_t reserved; + uint8_t owner; +}; + +struct mthca_err_cqe { + uint32_t my_qpn; + uint32_t reserved1[3]; + uint8_t syndrome; + uint8_t vendor_err; + uint16_t db_cnt; + uint32_t reserved2; + uint32_t wqe; + uint8_t opcode; + uint8_t reserved3[2]; + uint8_t owner; +}; + +static inline struct mthca_cqe *get_cqe(struct mthca_cq *cq, int entry) +{ + return (struct mthca_cqe *)((uint8_t*)cq->buf + entry * MTHCA_CQ_ENTRY_SIZE); +} + +static inline struct mthca_cqe *cqe_sw(struct mthca_cq *cq, int i) +{ + struct mthca_cqe *cqe = get_cqe(cq, i); + return MTHCA_CQ_ENTRY_OWNER_HW & cqe->owner ? NULL : cqe; +} + +static inline struct mthca_cqe *next_cqe_sw(struct mthca_cq *cq) +{ + return cqe_sw(cq, cq->cons_index & cq->ibv_cq.cqe); +} + +static inline void set_cqe_hw(struct mthca_cqe *cqe) +{ + cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW; +} + +/* + * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index + * should be correct before calling update_cons_index(). + */ +static inline void update_cons_index(struct mthca_cq *cq, int incr) +{ + uint32_t doorbell[2]; + + if (mthca_is_memfree(cq->ibv_cq.context)) { + *cq->set_ci_db = cl_hton32(cq->cons_index); + mb(); + } else { + doorbell[0] = cl_hton32(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn); + doorbell[1] = cl_hton32(incr - 1); + + mthca_write64(doorbell, to_mctx(cq->ibv_cq.context), MTHCA_CQ_DOORBELL); + } +} + + +static void dump_cqe(uint32_t print_lvl, void *cqe_ptr) +{ + uint32_t *cqe = cqe_ptr; + int i; + (void) cqe; /* avoid warning if mthca_dbg compiled away... 
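 * [Editorial note -- not part of the original commit] dump_cqe() is a pure
 * debugging aid: the prints below emit the eight 32-bit words of the CQE,
 * byte-swapped with cl_ntoh32() from the big-endian layout the HCA writes.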
*/ + + UVP_PRINT(print_lvl,UVP_DBG_CQ,("CQE content \n")); + UVP_PRINT(print_lvl,UVP_DBG_CQ,(" [%2x] %08x %08x %08x %08x \n",0 + , cl_ntoh32(cqe[0]), cl_ntoh32(cqe[1]), cl_ntoh32(cqe[2]), cl_ntoh32(cqe[3]))); + UVP_PRINT(print_lvl,UVP_DBG_CQ,(" [%2x] %08x %08x %08x %08x\n",16 + , cl_ntoh32(cqe[4]), cl_ntoh32(cqe[5]), cl_ntoh32(cqe[6]), cl_ntoh32(cqe[7]))); + +} + +static int handle_error_cqe(struct mthca_cq *cq, + struct mthca_qp *qp, int wqe_index, int is_send, + struct mthca_err_cqe *cqe, + struct _ib_wc *entry, int *free_cqe) +{ + int err; + int dbd; + uint32_t new_wqe; + + if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) { + UVP_PRINT(TRACE_LEVEL_ERROR , UVP_DBG_CQ,("local QP operation err " + "(QPN %06x, WQE @ %08x, CQN %06x, index %d, vendor_err %d)\n", + cl_ntoh32(cqe->my_qpn), cl_ntoh32(cqe->wqe), + cq->cqn, cq->cons_index, cqe->vendor_err)); + dump_cqe(TRACE_LEVEL_VERBOSE, cqe); + } + + /* + * For completions in error, only work request ID, status, vendor error + * (and freed resource count for RD) have to be set. + */ + switch (cqe->syndrome) { + case SYNDROME_LOCAL_LENGTH_ERR: + entry->status = IB_WCS_LOCAL_LEN_ERR; + break; + case SYNDROME_LOCAL_QP_OP_ERR: + entry->status = IB_WCS_LOCAL_OP_ERR; + break; + case SYNDROME_LOCAL_PROT_ERR: + entry->status = IB_WCS_LOCAL_PROTECTION_ERR; + break; + case SYNDROME_WR_FLUSH_ERR: + entry->status = IB_WCS_WR_FLUSHED_ERR; + break; + case SYNDROME_MW_BIND_ERR: + entry->status = IB_WCS_MEM_WINDOW_BIND_ERR; + break; + case SYNDROME_BAD_RESP_ERR: + entry->status = IB_WCS_BAD_RESP_ERR; + break; + case SYNDROME_LOCAL_ACCESS_ERR: + entry->status = IB_WCS_LOCAL_ACCESS_ERR; + break; + case SYNDROME_REMOTE_INVAL_REQ_ERR: + entry->status = IB_WCS_REM_INVALID_REQ_ERR; + break; + case SYNDROME_REMOTE_ACCESS_ERR: + entry->status = IB_WCS_REM_ACCESS_ERR; + break; + case SYNDROME_REMOTE_OP_ERR: + entry->status = IB_WCS_REM_OP_ERR; + break; + case SYNDROME_RETRY_EXC_ERR: + entry->status = IB_WCS_TIMEOUT_RETRY_ERR; + break; + case SYNDROME_RNR_RETRY_EXC_ERR: + entry->status = IB_WCS_RNR_RETRY_ERR; + break; + case SYNDROME_LOCAL_EEC_OP_ERR: + case SYNDROME_LOCAL_RDD_VIOL_ERR: + case SYNDROME_REMOTE_INVAL_RD_REQ_ERR: + case SYNDROME_REMOTE_ABORTED_ERR: + case SYNDROME_INVAL_EECN_ERR: + case SYNDROME_INVAL_EEC_STATE_ERR: + default: + entry->status = IB_WCS_GENERAL_ERR; + break; + } + + entry->vendor_specific = cqe->vendor_err; + + /* + * Mem-free HCAs always generate one CQE per WQE, even in the + * error case, so we don't have to check the doorbell count, etc. + */ + if (mthca_is_memfree(cq->ibv_cq.context)) + return 0; + + err = mthca_free_err_wqe(qp, is_send, wqe_index, &dbd, &new_wqe); + if (err) + return err; + + /* + * If we're at the end of the WQE chain, or we've used up our + * doorbell count, free the CQE. Otherwise just update it for + * the next poll operation. + * + * This doesn't apply to mem-free HCAs, which never use the + * doorbell count field. In that case we always free the CQE. 
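 * [Editorial note -- not part of the original commit] "Updating" the CQE here
 * means recycling it in place: the code below subtracts the doorbells already
 * accounted for from db_cnt, points the CQE at the next WQE in the chain and
 * rewrites the syndrome as a flush error, so the same CQ slot reports the
 * remaining flushed work requests on later polls; free_cqe is cleared so the
 * consumer index is not advanced past it.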
+ */ + if (mthca_is_memfree(cq->ibv_cq.context) || + !(new_wqe & cl_hton32(0x3f)) || (!cqe->db_cnt && dbd)) + return 0; + + cqe->db_cnt = cl_hton16(cl_ntoh16(cqe->db_cnt) - dbd); + cqe->wqe = new_wqe; + cqe->syndrome = SYNDROME_WR_FLUSH_ERR; + + *free_cqe = 0; + + return 0; +} + +static inline int mthca_poll_one(struct mthca_cq *cq, + struct mthca_qp **cur_qp, + int *freed, + struct _ib_wc *entry) +{ + struct mthca_wq *wq; + struct mthca_cqe *cqe; + uint32_t qpn; + int wqe_index; + int is_error; + int is_send; + int free_cqe = 1; + int err = 0; + + UVP_ENTER(UVP_DBG_CQ); + + cqe = next_cqe_sw(cq); + if (!cqe) + return -EAGAIN; + + /* + * Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + { // debug print + UVP_PRINT(TRACE_LEVEL_VERBOSE,UVP_DBG_CQ,("%x/%d: CQE -> QPN %06x, WQE @ %08x\n", + cq->cqn, cq->cons_index, cl_ntoh32(cqe->my_qpn), + cl_ntoh32(cqe->wqe))); + dump_cqe(TRACE_LEVEL_VERBOSE,cqe); + } + + qpn = cl_ntoh32(cqe->my_qpn); + + is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == + MTHCA_ERROR_CQE_OPCODE_MASK; + is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80; + + if (!*cur_qp || cl_ntoh32(cqe->my_qpn) != (*cur_qp)->ibv_qp.qp_num) { + /* + * We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. + */ + *cur_qp = mthca_find_qp(to_mctx(cq->ibv_cq.context), cl_ntoh32(cqe->my_qpn)); + if (!*cur_qp) { + UVP_PRINT(TRACE_LEVEL_WARNING,UVP_DBG_CQ, ("CQ entry for unknown QP %06x\n", + cl_ntoh32(cqe->my_qpn) & 0xffffff)); + err = -EINVAL; + goto out; + } + } + + if (is_send) { + wq = &(*cur_qp)->sq; + wqe_index = ((cl_ntoh32(cqe->wqe) - (*cur_qp)->send_wqe_offset) >> wq->wqe_shift); + entry->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->rq.max]; + } else if ((*cur_qp)->ibv_qp.srq) { + struct mthca_srq * srq = to_msrq((*cur_qp)->ibv_qp.srq); + uint32_t wqe = cl_hton32(cqe->wqe); + wq = NULL; + wqe_index = wqe >> srq->wqe_shift; + entry->wr_id = srq->wrid[wqe_index]; + mthca_free_srq_wqe(srq, wqe_index); + } else { + wq = &(*cur_qp)->rq; + wqe_index = cl_ntoh32(cqe->wqe) >> wq->wqe_shift; + entry->wr_id = (*cur_qp)->wrid[wqe_index]; + } + + if (wq) { + if ((int)wq->last_comp < wqe_index) + wq->tail += wqe_index - wq->last_comp; + else + wq->tail += wqe_index + wq->max - wq->last_comp; + + wq->last_comp = wqe_index; + } + + if (is_send) { + entry->recv.ud.recv_opt = 0; + switch (cqe->opcode) { + case MTHCA_OPCODE_RDMA_WRITE: + entry->wc_type = IB_WC_RDMA_WRITE; + break; + case MTHCA_OPCODE_RDMA_WRITE_IMM: + entry->wc_type = IB_WC_RDMA_WRITE; + entry->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; + break; + case MTHCA_OPCODE_SEND: + entry->wc_type = IB_WC_SEND; + break; + case MTHCA_OPCODE_SEND_IMM: + entry->wc_type = IB_WC_SEND; + entry->recv.ud.recv_opt |= IB_RECV_OPT_IMMEDIATE; + break; + case MTHCA_OPCODE_RDMA_READ: + entry->wc_type = IB_WC_RDMA_READ; + entry->length = cl_ntoh32(cqe->byte_cnt); + break; + case MTHCA_OPCODE_ATOMIC_CS: + entry->wc_type = IB_WC_COMPARE_SWAP; + entry->length = MTHCA_BYTES_PER_ATOMIC_COMPL; + break; + case MTHCA_OPCODE_ATOMIC_FA: + entry->wc_type = IB_WC_FETCH_ADD; + entry->length = MTHCA_BYTES_PER_ATOMIC_COMPL; + break; + case MTHCA_OPCODE_BIND_MW: + entry->wc_type = IB_WC_MW_BIND; + break; + default: + /* assume it's a send completion */ + entry->wc_type = IB_WC_SEND; + break; + } + } else { + entry->length = cl_ntoh32(cqe->byte_cnt); + switch (cqe->opcode & 0x1f) { + case IBV_OPCODE_SEND_LAST_WITH_IMMEDIATE: + case 
IBV_OPCODE_SEND_ONLY_WITH_IMMEDIATE: + entry->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + entry->recv.ud.immediate_data = cqe->imm_etype_pkey_eec; + entry->wc_type = IB_WC_RECV; + break; + case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: + case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: + entry->recv.ud.recv_opt = IB_RECV_OPT_IMMEDIATE; + entry->recv.ud.immediate_data = cqe->imm_etype_pkey_eec; + entry->wc_type = IB_WC_RECV; + break; + default: + entry->recv.ud.recv_opt = 0; + entry->wc_type = IB_WC_RECV; + break; + } + entry->recv.ud.remote_lid = cqe->rlid; + entry->recv.ud.remote_qp = cqe->rqpn & 0xffffff00; + entry->recv.ud.pkey_index = (uint16_t)(cl_ntoh32(cqe->imm_etype_pkey_eec) >> 16); + entry->recv.ud.remote_sl = cl_ntoh16(cqe->sl_g_mlpath) >> 12; + entry->recv.ud.path_bits = cl_ntoh16(cqe->sl_g_mlpath) & 0x7f; + entry->recv.ud.recv_opt |= cl_ntoh16(cqe->sl_g_mlpath) & 0x80 ? + IB_RECV_OPT_GRH_VALID : 0; + } + + + if (is_error) { + err = handle_error_cqe(cq, *cur_qp, wqe_index, is_send, + (struct mthca_err_cqe *) cqe, + entry, &free_cqe); + } + else + entry->status = IB_WCS_SUCCESS; + +out: + if (likely(free_cqe)) { + set_cqe_hw(cqe); + ++(*freed); + ++cq->cons_index; + } + + UVP_EXIT(UVP_DBG_CQ); + return err; +} + +int mthca_poll_cq(struct ibv_cq *ibcq, int num_entries, struct _ib_wc *entry) +{ + struct mthca_cq *cq = to_mcq(ibcq); + struct mthca_qp *qp = NULL; + int err = CQ_OK; + int freed = 0; + int npolled; + + cl_spinlock_acquire(&cq->lock); + + for (npolled = 0; npolled < num_entries; ++npolled) { + err = mthca_poll_one(cq, &qp, &freed, entry + npolled); + if (err) + break; + } + + if (freed) { + wmb(); + update_cons_index(cq, freed); + } + + cl_spinlock_release(&cq->lock); + + return (err == 0 || err == -EAGAIN) ? npolled : err; +} + +int mthca_poll_cq_list( + IN struct ibv_cq *ibcq, + IN OUT struct _ib_wc** const pp_free_wclist, + OUT struct _ib_wc** const pp_done_wclist ) +{ + struct mthca_cq *cq = to_mcq(ibcq); + struct mthca_qp *qp = NULL; + int err = CQ_OK; + int freed = 0; + ib_wc_t *wc_p, **next_pp; + uint32_t wc_cnt = 0; + + cl_spinlock_acquire(&cq->lock); + + // loop through CQ + next_pp = pp_done_wclist; + wc_p = *pp_free_wclist; + while( wc_p ) { + // poll one CQE + err = mthca_poll_one(cq, &qp, &freed, wc_p); + if (err) + break; + + // prepare for the next loop + *next_pp = wc_p; + next_pp = &wc_p->p_next; + wc_p = wc_p->p_next; + } + + // prepare the results + *pp_free_wclist = wc_p; /* Set the head of the free list. */ + *next_pp = NULL; /* Clear the tail of the done list. */ + + // update consumer index + if (freed) { + wmb(); + update_cons_index(cq, freed); + } + + cl_spinlock_release(&cq->lock); + return (err == 0 || err == -EAGAIN)? 0 : err; +} + +int mthca_tavor_arm_cq(struct ibv_cq *cq, enum ib_cq_notify notify) +{ + uint32_t doorbell[2]; + + doorbell[0] = cl_hton32((notify == IB_CQ_SOLICITED ? + MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : + MTHCA_TAVOR_CQ_DB_REQ_NOT) | + to_mcq(cq)->cqn); + doorbell[1] = 0xffffffff; + + mthca_write64(doorbell, to_mctx(cq->context), MTHCA_CQ_DOORBELL); + + return 0; +} + +int mthca_arbel_arm_cq(struct ibv_cq *ibvcq, enum ib_cq_notify notify) +{ + struct mthca_cq *cq = to_mcq(ibvcq); + uint32_t doorbell[2]; + uint32_t sn; + uint32_t ci; + + sn = *cq->p_u_arm_sn & 3; + ci = cl_hton32(cq->cons_index); + + doorbell[0] = ci; + doorbell[1] = cl_hton32((cq->cqn << 8) | (2 << 5) | (sn << 3) | + (notify == IB_CQ_SOLICITED ? 
1 : 2)); + + mthca_write_db_rec(doorbell, cq->arm_db); + + /* + * Make sure that the doorbell record in host memory is + * written before ringing the doorbell via PCI MMIO. + */ + wmb(); + + doorbell[0] = cl_hton32((sn << 28) | + (notify == IB_CQ_SOLICITED ? + MTHCA_ARBEL_CQ_DB_REQ_NOT_SOL : + MTHCA_ARBEL_CQ_DB_REQ_NOT) | + cq->cqn); + doorbell[1] = ci; + + mthca_write64(doorbell, to_mctx(ibvcq->context), MTHCA_CQ_DOORBELL); + + return 0; +} + +static inline int is_recv_cqe(struct mthca_cqe *cqe) +{ + if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == + MTHCA_ERROR_CQE_OPCODE_MASK) + return !(cqe->opcode & 0x01); + else + return !(cqe->is_send & 0x80); +} + +void mthca_cq_clean(struct mthca_cq *cq, uint32_t qpn, struct mthca_srq *srq) +{ + struct mthca_cqe *cqe; + uint32_t prod_index; + int nfreed = 0; + + cl_spinlock_acquire(&cq->lock); + + /* + * First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->cons_index; + cqe_sw(cq, prod_index & cq->ibv_cq.cqe); + ++prod_index) + if (prod_index == cq->cons_index + cq->ibv_cq.cqe) + break; + + /* + * Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + while ((int) --prod_index - (int) cq->cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe); + if (cqe->my_qpn == cl_hton32(qpn)) { + if (srq && is_recv_cqe(cqe)) + mthca_free_srq_wqe(srq, + cl_ntoh32(cqe->wqe) >> srq->wqe_shift); + ++nfreed; + } else if (nfreed) + memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe), + cqe, MTHCA_CQ_ENTRY_SIZE); + } + + if (nfreed) { + mb(); + cq->cons_index += nfreed; + update_cons_index(cq, nfreed); + } + + cl_spinlock_release(&cq->lock); +} + +void mthca_init_cq_buf(struct mthca_cq *cq, int nent) +{ + int i; + + for (i = 0; i < nent; ++i) + set_cqe_hw(get_cqe(cq, i)); + + cq->cons_index = 0; +} diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp_debug.c b/branches/IBFD/hw/mthca/user/mlnx_uvp_debug.c new file mode 100644 index 00000000..3fc71134 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_debug.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2005 Mellanox Technologies LTD. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +// Author: Yossi Leybovich + +#include "mlnx_uvp_debug.h" +#include +#include +#include + +#if !defined(EVENT_TRACING) + + +#if DBG +uint32_t g_mlnx_dbg_level = TRACE_LEVEL_WARNING; +uint32_t g_mlnx_dbg_flags= UVP_DBG_QP | UVP_DBG_CQ|UVP_DBG_MEMORY; +#endif + +VOID +_UVP_PRINT( + IN char* msg, + ... + ) + + { +#if DBG +#define TEMP_BUFFER_SIZE 1024 + va_list list; + UCHAR debugMessageBuffer[TEMP_BUFFER_SIZE]; + HRESULT result; + + va_start(list, msg); + + if (msg) { + + // + // Using new safe string functions instead of _vsnprintf. This function takes + // care of NULL terminating if the message is longer than the buffer. + // + + result = StringCbVPrintfA (debugMessageBuffer, sizeof(debugMessageBuffer), + msg, list); + if(((HRESULT)(result) < 0)) { + + OutputDebugString (": StringCbVPrintfA failed \n"); + return; + } + OutputDebugString ( debugMessageBuffer); + + } + va_end(list); + + return; +#endif //DBG +} + +#endif //EVENT_TRACING + diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp_debug.h b/branches/IBFD/hw/mthca/user/mlnx_uvp_debug.h new file mode 100644 index 00000000..2a9cbc5b --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_debug.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * + * This software is available to you under the OpenIB.org BSD license + * below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + + +#ifndef _MLNX_UVP_DEBUG_H_ +#define _MLNX_UVP_DEBUG_H_ + +#include + +extern uint32_t g_mlnx_dbg_level; +extern uint32_t g_mlnx_dbg_flags; + + +#if defined(EVENT_TRACING) +// +// Software Tracing Definitions +// +// + +#define WPP_CONTROL_GUIDS \ + WPP_DEFINE_CONTROL_GUID(HCACtlGuid,(2C718E52,0D36,4bda,9E58,0FC601818D8F), \ + WPP_DEFINE_BIT( UVP_DBG_DEV) \ + WPP_DEFINE_BIT( UVP_DBG_PNP) \ + WPP_DEFINE_BIT( UVP_DBG_MAD) \ + WPP_DEFINE_BIT( UVP_DBG_PO) \ + WPP_DEFINE_BIT( UVP_DBG_CQ) \ + WPP_DEFINE_BIT( UVP_DBG_QP) \ + WPP_DEFINE_BIT( UVP_DBG_MEMORY) \ + WPP_DEFINE_BIT( UVP_DBG_SRQ) \ + WPP_DEFINE_BIT( UVP_DBG_AV) \ + WPP_DEFINE_BIT( UVP_DBG_SEND) \ + WPP_DEFINE_BIT( UVP_DBG_RECV) \ + WPP_DEFINE_BIT( UVP_DBG_LOW) \ + WPP_DEFINE_BIT( UVP_DBG_SHIM)) + + +#define WPP_LEVEL_FLAGS_ENABLED(lvl, flags) (WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= lvl) +#define WPP_LEVEL_FLAGS_LOGGER(lvl,flags) WPP_LEVEL_LOGGER(flags) +#define WPP_FLAG_ENABLED(flags)(WPP_LEVEL_ENABLED(flags) && WPP_CONTROL(WPP_BIT_ ## flags).Level >= TRACE_LEVEL_VERBOSE) +#define WPP_FLAG_LOGGER(flags) WPP_LEVEL_LOGGER(flags) + + +// begin_wpp config +// UVP_ENTER(FLAG); +// UVP_EXIT(FLAG); +// USEPREFIX(UVP_PRINT, "%!FUNC!() "); +// USESUFFIX(UVP_ENTER, "%!FUNC!===>"); +// USESUFFIX(UVP_EXIT, "%!FUNC!<==="); +// end_wpp + + +#else + +#include +#include + +/* + * Debug macros + */ + + +#define UVP_DBG_DEV (1 << 0) +#define UVP_DBG_PNP (1 << 1) +#define UVP_DBG_MAD (1 << 2) +#define UVP_DBG_PO (1 << 3) +#define UVP_DBG_QP (1 << 4) +#define UVP_DBG_CQ (1 << 5) +#define UVP_DBG_MEMORY (1 << 6) +#define UVP_DBG_SRQ (1 << 7) +#define UVP_DBG_AV (1 << 8) +#define UVP_DBG_SEND (1 << 9) +#define UVP_DBG_RECV (1 << 10) +#define UVP_DBG_LOW (1 << 11) +#define UVP_DBG_SHIM (1 << 12) + + +VOID + _UVP_PRINT( + IN char* msg, + ...); + +#if DBG + +#define UVP_PRINT(_level_,_flags_,_msg_) \ + if ((_level_) <= g_mlnx_dbg_level && (_flags_) & g_mlnx_dbg_flags) {\ + _UVP_PRINT("[UVP] %s():",__FUNCTION__);\ + if((_level_) == TRACE_LEVEL_ERROR) _UVP_PRINT ("***ERROR*** ");\ + _UVP_PRINT _msg_ ; \ + } + + +// +#else + +#define UVP_PRINT(lvl ,flags, msg) + +#endif + + +#define UVP_ENTER(flags)\ + UVP_PRINT(TRACE_LEVEL_VERBOSE, flags,("===>\n")); + +#define UVP_EXIT(flags)\ + UVP_PRINT(TRACE_LEVEL_VERBOSE, flags,("<===\n")); + +#define UVP_PRINT_EXIT(_level_,_flag_,_msg_) \ + {\ + if (status != IB_SUCCESS) {\ + UVP_PRINT(_level_,_flag_,_msg_);\ + }\ + UVP_EXIT(_flag_);\ + } + +#endif //EVENT_TRACING + +#endif /*_MLNX_UVP_DEBUG_H_ */ + diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp_doorbell.h b/branches/IBFD/hw/mthca/user/mlnx_uvp_doorbell.h new file mode 100644 index 00000000..7928eceb --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_doorbell.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef DOORBELL_H +#define DOORBELL_H + +enum { + MTHCA_SEND_DOORBELL_FENCE = 1 << 5 +}; +#if defined _WIN64 + +static inline void mthca_write64(uint32_t val[2], struct mthca_context *ctx, int offset) +{ + *(volatile uint64_t *) ((char *)ctx->uar + offset) = *(volatile uint64_t*)val; +} + +static inline void mthca_write_db_rec(uint32_t val[2], uint32_t *db) +{ + *(volatile uint64_t *) db = *(volatile uint64_t*)val; +} + + +#elif defined(_WIN32) + +static inline void mthca_write64(uint32_t val[2], struct mthca_context *ctx, int offset) +{ + volatile uint64_t *target_p = (volatile uint64_t*)((uint8_t*)ctx->uar + offset); + + cl_spinlock_acquire(&ctx->uar_lock); + *(volatile uint32_t *) ((uint8_t*)ctx->uar + offset) = val[0]; + *(volatile uint32_t *) ((uint8_t*)ctx->uar + offset + 4) = val[1]; + cl_spinlock_release(&ctx->uar_lock); + + //TODO: can we save mm0 and not to use emms, as Linux do ? + //__asm movq mm0,val + //__asm movq target_p,mm0 + //__asm emms +} +static inline void mthca_write_db_rec(uint32_t val[2], uint32_t *db) +{ + db[0] = val[0]; + wmb(); + db[1] = val[1]; +} + + +#endif + +#endif /* MTHCA_H */ diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp_memfree.c b/branches/IBFD/hw/mthca/user/mlnx_uvp_memfree.c new file mode 100644 index 00000000..f08d5e44 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_memfree.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include + +#include "mlnx_uvp.h" + +#define MTHCA_FREE_MAP_SIZE (MTHCA_DB_REC_PER_PAGE / BITS_PER_LONG) + +struct mthca_db_page { + unsigned long free[MTHCA_FREE_MAP_SIZE]; + uint64_t *db_rec; +}; + +struct mthca_db_table { + int npages; + int max_group1; + int min_group2; + HANDLE mutex; + struct mthca_db_page page[]; +}; + +int mthca_alloc_db(struct mthca_db_table *db_tab, enum mthca_db_type type, + uint32_t **db) +{ + int i, j, k; + int group, start, end, dir; + int ret = 0; + + WaitForSingleObject( db_tab->mutex, INFINITE ); + + switch (type) { + case MTHCA_DB_TYPE_CQ_ARM: + case MTHCA_DB_TYPE_SQ: + group = 0; + start = 0; + end = db_tab->max_group1; + dir = 1; + break; + + case MTHCA_DB_TYPE_CQ_SET_CI: + case MTHCA_DB_TYPE_RQ: + case MTHCA_DB_TYPE_SRQ: + group = 1; + start = db_tab->npages - 1; + end = db_tab->min_group2; + dir = -1; + break; + + default: + ret = -1; + goto out; + } + + for (i = start; i != end; i += dir) + if (db_tab->page[i].db_rec) + for (j = 0; j < MTHCA_FREE_MAP_SIZE; ++j) + if (db_tab->page[i].free[j]) + goto found; + + if (db_tab->max_group1 >= db_tab->min_group2 - 1) { + ret = -1; + goto out; + } + + if (posix_memalign((void **) &db_tab->page[i].db_rec, MTHCA_DB_REC_PAGE_SIZE, + MTHCA_DB_REC_PAGE_SIZE)) { + ret = -1; + goto out; + } + + memset(db_tab->page[i].db_rec, 0, MTHCA_DB_REC_PAGE_SIZE); + memset(db_tab->page[i].free, 0xff, sizeof db_tab->page[i].free); + + if (group == 0) + ++db_tab->max_group1; + else + --db_tab->min_group2; + +found: + for (j = 0; j < MTHCA_FREE_MAP_SIZE; ++j) { + k = ffsl(db_tab->page[i].free[j]); + if (k) + break; + } + + if (!k) { + ret = -1; + goto out; + } + + --k; + db_tab->page[i].free[j] &= ~(1UL << k); + + j = j * BITS_PER_LONG + k; + if (group == 1) + j = MTHCA_DB_REC_PER_PAGE - 1 - j; + + ret = i * MTHCA_DB_REC_PER_PAGE + j; + *db = (uint32_t *) &db_tab->page[i].db_rec[j]; + +out: + ReleaseMutex( db_tab->mutex ); + return ret; +} + +void mthca_set_db_qn(uint32_t *db, enum mthca_db_type type, uint32_t qn) +{ + db[1] = cl_hton32((qn << 8) | (type << 5)); +} + +void mthca_free_db(struct mthca_db_table *db_tab, enum mthca_db_type type, int db_index) +{ + int i, j; + struct mthca_db_page *page; + + i = db_index / MTHCA_DB_REC_PER_PAGE; + j = db_index % MTHCA_DB_REC_PER_PAGE; + + page = db_tab->page + i; + + WaitForSingleObject( db_tab->mutex, INFINITE ); + page->db_rec[j] = 0; + + if (i >= db_tab->min_group2) + j = MTHCA_DB_REC_PER_PAGE - 1 - j; + + page->free[j / BITS_PER_LONG] |= 1UL << (j % BITS_PER_LONG); + + ReleaseMutex( db_tab->mutex ); +} + +struct mthca_db_table *mthca_alloc_db_tab(int uarc_size) +{ + struct mthca_db_table *db_tab; + int npages; + int i; + + npages = uarc_size / MTHCA_DB_REC_PAGE_SIZE; + db_tab = cl_malloc(sizeof (struct mthca_db_table) + + npages * sizeof (struct mthca_db_page)); + if (!db_tab) + goto err_malloc; + + db_tab->mutex = CreateMutex( NULL, FALSE, NULL ); + if (!db_tab->mutex) + goto err_mutex; + db_tab->npages = npages; + db_tab->max_group1 = 0; + db_tab->min_group2 = npages - 1; + + for (i = 0; i < npages; ++i) + db_tab->page[i].db_rec = NULL; + + goto end; + +err_mutex: + cl_free(db_tab); +err_malloc: +end: + return db_tab; +} + +void mthca_free_db_tab(struct mthca_db_table *db_tab) +{ + int i; + + if (!db_tab) + return; + + for (i = 0; i < db_tab->npages; ++i) + if (db_tab->page[i].db_rec) +#ifdef NOT_USE_VIRTUAL_ALLOC + cl_free(db_tab->page[i].db_rec); +#else + VirtualFree( db_tab->page[i].db_rec, 0, MEM_RELEASE); +#endif + + cl_free(db_tab); +} diff --git 
a/branches/IBFD/hw/mthca/user/mlnx_uvp_qp.c b/branches/IBFD/hw/mthca/user/mlnx_uvp_qp.c new file mode 100644 index 00000000..f5bfe59c --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_qp.c @@ -0,0 +1,1085 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include +#include "mlnx_uvp.h" +#include "mlnx_uvp_doorbell.h" +#include "mthca_wqe.h" +#include "mlnx_ual_data.h" + +#if defined(EVENT_TRACING) +#include "mlnx_uvp_qp.tmh" +#endif + +static const uint8_t mthca_opcode[] = { + MTHCA_OPCODE_RDMA_WRITE, + MTHCA_OPCODE_RDMA_WRITE_IMM, + MTHCA_OPCODE_SEND, + MTHCA_OPCODE_SEND_IMM, + MTHCA_OPCODE_RDMA_READ, + MTHCA_OPCODE_ATOMIC_CS, + MTHCA_OPCODE_ATOMIC_FA +}; + +static enum mthca_wr_opcode conv_ibal_wr_opcode(struct _ib_send_wr *wr) +{ + enum mthca_wr_opcode opcode = -1; //= wr->wr_type; + + switch (wr->wr_type) { + case WR_SEND: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_SEND_IMM : MTHCA_OPCODE_SEND; + break; + case WR_RDMA_WRITE: + opcode = (wr->send_opt & IB_SEND_OPT_IMMEDIATE) ? MTHCA_OPCODE_RDMA_WRITE_IMM : MTHCA_OPCODE_RDMA_WRITE; + break; + case WR_RDMA_READ: opcode = MTHCA_OPCODE_RDMA_READ; break; + case WR_COMPARE_SWAP: opcode = MTHCA_OPCODE_ATOMIC_CS; break; + case WR_FETCH_ADD: opcode = MTHCA_OPCODE_ATOMIC_FA; break; + default: opcode = MTHCA_OPCODE_INVALID;break; + } + return opcode; +} + + +static void dump_wqe(uint32_t print_lvl, uint32_t *wqe_ptr , struct mthca_qp *qp_ptr) +{ + net32_t *wqe = wqe_ptr; + + (void) wqe; /* avoid warning if mthca_dbg compiled away... 
*/ + UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents QPN 0x%06x \n",qp_ptr->ibv_qp.qp_num)); + UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",0 + , cl_ntoh32(wqe[0]), cl_ntoh32(wqe[1]), cl_ntoh32(wqe[2]), cl_ntoh32(wqe[3]))); + UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",4 + , cl_ntoh32(wqe[4]), cl_ntoh32(wqe[5]), cl_ntoh32(wqe[6]), cl_ntoh32(wqe[7]))); + UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",8 + , cl_ntoh32(wqe[8]), cl_ntoh32(wqe[9]), cl_ntoh32(wqe[10]), cl_ntoh32(wqe[11]))); + UVP_PRINT(print_lvl,UVP_DBG_QP,("WQE contents [%02x] %08x %08x %08x %08x \n",12 + , cl_ntoh32(wqe[12]), cl_ntoh32(wqe[13]), cl_ntoh32(wqe[14]), cl_ntoh32(wqe[15]))); + +} +static void *get_recv_wqe(struct mthca_qp *qp, int n) +{ + return qp->buf + (n << qp->rq.wqe_shift); +} + +static void *get_send_wqe(struct mthca_qp *qp, int n) +{ + void *wqe_addr = qp->buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift); + UVP_PRINT(TRACE_LEVEL_INFORMATION,UVP_DBG_QP, + ("wqe %p, qp_buf %p, offset %#x, index %d, shift %d \n", + wqe_addr, qp->buf, qp->send_wqe_offset, n, + qp->sq.wqe_shift)); + + return wqe_addr; +} + +void mthca_init_qp_indices(struct mthca_qp *qp) +{ + qp->sq.next_ind = 0; + qp->sq.last_comp = qp->sq.max - 1; + qp->sq.head = 0; + qp->sq.tail = 0; + qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); + + qp->rq.next_ind = 0; + qp->rq.last_comp = qp->rq.max - 1; + qp->rq.head = 0; + qp->rq.tail = 0; + qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); +} + +static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq) +{ + unsigned cur; + + cur = wq->head - wq->tail; + if ((int)(cur + nreq) < wq->max) + return 0; + + cl_spinlock_acquire(&cq->lock); + cur = wq->head - wq->tail; + cl_spinlock_release(&cq->lock); + + return (int)(cur + nreq) >= wq->max; +} + + +int mthca_tavor_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr) +{ + struct mthca_qp *qp = to_mqp(ibqp); + uint8_t *wqe; + uint8_t *prev_wqe; + int ret = 0; + int nreq; + int i; + int size; + int size0 = 0; + uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0; + int ind; + int op0 = 0; + enum ib_wr_opcode opcode; + + UVP_ENTER(UVP_DBG_QP); + cl_spinlock_acquire(&qp->sq.lock); + + /* XXX check that state is OK to post send */ + + ind = qp->sq.next_ind; + + if(ibqp->state == IBV_QPS_RESET) { + ret = -EBUSY; + if (bad_wr) + *bad_wr = wr; + goto err_busy; + } + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + + if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", ibqp->qp_num, + qp->sq.head, qp->sq.tail, + qp->sq.max, nreq)); + ret = -ENOMEM; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind); + prev_wqe = qp->sq.last; + qp->sq.last = wqe; + opcode = conv_ibal_wr_opcode(wr); + if (opcode == MTHCA_OPCODE_INVALID) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num)); + ret = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + ((struct mthca_next_seg *) wqe)->flags = + ((wr->send_opt & IB_SEND_OPT_SIGNALED) ? + cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) | + ((wr->send_opt & IB_SEND_OPT_SOLICITED) ? 
+ cl_hton32(MTHCA_NEXT_SOLICIT) : 0) | + cl_hton32(1); + if (opcode == MTHCA_OPCODE_SEND_IMM|| + opcode == MTHCA_OPCODE_RDMA_WRITE_IMM) + ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data; + + wqe += sizeof (struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + + switch (ibqp->qp_type) { + case IB_QPT_RELIABLE_CONN: + switch (opcode) { + case MTHCA_OPCODE_ATOMIC_CS: + case MTHCA_OPCODE_ATOMIC_FA: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + + wqe += sizeof (struct mthca_raddr_seg); + + if (opcode == MTHCA_OPCODE_ATOMIC_CS) { + ((struct mthca_atomic_seg *) wqe)->swap_add = + cl_hton64(wr->remote_ops.atomic2); + ((struct mthca_atomic_seg *) wqe)->compare = + cl_hton64(wr->remote_ops.atomic1); + } else { + ((struct mthca_atomic_seg *) wqe)->swap_add = + cl_hton64(wr->remote_ops.atomic1); + ((struct mthca_atomic_seg *) wqe)->compare = 0; + } + + wqe += sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16; + break; + + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: + case MTHCA_OPCODE_RDMA_READ: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case IB_QPT_UNRELIABLE_CONN: + switch (opcode) { + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case IB_QPT_UNRELIABLE_DGRM: + { + struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av); + ((struct mthca_tavor_ud_seg *) wqe)->lkey = + cl_hton32(ah->key); + ((struct mthca_tavor_ud_seg *) wqe)->av_addr = + cl_hton64((uint64_t)ah->av); + ((struct mthca_tavor_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp; + ((struct mthca_tavor_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey; + + wqe += sizeof (struct mthca_tavor_ud_seg); + size += sizeof (struct mthca_tavor_ud_seg) / 16; + break; + } + + default: + break; + } + + if ((int)(int)wr->num_ds > qp->sq.max_gs) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x too many gathers\n",ibqp->qp_num)); + ret = -ERANGE; + if (bad_wr) + *bad_wr = wr; + goto out; + } +//TODO sleybo: + if (wr->send_opt & IB_SEND_OPT_INLINE) { + if (wr->num_ds) { + struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe; + uint32_t s = 0; + + wqe += sizeof *seg; + for (i = 0; i < (int)wr->num_ds; ++i) { + struct _ib_local_ds *sge = &wr->ds_array[i]; + + s += sge->length; + + if (s > (uint32_t)qp->max_inline_data) { + ret = -1; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + memcpy(wqe, (void *) (ULONG_PTR) sge->vaddr, + sge->length); + wqe += sge->length; + } + + seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s); + size += align(s + sizeof *seg, 16) / 16; + } + } else { + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) 
wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cl_hton64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + } + + qp->wrid[ind + qp->rq.max] = wr->wr_id; + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cl_hton32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) |opcode); + + wmb(); + + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cl_hton32((size0 ? 0 : MTHCA_NEXT_DBD) | size | + ((wr->send_opt& IB_SEND_OPT_FENCE) ? + MTHCA_NEXT_FENCE : 0)); + + if (!size0) { + size0 = size; + op0 = opcode; + } + + dump_wqe( TRACE_LEVEL_VERBOSE, (uint32_t*)qp->sq.last,qp); + + ++ind; + if (unlikely(ind >= qp->sq.max)) + ind -= qp->sq.max; + + } + +out: + if (likely(nreq)) { + uint32_t doorbell[2]; + + doorbell[0] = cl_hton32(((qp->sq.next_ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) | f0 | op0); + doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0); + + wmb(); + + mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL); + } + + qp->sq.next_ind = ind; + qp->sq.head += nreq; + +err_busy: + cl_spinlock_release(&qp->sq.lock); + + UVP_EXIT(UVP_DBG_QP); + return ret; +} + + +int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) +{ + struct mthca_qp *qp = to_mqp(ibqp); + uint32_t doorbell[2]; + int ret = 0; + int nreq; + int i; + int size; + int size0 = 0; + int ind; + uint8_t *wqe; + uint8_t *prev_wqe; + + UVP_ENTER(UVP_DBG_QP); + + cl_spinlock_acquire(&qp->rq.lock); + + /* XXX check that state is OK to post receive */ + + ind = qp->rq.next_ind; + if(ibqp->state == IBV_QPS_RESET) { + ret = -EBUSY; + if (bad_wr) + *bad_wr = wr; + goto err_busy; + } + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) { + nreq = 0; + + doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); + doorbell[1] = cl_hton32(ibqp->qp_num << 8); //TODO sleybo: add qpn to qp struct + + /* + * Make sure that descriptors are written + * before doorbell is rung. 
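 * [Editorial note -- not part of the original commit] Receives are handed to
 * the HCA in batches of at most MTHCA_TAVOR_MAX_WQES_PER_RECV_DB because the
 * Tavor receive doorbell carries the number of new WQEs in a single byte
 * (note the "nreq & 255" in the final doorbell below); each batch is made
 * globally visible with mb() before its doorbell is written.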
+ */ + mb(); + + mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL); + + qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; + size0 = 0; + } + + if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) { + UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("RQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", ibqp->qp_num, + qp->rq.head, qp->rq.tail, + qp->rq.max, nreq)); + ret = -ENOMEM; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + wqe = get_recv_wqe(qp, ind); + prev_wqe = qp->rq.last; + qp->rq.last = wqe; + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = + cl_hton32(MTHCA_NEXT_DBD); + ((struct mthca_next_seg *) wqe)->flags = + cl_hton32(MTHCA_NEXT_CQ_UPDATE); + + wqe += sizeof (struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + if (unlikely((int)wr->num_ds > qp->rq.max_gs)) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x too many gathers\n",ibqp->qp_num)); + ret = -ERANGE; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cl_hton64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } + + qp->wrid[ind] = wr->wr_id; + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cl_hton32((ind << qp->rq.wqe_shift) | 1); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cl_hton32(MTHCA_NEXT_DBD | size); + + if (!size0) + size0 = size; + + ++ind; + if (unlikely(ind >= qp->rq.max)) + ind -= qp->rq.max; + } + +out: + if (likely(nreq)) { + doorbell[0] = cl_hton32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); + doorbell[1] = cl_hton32((ibqp->qp_num << 8) | (nreq & 255)); + + /* + * Make sure that descriptors are written before + * doorbell is rung. + */ + mb(); + + mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_RECV_DOORBELL); + } + + qp->rq.next_ind = ind; + qp->rq.head += nreq; + +err_busy: + cl_spinlock_release(&qp->rq.lock); + UVP_EXIT(UVP_DBG_QP); + return ret; +} + +int mthca_arbel_post_send(struct ibv_qp *ibqp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr) +{ + struct mthca_qp *qp = to_mqp(ibqp); + uint32_t doorbell[2]; + uint8_t *wqe; + uint8_t *prev_wqe; + int ret = 0; + int nreq; + int i; + int size; + int size0 = 0; + uint32_t f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0; + int ind; + uint8_t op0 = 0; + enum ib_wr_opcode opcode; + + UVP_ENTER(UVP_DBG_QP); + + cl_spinlock_acquire(&qp->sq.lock); + + /* XXX check that state is OK to post send */ + + ind = qp->sq.head & (qp->sq.max - 1); + if(ibqp->state == IBV_QPS_RESET) { + ret = -EBUSY; + if (bad_wr) + *bad_wr = wr; + goto err_busy; + } + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) { + nreq = 0; + + doorbell[0] = cl_hton32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) | + ((qp->sq.head & 0xffff) << 8) | f0 | op0); + doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0); + qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB; + size0 = 0; + f0 = unlikely(wr->send_opt & IB_SEND_OPT_FENCE) ? MTHCA_SEND_DOORBELL_FENCE : 0; + + /* + * Make sure that descriptors are written before + * doorbell record. 
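 * [Editorial note -- not part of the original commit] Arbel (mem-free) send
 * posting is two-stage: the doorbell record in host memory (*qp->sq.db) is
 * updated first and then the MMIO doorbell is written; the wmb() calls below
 * order the WQE stores, the doorbell-record update and the MMIO write with
 * respect to one another.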
+ */ + wmb(); + *qp->sq.db = cl_hton32(qp->sq.head & 0xffff); + + /* + * Make sure doorbell record is written before we + * write MMIO send doorbell. + */ + wmb(); + mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL); + + } + + if (mthca_wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) { + UVP_PRINT(TRACE_LEVEL_ERROR,UVP_DBG_QP,("SQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", ibqp->qp_num, + qp->sq.head, qp->sq.tail, + qp->sq.max, nreq)); + ret = -ENOMEM; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind); + prev_wqe = qp->sq.last; + qp->sq.last = wqe; + opcode = conv_ibal_wr_opcode(wr); + + ((struct mthca_next_seg *) wqe)->flags = + ((wr->send_opt & IB_SEND_OPT_SIGNALED) ? + cl_hton32(MTHCA_NEXT_CQ_UPDATE) : 0) | + ((wr->send_opt & IB_SEND_OPT_SOLICITED) ? + cl_hton32(MTHCA_NEXT_SOLICIT) : 0) | + cl_hton32(1); + if (opcode == MTHCA_OPCODE_SEND_IMM|| + opcode == MTHCA_OPCODE_RDMA_WRITE_IMM) + ((struct mthca_next_seg *) wqe)->imm = wr->immediate_data; + + wqe += sizeof (struct mthca_next_seg); + size = sizeof (struct mthca_next_seg) / 16; + + switch (ibqp->qp_type) { + case IB_QPT_RELIABLE_CONN: + switch (opcode) { + case MTHCA_OPCODE_ATOMIC_CS: + case MTHCA_OPCODE_ATOMIC_FA: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + + wqe += sizeof (struct mthca_raddr_seg); + + if (opcode == MTHCA_OPCODE_ATOMIC_CS) { + ((struct mthca_atomic_seg *) wqe)->swap_add = + cl_hton64(wr->remote_ops.atomic2); + ((struct mthca_atomic_seg *) wqe)->compare = + cl_hton64(wr->remote_ops.atomic1); + } else { + ((struct mthca_atomic_seg *) wqe)->swap_add = + cl_hton64(wr->remote_ops.atomic1); + ((struct mthca_atomic_seg *) wqe)->compare = 0; + } + + wqe += sizeof (struct mthca_atomic_seg); + size += (sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_atomic_seg)) / 16; + break; + + case MTHCA_OPCODE_RDMA_READ: + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case IB_QPT_UNRELIABLE_CONN: + switch (opcode) { + case MTHCA_OPCODE_RDMA_WRITE: + case MTHCA_OPCODE_RDMA_WRITE_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cl_hton64(wr->remote_ops.vaddr); + ((struct mthca_raddr_seg *) wqe)->rkey = + wr->remote_ops.rkey; + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case IB_QPT_UNRELIABLE_DGRM: + { + struct mthca_ah *ah = ((struct mthca_ah *)wr->dgrm.ud.h_av); + memcpy(((struct mthca_arbel_ud_seg *) wqe)->av, + ah->av, sizeof ( struct mthca_av)); + ((struct mthca_arbel_ud_seg *) wqe)->dqpn = wr->dgrm.ud.remote_qp; + ((struct mthca_arbel_ud_seg *) wqe)->qkey = wr->dgrm.ud.remote_qkey; + + + wqe += sizeof (struct mthca_arbel_ud_seg); + size += sizeof (struct mthca_arbel_ud_seg) / 16; + break; + } + + default: + break; + } + + if ((int)wr->num_ds > qp->sq.max_gs) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x full too many 
gathers\n",ibqp->qp_num)); + ret = -ERANGE; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + if (wr->send_opt & IB_SEND_OPT_INLINE) { + if (wr->num_ds) { + struct mthca_inline_seg *seg = (struct mthca_inline_seg *)wqe; + uint32_t s = 0; + + wqe += sizeof *seg; + for (i = 0; i < (int)wr->num_ds; ++i) { + struct _ib_local_ds *sge = &wr->ds_array[i]; + + s += sge->length; + + if (s > (uint32_t)qp->max_inline_data) { + ret = -E2BIG; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + memcpy(wqe, (void *) (uintptr_t) sge->vaddr, + sge->length); + wqe += sge->length; + } + + seg->byte_count = cl_hton32(MTHCA_INLINE_SEG | s); + size += align(s + sizeof *seg, 16) / 16; + } + } else { + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cl_hton64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + size += sizeof (struct mthca_data_seg) / 16; + } +//TODO do this also in kernel +// size += wr->num_ds * (sizeof *seg / 16); + } + + qp->wrid[ind + qp->rq.max] = wr->wr_id; + + if (opcode == MTHCA_OPCODE_INVALID) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SQ %06x opcode invalid\n",ibqp->qp_num)); + ret = -EINVAL; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cl_hton32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) | + opcode); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cl_hton32(MTHCA_NEXT_DBD | size | + ((wr->send_opt & IB_SEND_OPT_FENCE) ? + MTHCA_NEXT_FENCE : 0)); + + if (!size0) { + size0 = size; + op0 = opcode; + } + + ++ind; + if (unlikely(ind >= qp->sq.max)) + ind -= qp->sq.max; + } + +out: + if (likely(nreq)) { + doorbell[0] = cl_hton32((nreq << 24) | + ((qp->sq.head & 0xffff) << 8) | f0 | op0); + doorbell[1] = cl_hton32((ibqp->qp_num << 8) | size0); + + qp->sq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + *qp->sq.db = cl_hton32(qp->sq.head & 0xffff); + + /* + * Make sure doorbell record is written before we + * write MMIO send doorbell. 
+ */ + wmb(); + mthca_write64(doorbell, to_mctx(ibqp->pd->context), MTHCA_SEND_DOORBELL); + } + +err_busy: + cl_spinlock_release(&qp->sq.lock); + + UVP_EXIT(UVP_DBG_QP); + + return ret; +} + +int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) +{ + struct mthca_qp *qp = to_mqp(ibqp); + int ret = 0; + int nreq; + int ind; + int i; + uint8_t *wqe; + + UVP_ENTER(UVP_DBG_QP); + + cl_spinlock_acquire(&qp->rq.lock); + + /* XXX check that state is OK to post receive */ + + ind = qp->rq.head & (qp->rq.max - 1); + if(ibqp->state == IBV_QPS_RESET) { + ret = -EBUSY; + if (bad_wr) + *bad_wr = wr; + goto err_busy; + } + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + if (mthca_wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {//TODO sleybo: check the cq + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full (%u head, %u tail," + " %d max, %d nreq)\n", ibqp->qp_num, + qp->rq.head, qp->rq.tail, + qp->rq.max, nreq)); + ret = -ENOMEM; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + wqe = get_recv_wqe(qp, ind); + + ((struct mthca_next_seg *) wqe)->flags = 0; + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely((int)wr->num_ds > qp->rq.max_gs)) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("RQ %06x full too many scatter\n",ibqp->qp_num)); + ret = -ERANGE; + if (bad_wr) + *bad_wr = wr; + goto out; + } + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + cl_hton64(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < qp->rq.max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + qp->wrid[ind] = wr->wr_id; + + ++ind; + if (unlikely(ind >= qp->rq.max)) + ind -= qp->rq.max; + } +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. + */ + mb(); + *qp->rq.db = cl_hton32(qp->rq.head & 0xffff); + } + +err_busy: + cl_spinlock_release(&qp->rq.lock); + + UVP_EXIT(UVP_DBG_QP); + + return ret; +} + +int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap, + ib_qp_type_t type, struct mthca_qp *qp) +{ + int size; + int max_sq_sge; + + qp->rq.max_gs = cap->max_recv_sge; + qp->sq.max_gs = cap->max_send_sge; + max_sq_sge = align(cap->max_inline_data + sizeof (struct mthca_inline_seg), + sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg); + if (max_sq_sge < (int)cap->max_send_sge) + max_sq_sge = cap->max_send_sge; + + qp->wrid = cl_malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t)); + if (!qp->wrid) + return -1; + + size = sizeof (struct mthca_next_seg) + + qp->rq.max_gs * sizeof (struct mthca_data_seg); + + for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size; + qp->rq.wqe_shift++) + ; /* nothing */ + + size = max_sq_sge * sizeof (struct mthca_data_seg); + switch (type) { + case IB_QPT_UNRELIABLE_DGRM: + size += mthca_is_memfree(pd->context) ? + sizeof (struct mthca_arbel_ud_seg) : + sizeof (struct mthca_tavor_ud_seg); + break; + + case IB_QPT_UNRELIABLE_CONN: + size += sizeof (struct mthca_raddr_seg); + break; + + case IB_QPT_RELIABLE_CONN: + size += sizeof (struct mthca_raddr_seg); + /* + * An atomic op will require an atomic segment, a + * remote address segment and one scatter entry. 
+ */ + if (size < (sizeof (struct mthca_atomic_seg) + + sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_data_seg))) + size = (sizeof (struct mthca_atomic_seg) + + sizeof (struct mthca_raddr_seg) + + sizeof (struct mthca_data_seg)); + break; + + default: + break; + } + + /* Make sure that we have enough space for a bind request */ + if (size < sizeof (struct mthca_bind_seg)) + size = sizeof (struct mthca_bind_seg); + + size += sizeof (struct mthca_next_seg); + + for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; + qp->sq.wqe_shift++) + ; /* nothing */ + + qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift, + 1 << qp->sq.wqe_shift); + + qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift); + + if (posix_memalign(&qp->buf, g_page_size, + align(qp->buf_size, g_page_size))) { + cl_free(qp->wrid); + return -1; + } + + memset(qp->buf, 0, qp->buf_size); + + if (mthca_is_memfree(pd->context)) { + struct mthca_next_seg *next; + struct mthca_data_seg *scatter; + int i; + uint32_t sz; + + sz = cl_hton32((sizeof (struct mthca_next_seg) + + qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16); + + for (i = 0; i < qp->rq.max; ++i) { + next = get_recv_wqe(qp, i); + next->nda_op = cl_hton32(((i + 1) & (qp->rq.max - 1)) << + qp->rq.wqe_shift); + next->ee_nds = sz; + + for (scatter = (void *) (next + 1); + (void *) scatter < (void *) ((char *)next + (1 << qp->rq.wqe_shift)); + ++scatter) + scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY); + } + + for (i = 0; i < qp->sq.max; ++i) { + next = get_send_wqe(qp, i); + next->nda_op = cl_hton32((((i + 1) & (qp->sq.max - 1)) << + qp->sq.wqe_shift) + + qp->send_wqe_offset); + } + } + + qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); + qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); + + return 0; +} + +struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn) +{ + int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; + + if (ctx->qp_table[tind].refcnt) + return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; + else + return NULL; +} + +int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp) +{ + int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; + int ret = 0; + + WaitForSingleObject( ctx->qp_table_mutex, INFINITE ); + + if (!ctx->qp_table[tind].refcnt) { + ctx->qp_table[tind].table = cl_malloc( + (ctx->qp_table_mask + 1) * sizeof (struct mthca_qp *)); + if (!ctx->qp_table[tind].table) { + ret = -1; + goto out; + } + } + ++ctx->qp_table[tind].refcnt; + ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp; + +out: + ReleaseMutex( ctx->qp_table_mutex ); + return ret; +} + +void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn) +{ + int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; + + WaitForSingleObject( ctx->qp_table_mutex, INFINITE ); + + if (!--ctx->qp_table[tind].refcnt) + cl_free(ctx->qp_table[tind].table); + else + ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL; + + ReleaseMutex( ctx->qp_table_mutex ); +} + +int mthca_free_err_wqe(struct mthca_qp *qp, int is_send, + int index, int *dbd, uint32_t *new_wqe) +{ + struct mthca_next_seg *next; + + /* + * For SRQs, all WQEs generate a CQE, so we're always at the + * end of the doorbell chain. 
+ */ + if (qp->ibv_qp.srq) { + *new_wqe = 0; + return 0; + } + + if (is_send) + next = get_send_wqe(qp, index); + else + next = get_recv_wqe(qp, index); + + *dbd = !!(next->ee_nds & cl_hton32(MTHCA_NEXT_DBD)); + if (next->ee_nds & cl_hton32(0x3f)) + *new_wqe = (next->nda_op & cl_hton32(~0x3f)) | + (next->ee_nds & cl_hton32(0x3f)); + else + *new_wqe = 0; + + return 0; +} + diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp_srq.c b/branches/IBFD/hw/mthca/user/mlnx_uvp_srq.c new file mode 100644 index 00000000..e29bbd3c --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_srq.c @@ -0,0 +1,326 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#include + +#include "mlnx_uvp.h" +#include "mlnx_uvp_doorbell.h" +#include "mthca_wqe.h" + +#if defined(EVENT_TRACING) +#include "mlnx_uvp_srq.tmh" +#endif + +static void *get_wqe(struct mthca_srq *srq, int n) +{ + return (uint8_t*)srq->buf + (n << srq->wqe_shift); +} + +/* + * Return a pointer to the location within a WQE that we're using as a + * link when the WQE is in the free list. We use the imm field at an + * offset of 12 bytes because in the Tavor case, posting a WQE may + * overwrite the next segment of the previous WQE, but a receive WQE + * will never touch the imm field. This avoids corrupting our free + * list if the previous WQE has already completed and been put on the + * free list when we post the next WQE. 
+ */ +static inline int *wqe_to_link(void *wqe) +{ + return (int *) ((uint8_t*)wqe + 12); +} + +void mthca_free_srq_wqe(struct mthca_srq *srq, int ind) +{ + cl_spinlock_acquire(&srq->lock); + + if (srq->first_free >= 0) + *wqe_to_link(get_wqe(srq, srq->last_free)) = ind; + else + srq->first_free = ind; + + *wqe_to_link(get_wqe(srq, ind)) = -1; + srq->last_free = ind; + + cl_spinlock_release(&srq->lock); +} + +int mthca_tavor_post_srq_recv(struct ibv_srq *ibsrq, + struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) +{ + struct mthca_srq *srq = to_msrq(ibsrq); + uint32_t doorbell[2]; + int err = 0; + int first_ind; + int ind; + int next_ind; + int nreq; + int i; + uint8_t *wqe; + uint8_t *prev_wqe; + + cl_spinlock_acquire(&srq->lock); + + first_ind = srq->first_free; + + for (nreq = 0; wr; wr = wr->p_next) { + ind = srq->first_free; + + if (ind < 0) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SRQ %06x full\n", srq->srqn)); + err = -1; + *bad_wr = wr; + break; + } + + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + + if (next_ind < 0) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SRQ %06x full\n", srq->srqn)); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + prev_wqe = srq->last; + srq->last = wqe; + + ((struct mthca_next_seg *) wqe)->nda_op = 0; + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely((int)wr->num_ds > srq->max_gs)) { + err = -1; + *bad_wr = wr; + srq->last = prev_wqe; + break; + } + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + htonll(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cl_hton32((ind << srq->wqe_shift) | 1); + mb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cl_hton32(MTHCA_NEXT_DBD); + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + + if (++nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB) { + nreq = 0; + + doorbell[0] = cl_hton32(first_ind << srq->wqe_shift); + doorbell[1] = cl_hton32(srq->srqn << 8); + + /* + * Make sure that descriptors are written + * before doorbell is rung. + */ + wmb(); + + mthca_write64(doorbell, to_mctx(ibsrq->context), MTHCA_RECV_DOORBELL); + + first_ind = srq->first_free; + } + } + + if (nreq) { + doorbell[0] = cl_hton32(first_ind << srq->wqe_shift); + doorbell[1] = cl_hton32((srq->srqn << 8) | nreq); + + /* + * Make sure that descriptors are written before + * doorbell is rung. 
+ */ + wmb(); + + mthca_write64(doorbell, to_mctx(ibsrq->context), MTHCA_RECV_DOORBELL); + } + + cl_spinlock_release(&srq->lock); + return err; +} + +int mthca_arbel_post_srq_recv(struct ibv_srq *ibsrq, + struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr) +{ + struct mthca_srq *srq = to_msrq(ibsrq); + int err = 0; + int ind; + int next_ind; + int nreq; + int i; + uint8_t *wqe; + + cl_spinlock_acquire(&srq->lock); + + for (nreq = 0; wr; ++nreq, wr = wr->p_next) { + ind = srq->first_free; + + if (ind < 0) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("SRQ %06x full\n", srq->srqn)); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe = get_wqe(srq, ind); + next_ind = *wqe_to_link(wqe); + + if (next_ind < 0) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_LOW ,("SRQ %06x full\n", srq->srqn)); + err = -ENOMEM; + *bad_wr = wr; + break; + } + + ((struct mthca_next_seg *) wqe)->nda_op = + cl_hton32((next_ind << srq->wqe_shift) | 1); + ((struct mthca_next_seg *) wqe)->ee_nds = 0; + /* flags field will always remain 0 */ + + wqe += sizeof (struct mthca_next_seg); + + if (unlikely((int)wr->num_ds > srq->max_gs)) { + err = -1; + *bad_wr = wr; + break; + } + + for (i = 0; i < (int)wr->num_ds; ++i) { + ((struct mthca_data_seg *) wqe)->byte_count = + cl_hton32(wr->ds_array[i].length); + ((struct mthca_data_seg *) wqe)->lkey = + cl_hton32(wr->ds_array[i].lkey); + ((struct mthca_data_seg *) wqe)->addr = + htonll(wr->ds_array[i].vaddr); + wqe += sizeof (struct mthca_data_seg); + } + + if (i < srq->max_gs) { + ((struct mthca_data_seg *) wqe)->byte_count = 0; + ((struct mthca_data_seg *) wqe)->lkey = cl_hton32(MTHCA_INVAL_LKEY); + ((struct mthca_data_seg *) wqe)->addr = 0; + } + + srq->wrid[ind] = wr->wr_id; + srq->first_free = next_ind; + } + + if (likely(nreq)) { + srq->counter += (uint16_t)nreq; + + /* + * Make sure that descriptors are written before + * we write doorbell record. + */ + wmb(); + *srq->db = cl_hton32(srq->counter); + } + + cl_spinlock_release(&srq->lock); + return err; +} + +int mthca_alloc_srq_buf(struct ibv_pd *pd, struct ibv_srq_attr *attr, + struct mthca_srq *srq) +{ + struct mthca_data_seg *scatter; + uint8_t *wqe; + int size; + int i; + + srq->wrid = cl_malloc(srq->max * sizeof (uint64_t)); + if (!srq->wrid) + return -1; + + size = sizeof (struct mthca_next_seg) + + srq->max_gs * sizeof (struct mthca_data_seg); + + for (srq->wqe_shift = 6; 1 << srq->wqe_shift < size; ++srq->wqe_shift) + ; /* nothing */ + + srq->buf_size = srq->max << srq->wqe_shift; + + if (posix_memalign(&srq->buf, g_page_size, + align(srq->buf_size, g_page_size))) { + cl_free(srq->wrid); + return -1; + } + + cl_memclr(srq->buf, srq->buf_size); + + /* + * Now initialize the SRQ buffer so that all of the WQEs are + * linked into the list of free WQEs. In addition, set the + * scatter list L_Keys to the sentry value of 0x100. + */ + + for (i = 0; i < srq->max; ++i) { + wqe = get_wqe(srq, i); + + *wqe_to_link(wqe) = i < srq->max - 1 ? 
i + 1 : -1; + + for (scatter = (struct mthca_data_seg *)(wqe + sizeof (struct mthca_next_seg)); + (void *) scatter < (void*)(wqe + (1 << srq->wqe_shift)); + ++scatter) + scatter->lkey = cl_hton32(MTHCA_INVAL_LKEY); + } + + srq->first_free = 0; + srq->last_free = srq->max - 1; + srq->last = get_wqe(srq, srq->max - 1); + + return 0; +} diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp_verbs.c b/branches/IBFD/hw/mthca/user/mlnx_uvp_verbs.c new file mode 100644 index 00000000..c9468ee8 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_verbs.c @@ -0,0 +1,532 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id$ + */ + +#include + +#include "mlnx_uvp.h" +#include "mx_abi.h" +#include "mthca_wqe.h" + + +#if defined(EVENT_TRACING) +#include "mlnx_uvp_verbs.tmh" +#endif + +struct ibv_pd *mthca_alloc_pd(struct ibv_context *context, struct ibv_alloc_pd_resp *resp) +{ + struct mthca_pd *pd; + + pd = cl_zalloc(sizeof *pd); + if (!pd) + goto err_malloc; + + if (!mthca_is_memfree(context)) { + pd->ah_list = NULL; + pd->ah_mutex = CreateMutex( NULL, FALSE, NULL ); + if (!pd->ah_mutex) + goto err_mutex; + } + + /* fill response fields */ + pd->ibv_pd.context = context; + pd->ibv_pd.handle = resp->pd_handle; + pd->pdn = resp->pdn; + + return &pd->ibv_pd; + +err_mutex: + cl_free(pd); +err_malloc: + return NULL; +} + +int mthca_free_pd(struct ibv_pd *ibv_pd) +{ + struct mthca_pd *pd = to_mpd(ibv_pd); + if (!mthca_is_memfree(ibv_pd->context)) { + struct mthca_ah_page *page, *next_page; + WaitForSingleObject( pd->ah_mutex, INFINITE ); + for (page = pd->ah_list; page; page = next_page) { + next_page = page->next; + #ifdef NOT_USE_VIRTUAL_ALLOC + cl_free(page->buf); + #else + VirtualFree( page->buf, 0, MEM_RELEASE); + #endif + cl_free(page); + } + ReleaseMutex( pd->ah_mutex ); + CloseHandle(pd->ah_mutex); + } + cl_free(pd); + return 0; +} + +/* allocate create_cq infrastructure and fill it's request parameters structure */ +struct ibv_cq *mthca_create_cq_pre(struct ibv_context *context, int *p_cqe, + struct ibv_create_cq *req) +{ + struct mthca_cq *cq; + int nent; + int ret; + + /* Sanity check CQ size before proceeding */ + if (*p_cqe > 131072) + goto exit; + + cq = cl_zalloc(sizeof *cq); + if (!cq) + goto exit; + + cl_spinlock_construct(&cq->lock); + if (cl_spinlock_init(&cq->lock)) + goto err; + + for (nent = 1; nent <= *p_cqe; nent <<= 1) + ; /* nothing */ + + if (posix_memalign(&cq->buf, g_page_size, + align(nent * MTHCA_CQ_ENTRY_SIZE, g_page_size))) + goto err_memalign; + + mthca_init_cq_buf(cq, nent); + + if (mthca_is_memfree(context)) { + cq->set_ci_db_index = mthca_alloc_db(to_mctx(context)->db_tab, + MTHCA_DB_TYPE_CQ_SET_CI, + &cq->set_ci_db); + if (cq->set_ci_db_index < 0) + goto err_unreg; + + cq->arm_db_index = mthca_alloc_db(to_mctx(context)->db_tab, + MTHCA_DB_TYPE_CQ_ARM, + &cq->arm_db); + if (cq->arm_db_index < 0) + goto err_set_db; + + cq->u_arm_db_index = mthca_alloc_db(to_mctx(context)->db_tab, + MTHCA_DB_TYPE_CQ_ARM, + &cq->p_u_arm_sn); + if (cq->u_arm_db_index < 0) + goto err_arm_db; + + *cq->p_u_arm_sn = 1; + + req->arm_db_page = db_align(cq->arm_db); + req->set_db_page = db_align(cq->set_ci_db); + req->u_arm_db_page = (uint64_t)(ULONG_PTR)cq->p_u_arm_sn; + req->arm_db_index = cq->arm_db_index; + req->set_db_index = cq->set_ci_db_index; + req->u_arm_db_index = cq->u_arm_db_index; + } + + req->mr.start = (uint64_t)(ULONG_PTR)cq->buf; + req->mr.length = nent * MTHCA_CQ_ENTRY_SIZE; + req->mr.hca_va = 0; + req->mr.pd_handle = to_mctx(context)->pd->handle; + req->mr.pdn = to_mpd(to_mctx(context)->pd)->pdn; + req->mr.access_flags = MTHCA_ACCESS_LOCAL_WRITE; + req->user_handle = (uint64_t)(ULONG_PTR)cq; +#if 1 + req->cqe = *p_cqe; + *p_cqe = nent-1; +// *p_cqe = *p_cqe; // return the same value +// cq->ibv_cq.cqe = nent -1; +#else + req->cqe = nent; + *p_cqe = *p_cqe; // return the same value +#endif + return &cq->ibv_cq; + +err_arm_db: + if (mthca_is_memfree(context)) + mthca_free_db(to_mctx(context)->db_tab, MTHCA_DB_TYPE_CQ_SET_CI, + cq->arm_db_index); + +err_set_db: + if (mthca_is_memfree(context)) + mthca_free_db(to_mctx(context)->db_tab, MTHCA_DB_TYPE_CQ_SET_CI, + 
cq->set_ci_db_index); + +err_unreg: + cl_free(cq->buf); + +err_memalign: + cl_spinlock_destroy(&cq->lock); + +err: + cl_free(cq); + +exit: + return ERR_PTR(-ENOMEM); +} + +struct ibv_cq *mthca_create_cq_post(struct ibv_context *context, + struct ibv_create_cq_resp *resp) +{ + struct mthca_cq *cq; + int ret; + + cq = (struct mthca_cq *)(ULONG_PTR)resp->user_handle; + + cq->cqn = resp->cqn; + cq->mr.handle = resp->mr.mr_handle; + cq->mr.lkey = resp->mr.lkey; + cq->mr.rkey = resp->mr.rkey; + cq->mr.pd = to_mctx(context)->pd; + cq->mr.context = context; + cq->ibv_cq.cqe = resp->cqe; + cq->ibv_cq.handle = resp->cq_handle; + cq->ibv_cq.context = context; + + if (mthca_is_memfree(context)) { + mthca_set_db_qn(cq->set_ci_db, MTHCA_DB_TYPE_CQ_SET_CI, cq->cqn); + mthca_set_db_qn(cq->arm_db, MTHCA_DB_TYPE_CQ_ARM, cq->cqn); + } + + return &cq->ibv_cq; + +} + +int mthca_destroy_cq(struct ibv_cq *cq) +{ + int ret; + + if (mthca_is_memfree(cq->context)) { + mthca_free_db(to_mctx(cq->context)->db_tab, MTHCA_DB_TYPE_CQ_SET_CI, + to_mcq(cq)->u_arm_db_index); + mthca_free_db(to_mctx(cq->context)->db_tab, MTHCA_DB_TYPE_CQ_SET_CI, + to_mcq(cq)->set_ci_db_index); + mthca_free_db(to_mctx(cq->context)->db_tab, MTHCA_DB_TYPE_CQ_ARM, + to_mcq(cq)->arm_db_index); + } + +#ifdef NOT_USE_VIRTUAL_ALLOC + cl_free(to_mcq(cq)->buf); +#else + VirtualFree( to_mcq(cq)->buf, 0, MEM_RELEASE); +#endif + + + cl_spinlock_destroy(&((struct mthca_cq *)cq)->lock); + cl_free(to_mcq(cq)); + + return 0; +} + +int align_queue_size(struct ibv_context *context, int size, int spare) +{ + int ret; + + /* + * If someone asks for a 0-sized queue, presumably they're not + * going to use it. So don't mess with their size. + */ + if (!size) + return 0; + + if (mthca_is_memfree(context)) { + for (ret = 1; ret < size + spare; ret <<= 1) + ; /* nothing */ + + return ret; + } else + return size + spare; +} + +struct ibv_qp *mthca_create_qp_pre(struct ibv_pd *pd, + struct ibv_qp_init_attr *attr, struct ibv_create_qp *req) +{ + struct mthca_qp *qp; + struct ibv_context *context = pd->context; + int ret = -ENOMEM; + + UVP_ENTER(UVP_DBG_QP); + /* Sanity check QP size before proceeding */ + if (attr->cap.max_send_wr > 65536 || + attr->cap.max_recv_wr > 65536 || + attr->cap.max_send_sge > 64 || + attr->cap.max_recv_sge > 64 || + attr->cap.max_inline_data > 1024) { + ret = -EINVAL; + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("sanity checks failed (%d)\n",ret)); + goto exit; + } + + qp = cl_zalloc(sizeof *qp); + if (!qp) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("cl_malloc failed (%d)\n",ret)); + goto err_nomem; + } + + qp->sq.max = align_queue_size(context, attr->cap.max_send_wr, 0); + qp->rq.max = align_queue_size(context, attr->cap.max_recv_wr, 0); + + if (mthca_alloc_qp_buf(pd, &attr->cap, attr->qp_type, qp)) { + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("mthca_alloc_qp_buf failed (%d)\n",ret)); + goto err_nomem; + } + + mthca_init_qp_indices(qp); + + cl_spinlock_construct(&qp->sq.lock); + if (cl_spinlock_init(&qp->sq.lock)) { + ret = -EFAULT; + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("cl_spinlock_init failed for sq (%d)\n",ret)); + goto err_spinlock_sq; + } + + cl_spinlock_construct(&qp->rq.lock); + if (cl_spinlock_init(&qp->rq.lock)) { + ret = -EFAULT; + UVP_PRINT(TRACE_LEVEL_ERROR ,UVP_DBG_QP ,("cl_spinlock_init failed for rq (%d)\n",ret)); + goto err_spinlock_rq; + } + + if (mthca_is_memfree(context)) { + qp->sq.db_index = mthca_alloc_db(to_mctx(context)->db_tab, + MTHCA_DB_TYPE_SQ, + &qp->sq.db); + if (qp->sq.db_index < 0) + goto err_sq_db; + 
+ qp->rq.db_index = mthca_alloc_db(to_mctx(context)->db_tab, + MTHCA_DB_TYPE_RQ, + &qp->rq.db); + if (qp->rq.db_index < 0) + goto err_rq_db; + + req->sq_db_page = db_align(qp->sq.db); + req->rq_db_page = db_align(qp->rq.db); + req->sq_db_index = qp->sq.db_index; + req->rq_db_index = qp->rq.db_index; + } + + // fill the rest qp fields + qp->ibv_qp.pd = pd; + qp->ibv_qp.context= pd->context; + qp->ibv_qp.send_cq = attr->send_cq; + qp->ibv_qp.recv_cq = attr->recv_cq; + qp->ibv_qp.srq = attr->srq; + qp->ibv_qp.state = IBV_QPS_RESET; + qp->ibv_qp.qp_type = attr->qp_type; + + // fill the rest request fields + req->mr.start = (uint64_t)(ULONG_PTR)qp->buf; + req->mr.length = qp->buf_size; + req->mr.hca_va = 0; + req->mr.pd_handle = pd->handle; + req->mr.pdn = to_mpd(pd)->pdn; + req->mr.access_flags = 0; //local read + req->user_handle = (uint64_t)(ULONG_PTR)qp; + req->send_cq_handle = attr->send_cq->handle; + req->recv_cq_handle = attr->recv_cq->handle; + req->srq_handle = (attr->srq) ? attr->srq->handle : 0; + req->max_send_wr = attr->cap.max_send_wr; + req->max_recv_wr = attr->cap.max_recv_wr; + req->max_send_sge = attr->cap.max_send_sge; + req->max_recv_sge = attr->cap.max_recv_sge; + req->max_inline_data = attr->cap.max_inline_data; + req->sq_sig_all = (uint8_t)attr->sq_sig_all; + req->qp_type = attr->qp_type; + req->is_srq = !!attr->srq; + + + UVP_EXIT(UVP_DBG_QP); + return &qp->ibv_qp; + +err_rq_db: + if (mthca_is_memfree(context)) + mthca_free_db(to_mctx(context)->db_tab, + MTHCA_DB_TYPE_SQ, qp->sq.db_index); + +err_sq_db: + cl_spinlock_destroy(&qp->rq.lock); + +err_spinlock_rq: + cl_spinlock_destroy(&qp->sq.lock); + +err_spinlock_sq: + cl_free(qp->wrid); +#ifdef NOT_USE_VIRTUAL_ALLOC + cl_free(qp->buf); +#else + VirtualFree( qp->buf, 0, MEM_RELEASE); +#endif + +err_nomem: + cl_free(qp); + +exit: + + UVP_EXIT(UVP_DBG_QP); + return ERR_PTR(ret); +} + +struct ibv_qp *mthca_create_qp_post(struct ibv_pd *pd, + struct ibv_create_qp_resp *resp) +{ + struct mthca_qp *qp; + int ret; + UVP_ENTER(UVP_DBG_QP); + qp = (struct mthca_qp *)(ULONG_PTR)resp->user_handle; + + qp->ibv_qp.handle = resp->qp_handle; + qp->ibv_qp.qp_num = resp->qpn; + qp->sq.max = resp->max_send_wr; + qp->rq.max = resp->max_recv_wr; + qp->sq.max_gs = resp->max_send_sge; + qp->rq.max_gs = resp->max_recv_sge; + qp->max_inline_data = resp->max_inline_data; + qp->mr.handle = resp->mr.mr_handle; + qp->mr.lkey = resp->mr.lkey; + qp->mr.rkey = resp->mr.rkey; + qp->mr.pd = pd; + qp->mr.context = pd->context; + + if (mthca_is_memfree(pd->context)) { + mthca_set_db_qn(qp->sq.db, MTHCA_DB_TYPE_SQ, qp->ibv_qp.qp_num); + mthca_set_db_qn(qp->rq.db, MTHCA_DB_TYPE_RQ, qp->ibv_qp.qp_num); + } + + ret = mthca_store_qp(to_mctx(pd->context), qp->ibv_qp.qp_num, qp); + if (ret) + goto err_store_qp; + + UVP_EXIT(UVP_DBG_QP); + return &qp->ibv_qp; + +err_store_qp: + UVP_EXIT(UVP_DBG_QP); + return ERR_PTR(ret); +} + + +int mthca_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask) +{ + int ret = 0; + + if (attr_mask & IBV_QP_STATE) + qp->state = attr->qp_state; + + if ((attr_mask & IBV_QP_STATE) && + (attr->qp_state == IBV_QPS_RESET)) { + mthca_cq_clean(to_mcq(qp->recv_cq), qp->qp_num, + qp->srq ? 
to_msrq(qp->srq) : NULL); + if (qp->send_cq != qp->recv_cq) + mthca_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL); + + mthca_init_qp_indices(to_mqp(qp)); + + if (mthca_is_memfree(qp->pd->context)) { + *to_mqp(qp)->sq.db = 0; + *to_mqp(qp)->rq.db = 0; + } + } + + return ret; +} + + +void mthca_destroy_qp_pre(struct ibv_qp *qp) +{ + int ret; + + mthca_cq_clean(to_mcq(qp->recv_cq), qp->qp_num, + qp->srq ? to_msrq(qp->srq) : NULL); + if (qp->send_cq != qp->recv_cq) + mthca_cq_clean(to_mcq(qp->send_cq), qp->qp_num, NULL); + + cl_spinlock_acquire(&to_mcq(qp->send_cq)->lock); + if (qp->send_cq != qp->recv_cq) + cl_spinlock_acquire(&to_mcq(qp->recv_cq)->lock); + mthca_clear_qp(to_mctx(qp->pd->context), qp->qp_num); + if (qp->send_cq != qp->recv_cq) + cl_spinlock_release(&to_mcq(qp->recv_cq)->lock); + cl_spinlock_release(&to_mcq(qp->send_cq)->lock); +} + +void mthca_destroy_qp_post(struct ibv_qp *qp, int ret) +{ + if (ret) { + cl_spinlock_acquire(&to_mcq(qp->send_cq)->lock); + if (qp->send_cq != qp->recv_cq) + cl_spinlock_acquire(&to_mcq(qp->recv_cq)->lock); + mthca_store_qp(to_mctx(qp->pd->context), qp->qp_num, to_mqp(qp)); + if (qp->send_cq != qp->recv_cq) + cl_spinlock_release(&to_mcq(qp->recv_cq)->lock); + cl_spinlock_release(&to_mcq(qp->send_cq)->lock); + } + else { + if (mthca_is_memfree(qp->pd->context)) { + mthca_free_db(to_mctx(qp->pd->context)->db_tab, MTHCA_DB_TYPE_RQ, + to_mqp(qp)->rq.db_index); + mthca_free_db(to_mctx(qp->pd->context)->db_tab, MTHCA_DB_TYPE_SQ, + to_mqp(qp)->sq.db_index); + } + + cl_spinlock_destroy(&((struct mthca_qp *)qp)->sq.lock); + cl_spinlock_destroy(&((struct mthca_qp *)qp)->rq.lock); + +#ifdef NOT_USE_VIRTUAL_ALLOC + cl_free(to_mqp(qp)->buf); +#else + VirtualFree( to_mqp(qp)->buf, 0, MEM_RELEASE); +#endif + cl_free(to_mqp(qp)->wrid); + cl_free(to_mqp(qp)); + } + +} + +int mthca_attach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) +{ +#ifdef WIN_TO_BE_CHANGED + return ibv_cmd_attach_mcast(qp, gid, lid); +#else + return -ENOSYS; +#endif +} + +int mthca_detach_mcast(struct ibv_qp *qp, union ibv_gid *gid, uint16_t lid) +{ +#ifdef WIN_TO_BE_CHANGED + return ibv_cmd_detach_mcast(qp, gid, lid); +#else + return -ENOSYS; +#endif +} + diff --git a/branches/IBFD/hw/mthca/user/mlnx_uvp_verbs.h b/branches/IBFD/hw/mthca/user/mlnx_uvp_verbs.h new file mode 100644 index 00000000..5ea2dabb --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mlnx_uvp_verbs.h @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef MLNX_UVP_VERBS_H +#define MLNX_UVP_VERBS_H + +#include + +#ifdef __cplusplus +# define BEGIN_C_DECLS extern "C" { +# define END_C_DECLS } +#else /* !__cplusplus */ +# define BEGIN_C_DECLS +# define END_C_DECLS +#endif /* __cplusplus */ + +BEGIN_C_DECLS + +union ibv_gid { + uint8_t raw[16]; + struct { + uint64_t subnet_prefix; + uint64_t interface_id; + } global; +}; + +enum ibv_node_type { + IBV_NODE_CA = 1, + IBV_NODE_SWITCH, + IBV_NODE_ROUTER +}; + +enum ibv_device_cap_flags { + IBV_DEVICE_RESIZE_MAX_WR = 1, + IBV_DEVICE_BAD_PKEY_CNTR = 1 << 1, + IBV_DEVICE_BAD_QKEY_CNTR = 1 << 2, + IBV_DEVICE_RAW_MULTI = 1 << 3, + IBV_DEVICE_AUTO_PATH_MIG = 1 << 4, + IBV_DEVICE_CHANGE_PHY_PORT = 1 << 5, + IBV_DEVICE_UD_AV_PORT_ENFORCE = 1 << 6, + IBV_DEVICE_CURR_QP_STATE_MOD = 1 << 7, + IBV_DEVICE_SHUTDOWN_PORT = 1 << 8, + IBV_DEVICE_INIT_TYPE = 1 << 9, + IBV_DEVICE_PORT_ACTIVE_EVENT = 1 << 10, + IBV_DEVICE_SYS_IMAGE_GUID = 1 << 11, + IBV_DEVICE_RC_RNR_NAK_GEN = 1 << 12, + IBV_DEVICE_SRQ_RESIZE = 1 << 13, + IBV_DEVICE_N_NOTIFY_CQ = 1 << 14, +}; + +enum ibv_atomic_cap { + IBV_ATOMIC_NONE, + IBV_ATOMIC_HCA, + IBV_ATOMIC_GLOB +}; + +struct ibv_device_attr { + char fw_ver[64]; + uint64_t node_guid; + uint64_t sys_image_guid; + uint64_t max_mr_size; + uint64_t page_size_cap; + uint32_t vendor_id; + uint32_t vendor_part_id; + uint32_t hw_ver; + int max_qp; + int max_qp_wr; + int device_cap_flags; + int max_sge; + int max_sge_rd; + int max_cq; + int max_cqe; + int max_mr; + int max_pd; + int max_qp_rd_atom; + int max_ee_rd_atom; + int max_res_rd_atom; + int max_qp_init_rd_atom; + int max_ee_init_rd_atom; + enum ibv_atomic_cap atomic_cap; + int max_ee; + int max_rdd; + int max_mw; + int max_raw_ipv6_qp; + int max_raw_ethy_qp; + int max_mcast_grp; + int max_mcast_qp_attach; + int max_total_mcast_qp_attach; + int max_ah; + int max_fmr; + int max_map_per_fmr; + int max_srq; + int max_srq_wr; + int max_srq_sge; + uint16_t max_pkeys; + uint8_t local_ca_ack_delay; + uint8_t phys_port_cnt; +}; + +enum ibv_mtu { + IBV_MTU_256 = 1, + IBV_MTU_512 = 2, + IBV_MTU_1024 = 3, + IBV_MTU_2048 = 4, + IBV_MTU_4096 = 5 +}; + +enum ibv_port_state { + IBV_PORT_NOP = 0, + IBV_PORT_DOWN = 1, + IBV_PORT_INIT = 2, + IBV_PORT_ARMED = 3, + IBV_PORT_ACTIVE = 4, + IBV_PORT_ACTIVE_DEFER = 5 +}; + +struct ibv_port_attr { + enum ibv_port_state state; + enum ibv_mtu max_mtu; + enum ibv_mtu active_mtu; + int gid_tbl_len; + uint32_t port_cap_flags; + uint32_t max_msg_sz; + uint32_t bad_pkey_cntr; + uint32_t qkey_viol_cntr; + uint16_t pkey_tbl_len; + uint16_t lid; + uint16_t sm_lid; + uint8_t lmc; + uint8_t max_vl_num; + uint8_t sm_sl; + uint8_t subnet_timeout; + uint8_t init_type_reply; + uint8_t active_width; + uint8_t active_speed; + uint8_t phys_state; +}; + +enum ibv_event_type { + IBV_EVENT_CQ_ERR, + IBV_EVENT_QP_FATAL, + IBV_EVENT_QP_REQ_ERR, + IBV_EVENT_QP_ACCESS_ERR, + IBV_EVENT_COMM_EST, + IBV_EVENT_SQ_DRAINED, + IBV_EVENT_PATH_MIG, + IBV_EVENT_PATH_MIG_ERR, + IBV_EVENT_DEVICE_FATAL, + IBV_EVENT_PORT_ACTIVE, + 
IBV_EVENT_PORT_ERR, + IBV_EVENT_LID_CHANGE, + IBV_EVENT_PKEY_CHANGE, + IBV_EVENT_SM_CHANGE, + IBV_EVENT_SRQ_ERR, + IBV_EVENT_SRQ_LIMIT_REACHED, + IBV_EVENT_QP_LAST_WQE_REACHED +}; + +struct ibv_async_event { + union { + struct ibv_cq *cq; + struct ibv_qp *qp; + struct ibv_srq *srq; + int port_num; + } element; + enum ibv_event_type event_type; +}; + +enum ibv_access_flags { + IBV_ACCESS_LOCAL_WRITE = 1, + IBV_ACCESS_REMOTE_WRITE = (1<<1), + IBV_ACCESS_REMOTE_READ = (1<<2), + IBV_ACCESS_REMOTE_ATOMIC = (1<<3), + IBV_ACCESS_MW_BIND = (1<<4) +}; + +struct ibv_pd { + struct ibv_context *context; + uint64_t handle; +}; + +struct ibv_mr { + struct ibv_context *context; + struct ibv_pd *pd; + uint64_t handle; + uint32_t lkey; + uint32_t rkey; +}; + +struct ibv_global_route { + ib_gid_t dgid; + uint32_t flow_label; + uint8_t sgid_index; + uint8_t hop_limit; + uint8_t traffic_class; +}; + +struct ibv_ah_attr { + struct ibv_global_route grh; + uint16_t dlid; + uint8_t sl; + uint8_t src_path_bits; + uint8_t static_rate; + uint8_t is_global; + uint8_t port_num; +}; + + +enum ib_cq_notify { + IB_CQ_SOLICITED, + IB_CQ_NEXT_COMP +}; + +enum ibv_srq_attr_mask { + IBV_SRQ_MAX_WR = 1 << 0, + IBV_SRQ_LIMIT = 1 << 1, +}; + +struct ibv_srq_attr { + uint32_t max_wr; + uint32_t max_sge; + uint32_t srq_limit; +}; + +struct ibv_srq_init_attr { + void *srq_context; + struct ibv_srq_attr attr; +}; + +struct ibv_qp_cap { + uint32_t max_send_wr; + uint32_t max_recv_wr; + uint32_t max_send_sge; + uint32_t max_recv_sge; + uint32_t max_inline_data; +}; + +struct ibv_qp_init_attr { + void *qp_context; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + struct ibv_qp_cap cap; + ib_qp_type_t qp_type; + int sq_sig_all; +}; + +enum ibv_qp_attr_mask { + IBV_QP_STATE = 1 << 0, + IBV_QP_CUR_STATE = 1 << 1, + IBV_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2, + IBV_QP_ACCESS_FLAGS = 1 << 3, + IBV_QP_PKEY_INDEX = 1 << 4, + IBV_QP_PORT = 1 << 5, + IBV_QP_QKEY = 1 << 6, + IBV_QP_AV = 1 << 7, + IBV_QP_PATH_MTU = 1 << 8, + IBV_QP_TIMEOUT = 1 << 9, + IBV_QP_RETRY_CNT = 1 << 10, + IBV_QP_RNR_RETRY = 1 << 11, + IBV_QP_RQ_PSN = 1 << 12, + IBV_QP_MAX_QP_RD_ATOMIC = 1 << 13, + IBV_QP_ALT_PATH = 1 << 14, + IBV_QP_MIN_RNR_TIMER = 1 << 15, + IBV_QP_SQ_PSN = 1 << 16, + IBV_QP_MAX_DEST_RD_ATOMIC = 1 << 17, + IBV_QP_PATH_MIG_STATE = 1 << 18, + IBV_QP_CAP = 1 << 19, + IBV_QP_DEST_QPN = 1 << 20 +}; + +enum ibv_qp_state { + IBV_QPS_RESET, + IBV_QPS_INIT, + IBV_QPS_RTR, + IBV_QPS_RTS, + IBV_QPS_SQD, + IBV_QPS_SQE, + IBV_QPS_ERR +}; + +enum ibv_mig_state { + IBV_MIG_MIGRATED, + IBV_MIG_REARM, + IBV_MIG_ARMED +}; + +struct ibv_qp_attr { + enum ibv_qp_state qp_state; + enum ibv_qp_state cur_qp_state; + enum ibv_mtu path_mtu; + enum ibv_mig_state path_mig_state; + uint32_t qkey; + uint32_t rq_psn; + uint32_t sq_psn; + uint32_t dest_qp_num; + int qp_access_flags; + struct ibv_qp_cap cap; + struct ibv_ah_attr ah_attr; + struct ibv_ah_attr alt_ah_attr; + uint16_t pkey_index; + uint16_t alt_pkey_index; + uint8_t en_sqd_async_notify; + uint8_t sq_draining; + uint8_t max_rd_atomic; + uint8_t max_dest_rd_atomic; + uint8_t min_rnr_timer; + uint8_t port_num; + uint8_t timeout; + uint8_t retry_cnt; + uint8_t rnr_retry; + uint8_t alt_port_num; + uint8_t alt_timeout; +}; + + +enum ibv_send_flags { + IBV_SEND_FENCE = 1 << 0, + IBV_SEND_SIGNALED = 1 << 1, + IBV_SEND_SOLICITED = 1 << 2, + IBV_SEND_INLINE = 1 << 3 +}; + +struct ibv_sge { + uint64_t addr; + uint32_t length; + uint32_t lkey; +}; + +struct ibv_send_wr { + struct ibv_send_wr *next; + uint64_t 
wr_id; + struct ibv_sge *sg_list; + int num_sge; + enum ibv_wr_opcode opcode; + enum ibv_send_flags send_flags; + uint32_t imm_data; /* in network byte order */ + union { + struct { + uint64_t remote_addr; + uint32_t rkey; + } rdma; + struct { + uint64_t remote_addr; + uint64_t compare_add; + uint64_t swap; + uint32_t rkey; + } atomic; + struct { + struct mthca_ah *ah; + uint32_t remote_qpn; + uint32_t remote_qkey; + } ud; + } wr; +}; + +struct ibv_recv_wr { + struct ibv_recv_wr *next; + uint64_t wr_id; + struct ibv_sge *sg_list; + int num_sge; +}; + +typedef enum MTHCA_QP_ACCESS_FLAGS { + MTHCA_ACCESS_LOCAL_WRITE = 1, + MTHCA_ACCESS_REMOTE_WRITE = (1<<1), + MTHCA_ACCESS_REMOTE_READ = (1<<2), + MTHCA_ACCESS_REMOTE_ATOMIC = (1<<3), + MTHCA_ACCESS_MW_BIND = (1<<4) +} mthca_qp_access_t; + + +struct ibv_srq { + struct ibv_pd *pd; + uint64_t handle; + struct ibv_context *context; +}; + +struct ibv_qp { + struct ibv_pd *pd; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + uint64_t handle; + uint32_t qp_num; + enum ibv_qp_state state; + ib_qp_type_t qp_type; + struct ibv_context *context; +}; + +struct ibv_cq { + uint64_t handle; + int cqe; + struct ibv_context *context; +}; + +struct ibv_ah { + struct ibv_pd *pd; +}; + +struct ibv_context_ops { + int (*query_device)(struct ibv_context *context, + struct ibv_device_attr *device_attr); + int (*query_port)(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr); + struct ibv_pd * (*alloc_pd)(struct ibv_context *context, struct ibv_alloc_pd_resp *resp_p); + int (*dealloc_pd)(struct ibv_pd *pd); + struct ibv_mr * (*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, + enum ibv_access_flags access); + int (*dereg_mr)(struct ibv_mr *mr); + struct ibv_cq * (*create_cq_pre)(struct ibv_context *context, int *cqe, + struct ibv_create_cq *req); + struct ibv_cq * (*create_cq_post)(struct ibv_context *context, + struct ibv_create_cq_resp *resp); + int (*poll_cq)(struct ibv_cq *cq, int num_entries, struct _ib_wc *wc); + int (*poll_cq_list)( struct ibv_cq *ibcq, + struct _ib_wc** const pp_free_wclist, + struct _ib_wc** const pp_done_wclist ); + int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only); + int (*destroy_cq)(struct ibv_cq *cq); + struct ibv_srq * (*create_srq)(struct ibv_pd *pd, + struct ibv_srq_init_attr *srq_init_attr); + int (*modify_srq)(struct ibv_srq *srq, + struct ibv_srq_attr *srq_attr, + enum ibv_srq_attr_mask srq_attr_mask); + int (*destroy_srq)(struct ibv_srq *srq); + int (*post_srq_recv)(struct ibv_srq *srq, + struct _ib_recv_wr *recv_wr, + struct _ib_recv_wr **bad_recv_wr); + struct ibv_qp *(*create_qp_pre)(struct ibv_pd *pd, + struct ibv_qp_init_attr *attr, struct ibv_create_qp *req); + struct ibv_qp *(*create_qp_post)(struct ibv_pd *pd, + struct ibv_create_qp_resp *resp); + int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, + enum ibv_qp_attr_mask attr_mask); + int (*destroy_qp)(struct ibv_qp *qp); + int (*post_send)(struct ibv_qp *qp, struct _ib_send_wr *wr, + struct _ib_send_wr **bad_wr); + int (*post_recv)(struct ibv_qp *qp, struct _ib_recv_wr *wr, + struct _ib_recv_wr **bad_wr); + int (*attach_mcast)(struct ibv_qp *qp, union ibv_gid *gid, + uint16_t lid); + int (*detach_mcast)(struct ibv_qp *qp, union ibv_gid *gid, + uint16_t lid); +}; + +struct ibv_context { + struct ibv_context_ops ops; + void *abi_compat; +}; + +int align_queue_size(struct ibv_context *context, int size, int spare); + +END_C_DECLS + +#endif /* INFINIBAND_VERBS_H */ diff --git 
a/branches/IBFD/hw/mthca/user/mt_l2w.h b/branches/IBFD/hw/mthca/user/mt_l2w.h new file mode 100644 index 00000000..9f204d22 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/mt_l2w.h @@ -0,0 +1,87 @@ +#ifndef UMT_L2W_H +#define UMT_L2W_H + +// =========================================== +// INCLUDES +// =========================================== + +// OS +#include +#include +#include +//#include +#include +#include +//#include +#include + + +// =========================================== +// SUBSTITUTIONS +// =========================================== + +#define inline __inline +#define likely(x) (x) +#define unlikely(x) (x) + +// =========================================== +// LITERALS +// =========================================== + + + +// =========================================== +// TYPES +// =========================================== + + +// =========================================== +// MACROS +// =========================================== + +// nullifying macros + +#define ERR_PTR(error) ((void*)(LONG_PTR)(error)) +#define PTR_ERR(ptr) ((long)(LONG_PTR)(void*)(ptr)) +//TODO: there are 2 assumptions here: +// - pointer can't be too big (around -1) +// - error can't be bigger than 1000 +#define IS_ERR(ptr) ((ULONG_PTR)ptr > (ULONG_PTR)-1000L) + +#define ffsl(val) ffs(val) + +extern size_t g_page_size; + +static inline int posix_memalign(void **memptr, size_t alignment, size_t size) +{ +#ifdef NOT_USE_VIRTUAL_ALLOC + // sanity checks + if (alignment % sizeof(void*)) + return EINVAL; + if (alignment < g_page_size) { + fprintf(stderr, "mthca: Fatal (posix_memalign): alignment too small - %d \n", alignment ); + return EINVAL; + } + + // allocation + *memptr = cl_malloc(size); + if (*memptr) + return 0; + else + return ENOMEM; +#else + *memptr = VirtualAlloc( NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE ); + if (*memptr) + return 0; + else + return ENOMEM; +#endif +} + +// =========================================== +// FUNCTIONS +// =========================================== + + +#endif + diff --git a/branches/IBFD/hw/mthca/user/opcode.h b/branches/IBFD/hw/mthca/user/opcode.h new file mode 100644 index 00000000..cf2598b6 --- /dev/null +++ b/branches/IBFD/hw/mthca/user/opcode.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id$ + */ + +#ifndef INFINIBAND_OPCODE_H +#define INFINIBAND_OPCODE_H + +/* + * This macro cleans up the definitions of constants for BTH opcodes. + * It is used to define constants such as IBV_OPCODE_UD_SEND_ONLY, + * which becomes IBV_OPCODE_UD + IBV_OPCODE_SEND_ONLY, and this gives + * the correct value. + * + * In short, user code should use the constants defined using the + * macro rather than worrying about adding together other constants. +*/ +#define IBV_OPCODE(transport, op) \ + IBV_OPCODE_ ## transport ## _ ## op = \ + IBV_OPCODE_ ## transport + IBV_OPCODE_ ## op + +enum { + /* transport types -- just used to define real constants */ + IBV_OPCODE_RC = 0x00, + IBV_OPCODE_UC = 0x20, + IBV_OPCODE_RD = 0x40, + IBV_OPCODE_UD = 0x60, + + /* operations -- just used to define real constants */ + IBV_OPCODE_SEND_FIRST = 0x00, + IBV_OPCODE_SEND_MIDDLE = 0x01, + IBV_OPCODE_SEND_LAST = 0x02, + IBV_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, + IBV_OPCODE_SEND_ONLY = 0x04, + IBV_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, + IBV_OPCODE_RDMA_WRITE_FIRST = 0x06, + IBV_OPCODE_RDMA_WRITE_MIDDLE = 0x07, + IBV_OPCODE_RDMA_WRITE_LAST = 0x08, + IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, + IBV_OPCODE_RDMA_WRITE_ONLY = 0x0a, + IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, + IBV_OPCODE_RDMA_READ_REQUEST = 0x0c, + IBV_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, + IBV_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, + IBV_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, + IBV_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, + IBV_OPCODE_ACKNOWLEDGE = 0x11, + IBV_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, + IBV_OPCODE_COMPARE_SWAP = 0x13, + IBV_OPCODE_FETCH_ADD = 0x14, + + /* real constants follow -- see comment about above IBV_OPCODE() + macro for more details */ + + /* RC */ + IBV_OPCODE(RC, SEND_FIRST), + IBV_OPCODE(RC, SEND_MIDDLE), + IBV_OPCODE(RC, SEND_LAST), + IBV_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), + IBV_OPCODE(RC, SEND_ONLY), + IBV_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), + IBV_OPCODE(RC, RDMA_WRITE_FIRST), + IBV_OPCODE(RC, RDMA_WRITE_MIDDLE), + IBV_OPCODE(RC, RDMA_WRITE_LAST), + IBV_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IBV_OPCODE(RC, RDMA_WRITE_ONLY), + IBV_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IBV_OPCODE(RC, RDMA_READ_REQUEST), + IBV_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), + IBV_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), + IBV_OPCODE(RC, RDMA_READ_RESPONSE_LAST), + IBV_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), + IBV_OPCODE(RC, ACKNOWLEDGE), + IBV_OPCODE(RC, ATOMIC_ACKNOWLEDGE), + IBV_OPCODE(RC, COMPARE_SWAP), + IBV_OPCODE(RC, FETCH_ADD), + + /* UC */ + IBV_OPCODE(UC, SEND_FIRST), + IBV_OPCODE(UC, SEND_MIDDLE), + IBV_OPCODE(UC, SEND_LAST), + IBV_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), + IBV_OPCODE(UC, SEND_ONLY), + IBV_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), + IBV_OPCODE(UC, RDMA_WRITE_FIRST), + IBV_OPCODE(UC, RDMA_WRITE_MIDDLE), + IBV_OPCODE(UC, RDMA_WRITE_LAST), + IBV_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IBV_OPCODE(UC, RDMA_WRITE_ONLY), + IBV_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + + /* RD */ + IBV_OPCODE(RD, SEND_FIRST), + IBV_OPCODE(RD, SEND_MIDDLE), + IBV_OPCODE(RD, SEND_LAST), + IBV_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), + IBV_OPCODE(RD, SEND_ONLY), + IBV_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), + IBV_OPCODE(RD, RDMA_WRITE_FIRST), + 
IBV_OPCODE(RD, RDMA_WRITE_MIDDLE), + IBV_OPCODE(RD, RDMA_WRITE_LAST), + IBV_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IBV_OPCODE(RD, RDMA_WRITE_ONLY), + IBV_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IBV_OPCODE(RD, RDMA_READ_REQUEST), + IBV_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), + IBV_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), + IBV_OPCODE(RD, RDMA_READ_RESPONSE_LAST), + IBV_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), + IBV_OPCODE(RD, ACKNOWLEDGE), + IBV_OPCODE(RD, ATOMIC_ACKNOWLEDGE), + IBV_OPCODE(RD, COMPARE_SWAP), + IBV_OPCODE(RD, FETCH_ADD), + + /* UD */ + IBV_OPCODE(UD, SEND_ONLY), + IBV_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) +}; + +#endif /* INFINIBAND_OPCODE_H */
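
Editor's note (not part of the commit): the IBV_OPCODE() macro in opcode.h above builds each BTH opcode constant by token-pasting a transport base with an operation offset and summing their values, so user code only ever references the composed names. The stand-alone sketch below illustrates that expansion for two entries; the numeric bases and offsets are copied from the enum in the diff, and main() with its asserts is purely illustrative, not anything the commit adds.

    /* Sketch: how IBV_OPCODE(transport, op) composes BTH opcode constants. */
    #include <assert.h>
    #include <stdio.h>

    #define IBV_OPCODE(transport, op)                          \
            IBV_OPCODE_ ## transport ## _ ## op =              \
                    IBV_OPCODE_ ## transport + IBV_OPCODE_ ## op

    enum {
            /* transport bases (subset, values as in opcode.h) */
            IBV_OPCODE_RC = 0x00,
            IBV_OPCODE_UD = 0x60,

            /* operation offsets (subset, values as in opcode.h) */
            IBV_OPCODE_SEND_ONLY                = 0x04,
            IBV_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05,

            /* composed constants: name and value come from the macro */
            IBV_OPCODE(RC, SEND_ONLY),               /* 0x00 + 0x04 = 0x04 */
            IBV_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) /* 0x60 + 0x05 = 0x65 */
    };

    int main(void)
    {
            /* The pasted names resolve to the summed opcode values. */
            assert(IBV_OPCODE_RC_SEND_ONLY == 0x04);
            assert(IBV_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE == 0x65);
            printf("UD SEND_ONLY_WITH_IMMEDIATE opcode: 0x%02x\n",
                   IBV_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE);
            return 0;
    }

This is why the header's comment tells callers to use the composed constants (e.g. IBV_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) rather than adding transport and operation values by hand.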